SWDEV-1 - Merge github PRs to amd-staging
- https://github.com/ROCm/hip-tests/pull/119
- https://github.com/ROCm/hip-tests/pull/151
- https://github.com/ROCm/hip-tests/pull/57
- https://github.com/ROCm/hip-tests/pull/58
- https://github.com/ROCm/hip-tests/pull/59
- https://github.com/ROCm/hip-tests/pull/60
- https://github.com/ROCm/hip-tests/pull/99
- https://github.com/ROCm/hip-tests/pull/139
- https://github.com/ROCm/hip-tests/pull/152
- https://github.com/ROCm/hip-tests/pull/48
- https://github.com/ROCm/hip-tests/pull/54
- https://github.com/ROCm/hip-tests/pull/53
- https://github.com/ROCm/hip-tests/pull/24
- https://github.com/ROCm/hip-tests/pull/23
- https://github.com/ROCm/hip-tests/pull/22
- https://github.com/ROCm/hip-tests/pull/21
- https://github.com/ROCm/hip-tests/pull/20
- https://github.com/ROCm/hip-tests/pull/14
- https://github.com/ROCm/hip-tests/pull/8
Change-Id: I1eea54cd1436f3ddbfd5c1b3b2f672eb81d03cd4
[ROCm/hip-tests commit: 96df1fde80]
Bu işleme şunda yer alıyor:
@@ -48,11 +48,14 @@
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
|
||||
"Unit_hipFuncSetAttribute_Positive_Parameters",
|
||||
"Unit_hipFuncSetAttribute_Negative_Parameters",
|
||||
"NOTE: The following 4 tests are disabled due to defect - EXSWHTEC-240",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
|
||||
"Unit_hipFuncGetAttributes_Negative_Parameters",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
|
||||
"Unit_hipFuncGetAttributes_Positive_Basic",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-243",
|
||||
"Unit_hipExtLaunchKernel_Negative_Parameters",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
|
||||
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
|
||||
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
|
||||
@@ -127,6 +130,7 @@
|
||||
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
|
||||
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
|
||||
"Unit_hiprtc_stdheaders",
|
||||
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"Unit_hipMemAddressReserve_AlignmentTest",
|
||||
"Unit_hipMemAddressReserve_Negative",
|
||||
@@ -257,6 +261,54 @@
|
||||
"Unit_Device_Complex_hipCfma_Negative_Parameters_RTC",
|
||||
"Unit_Device_make_Complex_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-151",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
|
||||
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-163",
|
||||
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-164",
|
||||
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-165",
|
||||
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-166",
|
||||
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-167",
|
||||
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
|
||||
"SWDEV-441785: Below tests failing in stress test on 05/01/24 ===",
|
||||
"Unit_hipMemcpyParam2DAsync_Positive_Basic",
|
||||
"Unit_hipMemcpy2DAsync_Positive_Basic",
|
||||
"SWDEV-442583: Below tests failing in stress test on 12/01/24 ===",
|
||||
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters",
|
||||
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice",
|
||||
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_MultiKernelSameDevice",
|
||||
"=== Below tests are failing PSDB ===",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Changing_Memcpy_Direction",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Parameters",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Changing_Memcpy_Direction",
|
||||
"Unit_hipGraphMemcpyNodeSetParams_Negative_Parameters",
|
||||
"Unit_hipGraphKernelNodeSetAttribute_Positive_AccessPolicyWindow",
|
||||
"Unit_hipGraphKernelNodeSetAttribute_Negative_Parameters",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyParam2D_Positive_Array",
|
||||
"Unit_hipMemcpyParam2DAsync_Positive_Array",
|
||||
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
|
||||
"Unit_hipLaunchCooperativeKernel_Negative_Parameters",
|
||||
"Performance_hipMemsetD16",
|
||||
"Performance_hipMemsetD16Async",
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters",
|
||||
#endif
|
||||
#if defined VEGA20
|
||||
"=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===",
|
||||
|
||||
@@ -115,11 +115,6 @@
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_NonCohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
|
||||
"Unit_hipEventCreateWithFlags_DefaultFlg_CohHstMem",
|
||||
"NOTE: The following 4 tests are disabled due to defect - EXSWHTEC-240",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
|
||||
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
|
||||
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
|
||||
@@ -218,7 +213,17 @@
|
||||
"Unit_hipVectorTypes_test_on_device",
|
||||
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
|
||||
"Unit_hiprtc_stdheaders",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
|
||||
"Unit_hipFuncGetAttributes_Negative_Parameters",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
|
||||
"Unit_hipFuncGetAttributes_Positive_Basic",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-243",
|
||||
"Unit_hipExtLaunchKernel_Negative_Parameters",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
|
||||
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"Unit_hipMemAddressReserve_AlignmentTest",
|
||||
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
|
||||
"Unit_hipMemCreate_ChkWithKerLaunch",
|
||||
"Unit_hipMemCreate_MapNonContiguousChunks",
|
||||
"Unit_hipMemMap_MapPartialPhysicalMem",
|
||||
@@ -356,6 +361,85 @@
|
||||
"Unit_hipGetMipmappedArrayLevel_Negative",
|
||||
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
|
||||
"Unit_hipFreeMipmappedArrayMultiTArray - int",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Changing_Memcpy_Direction",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams_Positive_Basic",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Parameters",
|
||||
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Changing_Memcpy_Direction",
|
||||
"Unit_hipGraphMemcpyNodeSetParams_Negative_Parameters",
|
||||
"Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters",
|
||||
"Unit_hipGraphKernelNodeSetAttribute_Positive_AccessPolicyWindow",
|
||||
"Unit_hipGraphKernelNodeSetAttribute_Negative_Parameters",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyParam2D_Positive_Array",
|
||||
"Unit_hipMemcpyParam2DAsync_Positive_Array",
|
||||
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
|
||||
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
|
||||
"Performance_hipMemsetD16",
|
||||
"Performance_hipMemsetD16Async",
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-151",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-152",
|
||||
"Unit_hipModuleUnload_Negative_Module_Is_Nullptr",
|
||||
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
|
||||
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-163",
|
||||
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-164",
|
||||
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-165",
|
||||
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-166",
|
||||
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-167",
|
||||
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
|
||||
"Below tests hang in Jenkins PSDB",
|
||||
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint8_t",
|
||||
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint16_t",
|
||||
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint32_t",
|
||||
"=== SWDEV-441604: Below tests take long time to run in stress test on 12/01/24 ===",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Getters_Positive_Basic",
|
||||
#endif
|
||||
"End of json"
|
||||
]
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
|
||||
"Unit_hipModuleUnload_Negative_Double_Unload",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
|
||||
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
|
||||
@@ -70,6 +71,24 @@
|
||||
"Unit_hipFreeMipmappedArrayMultiTArray - int",
|
||||
"Unit_hipFreeMipmappedArray_Negative_Parameters",
|
||||
"Unit_hipCreateSurfaceObject_Negative_Parameters",
|
||||
"Unit_hipDestroySurfaceObject_Negative_Parameters"
|
||||
"Unit_hipDestroySurfaceObject_Negative_Parameters",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
|
||||
"Unit_hipModuleLoad_Positive_Basic",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Unit_hipModuleLoadData_Positive_Basic",
|
||||
"Unit_hipModuleLoadData_Negative_Parameters",
|
||||
"Unit_hipModuleLoadDataEx_Positive_Basic",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Parameters",
|
||||
"Unit_hipModuleGetTexRef_Positive_Basic",
|
||||
"Performance_hipMemsetD16",
|
||||
"Performance_hipMemsetD16Async",
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
"Unit_ChannelDescriptor_Positive_Basic_3D - long3",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_4D - ulong4",
|
||||
"Unit_ChannelDescriptor_Positive_Basic_4D - long4",
|
||||
"Unit_hipModuleUnload_Negative_Double_Unload",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
|
||||
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
|
||||
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
|
||||
@@ -24,6 +25,25 @@
|
||||
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
|
||||
"=== Below 2 tests are disabled due to defect EXSWHTEC-342 ===",
|
||||
"Unit_hipDeviceSetLimit_Negative_Parameters",
|
||||
"Unit_hipDeviceGetLimit_Negative_Parameters"
|
||||
"Unit_hipDeviceGetLimit_Negative_Parameters",
|
||||
"=== Below tests fail in PSDB ===",
|
||||
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
|
||||
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
|
||||
"Unit_hipModuleLoad_Positive_Basic",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Unit_hipModuleLoadData_Positive_Basic",
|
||||
"Unit_hipModuleLoadData_Negative_Parameters",
|
||||
"Unit_hipModuleLoadDataEx_Positive_Basic",
|
||||
"Unit_hipModuleLoadDataEx_Negative_Parameters",
|
||||
"Unit_hipModuleGetTexRef_Positive_Basic",
|
||||
"Performance_hipMemsetD16",
|
||||
"Performance_hipMemsetD16Async",
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -144,21 +144,6 @@ THE SOFTWARE.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup StreamOTest Ordered Memory Allocator
|
||||
* @{
|
||||
* This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime
|
||||
* API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup StreamOTest Ordered Memory Allocator
|
||||
* @{
|
||||
* This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime
|
||||
* API.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup StreamOTest Ordered Memory Allocator
|
||||
* @{
|
||||
@@ -201,7 +186,7 @@ THE SOFTWARE.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
/**
|
||||
* @defgroup ComplexTest Complex type
|
||||
* @{
|
||||
* This section describes tests for the Complex type functions.
|
||||
|
||||
@@ -24,10 +24,10 @@ THE SOFTWARE.
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <utils.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static inline unsigned int GenerateLinearAllocationFlagCombinations(
|
||||
const LinearAllocs allocation_type) {
|
||||
@@ -169,8 +169,8 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
|
||||
HIP_CHECK(
|
||||
hipMemcpy(result.host_ptr(), dst_allocation.ptr(), allocation_size, hipMemcpyDeviceToHost));
|
||||
if constexpr (enable_peer_access) {
|
||||
// If we've gotten this far, EnablePeerAccess must have succeeded, so we only need to check this
|
||||
// condition
|
||||
// If we've gotten this far, EnablePeerAccess must have succeeded, so we
|
||||
// only need to check this condition
|
||||
HIP_CHECK(hipDeviceDisablePeerAccess(dst_device));
|
||||
}
|
||||
|
||||
@@ -238,7 +238,6 @@ void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync,
|
||||
LaunchDelayKernel(std::chrono::milliseconds{100}, kernel_stream);
|
||||
HIP_CHECK(memcpy_func());
|
||||
if (should_sync) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
HIP_CHECK(hipStreamQuery(kernel_stream));
|
||||
} else {
|
||||
HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady);
|
||||
|
||||
@@ -23,6 +23,7 @@ THE SOFTWARE.
|
||||
#pragma once
|
||||
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
|
||||
#pragma clang diagnostic ignored "-Wunused-lambda-capture"
|
||||
|
||||
#include <variant>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
@@ -169,3 +169,9 @@ inline bool DeviceAttributesSupport(const int device, Attributes... attributes)
|
||||
};
|
||||
return (... && DeviceAttributeSupport(device, attributes));
|
||||
}
|
||||
|
||||
/**
 * Reads one attribute of a device through hipDeviceGetAttribute.
 *
 * @param device Ordinal of the device to query.
 * @param attr   Attribute to read.
 * @return The value written by hipDeviceGetAttribute; the local starts at 0.
 *
 * The status returned by hipDeviceGetAttribute is handed to HIP_CHECK.
 */
inline int GetDeviceAttribute(int device, const hipDeviceAttribute_t attr) {
  int queried_value = 0;
  HIP_CHECK(hipDeviceGetAttribute(&queried_value, attr, device));
  return queried_value;
}
|
||||
|
||||
@@ -18,6 +18,9 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
add_subdirectory(memset)
|
||||
add_subdirectory(memcpy)
|
||||
add_subdirectory(kernelLaunch)
|
||||
add_subdirectory(stream)
|
||||
add_subdirectory(event)
|
||||
add_subdirectory(example)
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
# Sources that are compiled regardless of HIP_PLATFORM.
set(TEST_SRC
    kernel_launch_common.cc
    triple_chevron.cc
    hipLaunchKernel.cc
    hipLaunchCooperativeKernel.cc)

# hipExtLaunchKernel.cc is only added when HIP_PLATFORM matches "amd".
if(HIP_PLATFORM MATCHES "amd")
  list(APPEND TEST_SRC hipExtLaunchKernel.cc)
endif()

# Register the benchmark executable with the build_tests target.
hip_add_exe_to_target(
  NAME KernelLaunchPerformance
  TEST_SRC ${TEST_SRC}
  TEST_TARGET_NAME build_tests
  COMPILE_OPTIONS -std=c++17)
|
||||
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class ExtLaunchKernelBenchmark
|
||||
: public KernelLaunchBenchmark<ExtLaunchKernelBenchmark<kernel_type, timer_type>, timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(NullKernel), 1, 1, nullptr, 0, nullptr,
|
||||
events_[0], events_[1], 0u);
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithSmallArgs), 1, 1,
|
||||
small_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithMediumArgs), 1, 1,
|
||||
medium_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithLargeArgs), 1, 1,
|
||||
large_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
|
||||
} else
|
||||
;
|
||||
}
|
||||
|
||||
hipError_t GetError() { return error_; }
|
||||
|
||||
private:
|
||||
EventsGuard events_{2};
|
||||
hipError_t error_;
|
||||
|
||||
char* out_ = nullptr;
|
||||
void* small_kernel_args_[2] = {&small_kernel_args, &out_};
|
||||
void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
|
||||
void* large_kernel_args_[2] = {&large_kernel_args, &out_};
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
ExtLaunchKernelBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(benchmark.GetError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using hipExtLaunchKernel:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/hipExtLaunchKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipExtLaunchKernel") {
  // Every section below is executed once with and once without synchronization.
  const bool sync_each_iteration = GENERATE(true, false);

  SECTION("null kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync_each_iteration);
    }
  }

  SECTION("small kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync_each_iteration);
    }
  }

  SECTION("medium kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync_each_iteration);
    }
  }

  SECTION("large kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync_each_iteration);
    }
  }
}
|
||||
+130
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class LaunchCooperativeKernelBenchmark
|
||||
: public KernelLaunchBenchmark<LaunchCooperativeKernelBenchmark<kernel_type, timer_type>,
|
||||
timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
error_ = hipLaunchCooperativeKernel(reinterpret_cast<void*>(NullKernel), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, nullptr, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
error_ =
|
||||
hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithSmallArgs), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, small_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
error_ =
|
||||
hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithMediumArgs), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, medium_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
error_ =
|
||||
hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithLargeArgs), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, large_kernel_args_, 0, nullptr);
|
||||
} else
|
||||
;
|
||||
}
|
||||
|
||||
hipError_t GetError() { return error_; }
|
||||
|
||||
private:
|
||||
hipError_t error_;
|
||||
|
||||
char* out_ = nullptr;
|
||||
void* small_kernel_args_[2] = {&small_kernel_args, &out_};
|
||||
void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
|
||||
void* large_kernel_args_[2] = {&large_kernel_args, &out_};
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
LaunchCooperativeKernelBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(benchmark.GetError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using hipLaunchCooperativeKernel:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/hipLaunchCooperativeKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Device supports CooperativeLaunch
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipLaunchCooperativeKernel") {
  // Skip when device 0 does not report the cooperative launch attribute.
  const bool cooperative_launch_supported =
      DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch);
  if (!cooperative_launch_supported) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  // Every section below is executed once with and once without synchronization.
  const bool sync_each_iteration = GENERATE(true, false);

  SECTION("null kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync_each_iteration);
    }
  }

  SECTION("small kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync_each_iteration);
    }
  }

  SECTION("medium kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync_each_iteration);
    }
  }

  SECTION("large kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync_each_iteration);
    }

    SECTION("event-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync_each_iteration);
    }
  }
}
|
||||
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class LaunchKernelBenchmark
|
||||
: public KernelLaunchBenchmark<LaunchKernelBenchmark<kernel_type, timer_type>, timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(NullKernel), 1, 1, nullptr, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithSmallArgs), 1, 1,
|
||||
small_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithMediumArgs), 1, 1,
|
||||
medium_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithLargeArgs), 1, 1,
|
||||
large_kernel_args_, 0, nullptr);
|
||||
} else
|
||||
;
|
||||
}
|
||||
|
||||
hipError_t GetError() { return error_; }
|
||||
|
||||
private:
|
||||
hipError_t error_;
|
||||
|
||||
char* out_ = nullptr;
|
||||
void* small_kernel_args_[2] = {&small_kernel_args, &out_};
|
||||
void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
|
||||
void* large_kernel_args_[2] = {&large_kernel_args, &out_};
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
LaunchKernelBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(benchmark.GetError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using hipLaunchKernel:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/hipLaunchKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipLaunchKernel") {
  // Every section below is executed once per generated value.
  const bool synchronize = GENERATE(true, false);

  SECTION("null kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeEvent>(synchronize);
    }
  }

  SECTION("small kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(synchronize);
    }
  }

  SECTION("medium kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(synchronize);
    }
  }

  SECTION("large kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(synchronize);
    }
  }
}
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
// Reads one byte of the kernel's `args` payload (indexed by the global thread
// id) and, when `out` is non-null, stores it through `out`. Expects `args` and
// `out` to be in scope at the expansion site. Wrapped in do { } while (0) so
// the expansion is a single statement: the index variable stays local to the
// macro and the macro is safe under an unbraced if/else.
#define DO_NOT_OPTIMIZE_AWAY                                        \
  do {                                                              \
    const unsigned i = blockIdx.x * blockDim.x + threadIdx.x;       \
    if (out) *out = args.args[i];                                   \
  } while (0)
|
||||
|
||||
__global__ void NullKernel() {}
|
||||
|
||||
__global__ void KernelWithSmallArgs(SmallKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; }
|
||||
|
||||
__global__ void KernelWithMediumArgs(MediumKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; }
|
||||
|
||||
__global__ void KernelWithLargeArgs(LargeKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; }
|
||||
|
||||
SmallKernelArgs small_kernel_args;
|
||||
MediumKernelArgs medium_kernel_args;
|
||||
LargeKernelArgs large_kernel_args;
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
|
||||
// Kernel argument payloads of increasing size, passed by value to the
// benchmark kernels.

// 16-byte payload.
struct SmallKernelArgs {
  char args[16];
};

// 256-byte payload.
struct MediumKernelArgs {
  char args[256];
};

// 4080-byte payload.
// NOTE(review): presumably sized to stay just under the kernel-argument size
// limit once the extra pointer parameter is added -- confirm.
struct LargeKernelArgs {
  char args[4080];
};
|
||||
|
||||
extern SmallKernelArgs small_kernel_args;
|
||||
extern MediumKernelArgs medium_kernel_args;
|
||||
extern LargeKernelArgs large_kernel_args;
|
||||
|
||||
__global__ void NullKernel();
|
||||
|
||||
__global__ void KernelWithSmallArgs(SmallKernelArgs, char*);
|
||||
|
||||
__global__ void KernelWithMediumArgs(MediumKernelArgs, char*);
|
||||
|
||||
__global__ void KernelWithLargeArgs(LargeKernelArgs, char*);
|
||||
|
||||
enum class KernelType { kNull = 0, kSmall, kMedium, kLarge };
|
||||
|
||||
template <typename Derived, bool timer_type>
|
||||
class KernelLaunchBenchmark : public Benchmark<KernelLaunchBenchmark<Derived, timer_type>> {
|
||||
public:
|
||||
void operator()(bool sync = true) {
|
||||
auto& derived = static_cast<Derived&>(*this);
|
||||
|
||||
if (sync) {
|
||||
TIMED_SECTION(timer_type) { derived.LaunchKernel(); }
|
||||
} else {
|
||||
if (this->current() != this->kWarmup) // if not warmup
|
||||
RunWithoutSynchronization();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void RunWithoutSynchronization() {
|
||||
auto iterations = this->iterations();
|
||||
auto warmups = this->warmups();
|
||||
|
||||
// manually handle iterations here to avoid synchronization after each iteration
|
||||
this->Configure(1, 0);
|
||||
|
||||
this->RegisterModifier([iterations](float time) { return time / iterations; });
|
||||
|
||||
auto& derived = static_cast<Derived&>(*this);
|
||||
|
||||
for (size_t i = 0u; i < warmups; ++i) {
|
||||
derived.LaunchKernel();
|
||||
}
|
||||
|
||||
TIMED_SECTION(timer_type) {
|
||||
for (size_t i = 0u; i < iterations; ++i) {
|
||||
derived.LaunchKernel();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/** Returns the benchmark section label for the given synchronization mode. */
static std::string GetSynchronizationSectionName(bool sync) {
  if (sync) {
    return "with synchronization";
  }
  return "without synchronization";
}
|
||||
|
||||
/** Returns the benchmark section label for the kernel selected by kernel_type. */
template <KernelType kernel_type> std::string GetKernelTypeSectionName() {
  switch (kernel_type) {
    case KernelType::kNull:
      return "null kernel";
    case KernelType::kSmall:
      return "small kernel";
    case KernelType::kMedium:
      return "medium kernel";
    case KernelType::kLarge:
      return "large kernel";
    default:
      // Any value outside the four named enumerators.
      return "unknown kernel type";
  }
}
|
||||
|
||||
template <bool timer_type> std::string GetTimerTypeSectionName() {
|
||||
if constexpr (timer_type == kTimerTypeEvent) {
|
||||
return "event based";
|
||||
} else {
|
||||
return "cpu based";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class TripleChevronBenchmark
|
||||
: public KernelLaunchBenchmark<TripleChevronBenchmark<kernel_type, timer_type>, timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
NullKernel<<<1, 1>>>();
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
KernelWithSmallArgs<<<1, 1>>>(small_kernel_args, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
KernelWithMediumArgs<<<1, 1>>>(medium_kernel_args, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
KernelWithLargeArgs<<<1, 1>>>(large_kernel_args, nullptr);
|
||||
} else
|
||||
;
|
||||
}
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
TripleChevronBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using the triple chevron (`<<<...>>>`) launch notation:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/triple_chevron.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_Triple_Chevron") {
  // Every section below is executed once per generated value.
  const bool synchronize = GENERATE(true, false);

  SECTION("null kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeEvent>(synchronize);
    }
  }

  SECTION("small kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(synchronize);
    }
  }

  SECTION("medium kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(synchronize);
    }
  }

  SECTION("large kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(synchronize);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(synchronize);
    }
  }
}
|
||||
@@ -0,0 +1,52 @@
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
# Sources for the memcpy performance test executable, grouped by API family.
set(TEST_SRC
    # Linear copies
    hipMemcpy.cc
    hipMemcpyAsync.cc
    hipMemcpyWithStream.cc
    # Array <-> host copies
    hipMemcpyAtoH.cc
    hipMemcpyHtoA.cc
    # Driver-style directional copies
    hipMemcpyDtoD.cc
    hipMemcpyDtoDAsync.cc
    hipMemcpyDtoH.cc
    hipMemcpyDtoHAsync.cc
    hipMemcpyHtoD.cc
    hipMemcpyHtoDAsync.cc
    # Symbol copies
    hipMemcpyToSymbol.cc
    hipMemcpyToSymbolAsync.cc
    hipMemcpyFromSymbol.cc
    hipMemcpyFromSymbolAsync.cc
    # 2D copies
    hipMemcpy2D.cc
    hipMemcpy2DAsync.cc
    hipMemcpy2DToArray.cc
    hipMemcpy2DToArrayAsync.cc
    hipMemcpy2DFromArray.cc
    hipMemcpy2DFromArrayAsync.cc
    hipMemcpyParam2D.cc
    hipMemcpyParam2DAsync.cc
    # 3D copies
    hipMemcpy3D.cc
    hipMemcpy3DAsync.cc
)

# Single executable containing all memcpy benchmarks, built with C++17.
hip_add_exe_to_target(NAME MemcpyPerformance
                      TEST_SRC ${TEST_SRC}
                      TEST_TARGET_NAME build_tests
                      COMPILE_OPTIONS -std=c++17)
|
||||
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for all memcpy HIP APIs.
|
||||
*/
|
||||
|
||||
/** Benchmark whose timed section is a single CPU-timed hipMemcpy call. */
class MemcpyBenchmark : public Benchmark<MemcpyBenchmark> {
 public:
  /**
   * @param dst  destination pointer passed to hipMemcpy
   * @param src  source pointer passed to hipMemcpy
   * @param size byte count passed to hipMemcpy
   * @param kind copy direction passed to hipMemcpy
   */
  void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind) {
    TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipMemcpy(dst, src, size, kind)); }
  }
};
|
||||
|
||||
/**
 * Runs the hipMemcpy benchmark for one size / allocation-type combination.
 *
 * @param dst_allocation_type allocation kind for the destination buffer
 * @param src_allocation_type allocation kind for the source buffer
 * @param size                size passed to both allocations and to the copy
 * @param kind                copy direction; hipMemcpyDeviceToDevice takes the
 *                            two-device path below
 * @param enable_peer_access  forwarded to GetDeviceIds (device-to-device only)
 */
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
                         size_t size, hipMemcpyKind kind, bool enable_peer_access = false) {
  MemcpyBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
  benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));

  if (kind != hipMemcpyDeviceToDevice) {
    LinearAllocGuard<int> src_allocation(src_allocation_type, size);
    LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
    benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
    return;
  }

  // Query the device pair exactly once: GetDeviceIds is defined elsewhere and
  // may have side effects (NOTE(review): it presumably enables peer access
  // when requested -- confirm), which must not be repeated.
  const auto device_ids = GetDeviceIds(enable_peer_access);
  const int src_device = std::get<0>(device_ids);
  const int dst_device = std::get<1>(device_ids);

  // Make the source device current explicitly rather than relying on whatever
  // device happened to be active when this function was entered.
  HIP_CHECK(hipSetDevice(src_device));
  LinearAllocGuard<int> src_allocation(src_allocation_type, size);

  HIP_CHECK(hipSetDevice(dst_device));
  LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);

  // The copy itself is issued with the source device current.
  HIP_CHECK(hipSetDevice(src_device));
  benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_DeviceToHost") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Source is always device memory; destination is pageable or pinned host memory.
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(host_alloc, LinearAllocs::hipMalloc, size, hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_HostToDevice") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Source is pageable or pinned host memory; destination is always device memory.
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(LinearAllocs::hipMalloc, host_alloc, size, hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_HostToHost") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Both endpoints range over pageable and pinned host memory.
  const auto src_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  const auto dst_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(dst_alloc, src_alloc, size, hipMemcpyHostToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_EnablePeerAccess") {
  // Peer-to-peer copies need at least two devices.
  if (HipTest::getDeviceCount() < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Both endpoints are device allocations; the final argument requests peer access.
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, size, hipMemcpyDeviceToDevice,
               true);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_DisablePeerAccess") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Both endpoints are device allocations; peer access keeps its default (false).
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, size, hipMemcpyDeviceToDevice);
}
|
||||
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/** Benchmark whose timed section is a single CPU-timed hipMemcpy2D call. */
class Memcpy2DBenchmark : public Benchmark<Memcpy2DBenchmark> {
 public:
  /** All parameters are forwarded unchanged to hipMemcpy2D, in the same order. */
  void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width,
                  size_t height, hipMemcpyKind kind) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpy2D(dst, dst_pitch, src, src_pitch, width, height, kind));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy2DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyDeviceToHost);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyHostToDevice);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
|
||||
width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_DeviceToHost") {
  // Row width varies; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, 32, hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_HostToDevice") {
  // Row width varies; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, 32, hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_HostToHost") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(generated_width, kHeight, hipMemcpyHostToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_DisablePeerAccess") {
  // RunBenchmark's peer-access argument is left at its default here.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_EnablePeerAccess") {
  // The peer-to-peer variant needs a second device; skip when there is none.
  const bool has_second_device = HipTest::getDeviceCount() >= 2;
  if (!has_second_device) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  // The trailing `true` requests the peer-access-enabled path in RunBenchmark.
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpy2DAsync`.
 *
 * Each invocation enqueues one 2D copy on `stream` inside a timed section of
 * type `kTimerTypeEvent`, then synchronizes the stream once the timed section
 * has closed.
 */
class Memcpy2DAsyncBenchmark : public Benchmark<Memcpy2DAsyncBenchmark> {
 public:
  void operator()(void* dst,
                  size_t dst_pitch,
                  const void* src,
                  size_t src_pitch,
                  size_t width,
                  size_t height,
                  hipMemcpyKind kind,
                  const hipStream_t& stream) {
    // Only the enqueue of the copy sits inside the timed section.
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpy2DAsync(dst, dst_pitch, src, src_pitch, width, height, kind, stream));
    }

    // Wait for the stream before returning to the caller.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy2DAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyDeviceToHost, stream);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyHostToDevice, stream);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
|
||||
width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
hipMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToHost") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_HostToDevice") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(generated_width, kHeight, hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_HostToHost") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(generated_width, kHeight, hipMemcpyHostToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_EnablePeerAccess") {
  // The peer-to-peer variant needs a second device; skip when there is none.
  const bool has_second_device = HipTest::getDeviceCount() >= 2;
  if (!has_second_device) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 4_MB, 16_MB);
  // The trailing `true` sets enable_peer_access.
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpy2DFromArray`.
 *
 * Each invocation performs one copy out of `src`, with both offset arguments
 * set to 0, inside a timed section of type `kTimerTypeCpu`.
 */
class Memcpy2DFromArrayBenchmark : public Benchmark<Memcpy2DFromArrayBenchmark> {
 public:
  void operator()(void* dst,
                  size_t dst_pitch,
                  hipArray_const_t src,
                  size_t width,
                  size_t height,
                  hipMemcpyKind kind) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpy2DFromArray(dst, dst_pitch, src, 0, 0, width, height, kind));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DFromArrayBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(host_allocation.ptr(), width * sizeof(int), array_allocation.ptr(),
|
||||
width * sizeof(int), height, hipMemcpyDeviceToHost);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
array_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArray` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToHost") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_EnablePeerAccess") {
  // The peer-to-peer variant needs a second device; skip when there is none.
  const bool has_second_device = HipTest::getDeviceCount() >= 2;
  if (!has_second_device) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  // The trailing `true` sets enable_peer_access.
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpy2DFromArrayAsync`.
 *
 * Each invocation enqueues one copy out of `src`, with both offset arguments
 * set to 0, on `stream` inside a timed section of type `kTimerTypeEvent`, then
 * synchronizes the stream once the timed section has closed.
 */
class Memcpy2DFromArrayAsyncBenchmark : public Benchmark<Memcpy2DFromArrayAsyncBenchmark> {
 public:
  void operator()(void* dst,
                  size_t dst_pitch,
                  hipArray_const_t src,
                  size_t width,
                  size_t height,
                  hipMemcpyKind kind,
                  const hipStream_t& stream) {
    // Only the enqueue of the copy sits inside the timed section.
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpy2DFromArrayAsync(dst, dst_pitch, src, 0, 0, width, height, kind, stream));
    }

    // Wait for the stream before returning to the caller.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DFromArrayAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(host_allocation.ptr(), width * sizeof(int),
|
||||
array_allocation.ptr(), width * sizeof(int),
|
||||
height, hipMemcpyDeviceToHost, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
array_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.height(), hipMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToHost") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_EnablePeerAccess") {
  // The peer-to-peer variant needs a second device; skip when there is none.
  const bool has_second_device = HipTest::getDeviceCount() >= 2;
  if (!has_second_device) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  // The trailing `true` sets enable_peer_access.
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpy2DToArray`.
 *
 * Each invocation performs one copy into `dst`, with both offset arguments set
 * to 0, inside a timed section of type `kTimerTypeCpu`.
 */
class Memcpy2DToArrayBenchmark : public Benchmark<Memcpy2DToArrayBenchmark> {
 public:
  void operator()(hipArray_t dst,
                  const void* src,
                  size_t src_pitch,
                  size_t width,
                  size_t height,
                  hipMemcpyKind kind) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpy2DToArray(dst, 0, 0, src, src_pitch, width, height, kind));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DToArrayBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyHostToDevice) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), width * sizeof(int),
|
||||
width * sizeof(int), height, hipMemcpyHostToDevice);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArray` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArray_HostToDevice") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_EnablePeerAccess") {
  // The peer-to-peer variant needs a second device; skip when there is none.
  const bool has_second_device = HipTest::getDeviceCount() >= 2;
  if (!has_second_device) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  // The trailing `true` sets enable_peer_access.
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpy2DToArrayAsync`.
 *
 * Each invocation enqueues one copy into `dst`, with both offset arguments set
 * to 0, on `stream` inside a timed section of type `kTimerTypeEvent`, then
 * synchronizes the stream once the timed section has closed.
 */
class Memcpy2DToArrayAsyncBenchmark : public Benchmark<Memcpy2DToArrayAsyncBenchmark> {
 public:
  void operator()(hipArray_t dst,
                  const void* src,
                  size_t src_pitch,
                  size_t width,
                  size_t height,
                  hipMemcpyKind kind,
                  const hipStream_t& stream) {
    // Only the enqueue of the copy sits inside the timed section.
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpy2DToArrayAsync(dst, 0, 0, src, src_pitch, width, height, kind, stream));
    }

    // Wait for the stream before returning to the caller.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DToArrayAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyHostToDevice) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(),
|
||||
width * sizeof(int), width * sizeof(int), height,
|
||||
hipMemcpyHostToDevice, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArrayAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_HostToDevice") {
  // GENERATE supplies each listed width in turn; the height stays fixed.
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  constexpr int kHeight = 32;
  const auto generated_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(generated_width, kHeight, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_EnablePeerAccess") {
  // A second GPU is needed for this variant; skip when fewer than two are present.
  const bool has_two_gpus = HipTest::getDeviceCount() >= 2;
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto row_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(row_width, 32, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpy3D`.
 *
 * The copy descriptor is built before the TIMED_SECTION, so only the
 * `hipMemcpy3D` call itself sits inside the section.
 */
class Memcpy3DBenchmark : public Benchmark<Memcpy3DBenchmark> {
 public:
  /**
   * @param dst_ptr pitched pointer describing the destination
   * @param src_ptr pitched pointer describing the source
   * @param extent  extent of the region to copy
   * @param kind    direction of the copy
   */
  void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
                  const hipExtent extent, hipMemcpyKind kind) {
    // Source and destination positions are both (0, 0, 0).
    hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
                                                  src_ptr, make_hipPos(0, 0, 0),
                                                  extent, kind);
    TIMED_SECTION(kTimerTypeCpu) {
      // hipMemcpy3D takes the descriptor by address.
      HIP_CHECK(hipMemcpy3D(&params));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy3DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height)
|
||||
+ ", " + std::to_string(extent.depth) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height()),
|
||||
device_allocation.pitched_ptr(), device_allocation.extent(), kind);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(device_allocation.pitched_ptr(),
|
||||
make_hipPitchedPtr(host_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height()),
|
||||
device_allocation.extent(), kind);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
extent, kind);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard3D<int> src_allocation(extent);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard3D<int> dst_allocation(extent);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
|
||||
dst_allocation.extent(), kind);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_DeviceToHost") {
  // Only the width varies between runs; height and depth stay at 16 and 4.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_HostToDevice") {
  // Only the width varies between runs; height and depth stay at 16 and 4.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_HostToHost") {
  // Only the width varies between runs; height and depth stay at 16 and 4.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyHostToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_EnablePeerAccess") {
  // A second GPU is needed for this variant; skip when fewer than two are present.
  const bool has_two_gpus = HipTest::getDeviceCount() >= 2;
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpy3DAsync`.
 *
 * The copy descriptor is built before the TIMED_SECTION_STREAM, so only the
 * `hipMemcpy3DAsync` call sits inside the section. The stream is synchronized
 * after the section, before the functor returns.
 */
class Memcpy3DAsyncBenchmark : public Benchmark<Memcpy3DAsyncBenchmark> {
 public:
  /**
   * @param dst_ptr pitched pointer describing the destination
   * @param src_ptr pitched pointer describing the source
   * @param extent  extent of the region to copy
   * @param kind    direction of the copy
   * @param stream  stream the copy is enqueued on and the section is bound to
   */
  void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
                  const hipExtent extent, hipMemcpyKind kind, const hipStream_t& stream) {
    // Source and destination positions are both (0, 0, 0).
    hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
                                                  src_ptr, make_hipPos(0, 0, 0),
                                                  extent, kind);
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      // hipMemcpy3DAsync takes the descriptor by address.
      HIP_CHECK(hipMemcpy3DAsync(&params, stream));
    }
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy3DAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height)
|
||||
+ ", " + std::to_string(extent.depth) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height()),
|
||||
device_allocation.pitched_ptr(), device_allocation.extent(), kind, stream);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(device_allocation.pitched_ptr(),
|
||||
make_hipPitchedPtr(host_allocation.ptr(),
|
||||
device_allocation.pitch(),
|
||||
device_allocation.width(),
|
||||
device_allocation.height()),
|
||||
device_allocation.extent(), kind, stream);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
extent, kind, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard3D<int> src_allocation(extent);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard3D<int> dst_allocation(extent);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
|
||||
dst_allocation.extent(), kind, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToHost") {
  // Only the width varies between runs; height and depth stay at 16 and 4.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_HostToDevice") {
  // Only the width varies between runs; height and depth stay at 16 and 4.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_HostToHost") {
  // Only the width varies between runs; height and depth stay at 16 and 4.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyHostToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_EnablePeerAccess") {
  // A second GPU is needed for this variant; skip when fewer than two are present.
  const bool has_two_gpus = HipTest::getDeviceCount() >= 2;
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(make_hipExtent(row_width, 16, 4), hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpyAsync`.
 *
 * The copy is enqueued inside TIMED_SECTION_STREAM(kTimerTypeEvent, stream);
 * the stream is synchronized once the section has ended.
 */
class MemcpyAsyncBenchmark : public Benchmark<MemcpyAsyncBenchmark> {
 public:
  void operator()(void* dst,
                  const void* src,
                  size_t size,
                  hipMemcpyKind kind,
                  const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyAsync(dst, src, size, kind, stream));
    }
    // Wait for the enqueued copy before returning to the caller.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyAsync` benchmark for one size and one pair of allocation types.
 *
 * @param dst_allocation_type allocation type used for the destination buffer
 * @param src_allocation_type allocation type used for the source buffer
 * @param size                size passed to both allocations and to the copy
 * @param kind                copy direction; hipMemcpyDeviceToDevice takes the two-device path
 * @param enable_peer_access  forwarded to GetDeviceIds() on the device-to-device path
 */
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
                         size_t size, hipMemcpyKind kind, bool enable_peer_access = false) {
  MemcpyAsyncBenchmark benchmark;
  // Section names are added in this order: size, source type, destination type.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
  benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));

  const StreamGuard stream_guard{Streams::created};
  const hipStream_t copy_stream = stream_guard.stream();

  if (kind == hipMemcpyDeviceToDevice) {
    int from_device = std::get<0>(GetDeviceIds(enable_peer_access));
    int to_device = std::get<1>(GetDeviceIds(enable_peer_access));

    // Source first, then switch devices for the destination, then switch back.
    LinearAllocGuard<int> source(src_allocation_type, size);
    HIP_CHECK(hipSetDevice(to_device));
    LinearAllocGuard<int> target(dst_allocation_type, size);
    HIP_CHECK(hipSetDevice(from_device));

    benchmark.Run(target.ptr(), source.ptr(), size, kind, copy_stream);
    return;
  }

  // Every other copy kind: both buffers are allocated without switching devices.
  LinearAllocGuard<int> source(src_allocation_type, size);
  LinearAllocGuard<int> target(dst_allocation_type, size);
  benchmark.Run(target.ptr(), source.ptr(), size, kind, copy_stream);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_DeviceToHost") {
  // Source is always hipMalloc memory; the host destination type is generated.
  const auto byte_count = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_dst_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  RunBenchmark(host_dst_type, LinearAllocs::hipMalloc, byte_count, hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_HostToDevice") {
  // Destination is always hipMalloc memory; the host source type is generated.
  const auto byte_count = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_src_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  RunBenchmark(LinearAllocs::hipMalloc, host_src_type, byte_count, hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_HostToHost") {
  // Generators are declared in the order size, source type, destination type.
  const auto byte_count = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_src_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  const auto host_dst_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  RunBenchmark(host_dst_type, host_src_type, byte_count, hipMemcpyHostToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_DisablePeerAccess") {
  // Both buffers are hipMalloc memory; enable_peer_access is left at its default (false).
  const auto byte_count = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, byte_count,
               hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_EnablePeerAccess") {
  // A second GPU is needed for this variant; skip when fewer than two are present.
  const bool has_two_gpus = HipTest::getDeviceCount() >= 2;
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  // Both buffers are hipMalloc memory.
  const auto byte_count = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, byte_count,
               hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpyAtoH`.
 *
 * The copy is issued inside TIMED_SECTION(kTimerTypeCpu) and always reads
 * from offset 0 of the source array.
 */
class MemcpyAtoHBenchmark : public Benchmark<MemcpyAtoHBenchmark> {
 public:
  void operator()(void* dst,
                  hipArray_t src_array,
                  size_t allocation_size) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyAtoH(dst, src_array, 0, allocation_size));
    }
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyAtoH` benchmark for one array width and one host allocation type.
 *
 * @param host_allocation_type allocation type used for the host destination buffer
 * @param width                width of the source array; the host buffer and the
 *                             copy use `width * sizeof(int)`
 */
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
  MemcpyAtoHBenchmark benchmark;
  // Section names are added in this order: width, host allocation type.
  benchmark.AddSectionName(std::to_string(width));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  size_t copy_size = width * sizeof(int);

  // Host buffer is created before the array, as the destination of the copy.
  LinearAllocGuard<int> host_buffer(host_allocation_type, copy_size);
  // Array extent has height and depth of 0.
  ArrayAllocGuard<int> source_array(make_hipExtent(width, 0, 0), hipArrayDefault);

  benchmark.Run(host_buffer.ptr(), source_array.ptr(), copy_size);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAtoH` from Device array to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 512 B
|
||||
* - Medium: 1024 B
|
||||
* - Large: 4096 B
|
||||
* -# Allocation type
|
||||
* - Host: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAtoH.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAtoH") {
  // The generated value is handed to RunBenchmark as its `width` argument.
  const auto array_width = GENERATE(512, 1024, 4096);
  const auto host_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  RunBenchmark(host_type, array_width);
}
|
||||
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpyDtoD`.
 *
 * The copy is issued inside TIMED_SECTION(kTimerTypeCpu).
 */
class MemcpyDtoDBenchmark : public Benchmark<MemcpyDtoDBenchmark> {
 public:
  void operator()(hipDeviceptr_t& dst,
                  const hipDeviceptr_t& src,
                  size_t size) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyDtoD(dst, src, size));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
|
||||
MemcpyDtoDBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
|
||||
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
|
||||
reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()), size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoD` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoD.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessEnabled") {
  // A second GPU is needed for this variant; skip when fewer than two are present.
  const bool has_two_gpus = HipTest::getDeviceCount() >= 2;
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto byte_count = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(byte_count, true);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoD.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessDisabled") {
  // RunBenchmark's enable_peer_access argument is left at its default (false).
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(bytes);
}
|
||||
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyDtoDAsync` call on a caller-provided stream.
 */
class MemcpyDtoDAsyncBenchmark : public Benchmark<MemcpyDtoDAsyncBenchmark> {
 public:
  /**
   * Enqueues the copy inside the `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)` region and
   * then synchronizes the stream after that region has closed.
   */
  void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size,
                  const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyDtoDAsync(dst, src, size, stream));
    }
    // The wait for completion is issued outside the timed region.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
|
||||
MemcpyDtoDAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
|
||||
reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()),
|
||||
size, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoDAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessEnabled") {
  // The copy goes between two devices, so bail out early on single-GPU systems.
  constexpr int kRequiredDevices = 2;
  if (HipTest::getDeviceCount() < kRequiredDevices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(bytes, /*enable_peer_access=*/true);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoDAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessDisabled") {
  // RunBenchmark's enable_peer_access argument is left at its default (false).
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(bytes);
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyDtoH` call.
 */
class MemcpyDtoHBenchmark : public Benchmark<MemcpyDtoHBenchmark> {
 public:
  /**
   * Copies `size` from device pointer `src` into host pointer `dst`. Only the copy call sits
   * inside the `TIMED_SECTION(kTimerTypeCpu)` region.
   */
  void operator()(void* dst, const hipDeviceptr_t& src, size_t size) {
    TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipMemcpyDtoH(dst, src, size)); }
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyDtoH` benchmark for one size / host-allocation-type combination.
 *
 * The section name is built from the size followed by the host allocation type's name.
 * The device buffer is allocated before the host buffer.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type,
                         size_t size) {
  MemcpyDtoHBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  LinearAllocGuard<int> device_buffer(device_allocation_type, size);
  LinearAllocGuard<int> host_buffer(host_allocation_type, size);

  benchmark.Run(host_buffer.ptr(), reinterpret_cast<hipDeviceptr_t>(device_buffer.ptr()), size);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoH` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoH.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoH") {
  // Generators are declared in the original order (size first, host type second).
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // The device side always uses hipMalloc.
  const auto device_kind = LinearAllocs::hipMalloc;

  RunBenchmark(host_kind, device_kind, bytes);
}
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyDtoHAsync` call on a caller-provided stream.
 */
class MemcpyDtoHAsyncBenchmark : public Benchmark<MemcpyDtoHAsyncBenchmark> {
 public:
  /**
   * Enqueues the copy inside the `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)` region and
   * then synchronizes the stream after that region has closed.
   */
  void operator()(void* dst, const hipDeviceptr_t& src, size_t size,
                  const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyDtoHAsync(dst, src, size, stream));
    }
    // The wait for completion is issued outside the timed region.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyDtoHAsync` benchmark for one size / host-allocation-type combination.
 *
 * The section name is built from the size followed by the host allocation type's name.
 * A stream is created first, then the device buffer, then the host buffer.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type,
                         size_t size) {
  MemcpyDtoHAsyncBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  LinearAllocGuard<int> device_buffer(device_allocation_type, size);
  LinearAllocGuard<int> host_buffer(host_allocation_type, size);

  benchmark.Run(host_buffer.ptr(), reinterpret_cast<hipDeviceptr_t>(device_buffer.ptr()), size,
                stream);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoHAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoHAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoHAsync") {
  // Generators are declared in the original order (size first, host type second).
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // The device side always uses hipMalloc.
  const auto device_kind = LinearAllocs::hipMalloc;

  RunBenchmark(host_kind, device_kind, bytes);
}
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
// Device array used as the symbol (via HIP_SYMBOL) by the benchmark functor in this file.
// NOTE(review): the element count is spelled `1_MB`; presumably a user-defined literal from the
// common header - confirm it yields a constant expression.
__device__ int devSymbol[1_MB];
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyFromSymbol` call that reads from `devSymbol`.
 */
class MemcpyFromSymbolBenchmark : public Benchmark<MemcpyFromSymbolBenchmark> {
 public:
  /**
   * Writes `source` into `devSymbol` first (outside the timed region), then reads the same
   * `size` / `offset` range back into `result` inside `TIMED_SECTION(kTimerTypeCpu)`.
   */
  void operator()(const void* source, void* result, size_t size, size_t offset) {
    // Untimed setup: fill the symbol range that is about to be read.
    HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));

    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyFromSymbol(result, HIP_SYMBOL(devSymbol), size, offset));
    }
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyFromSymbol` benchmark once.
 *
 * The section name is built from `size` followed by `offset`; all four arguments are passed
 * through to the benchmark functor unchanged.
 */
static void RunBenchmark(const void* source, void* result, size_t size = 1, size_t offset = 0) {
  MemcpyFromSymbolBenchmark benchmark;

  // Section labels: size first, then offset.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(std::to_string(offset));

  benchmark.Run(source, result, size, offset);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbol` from Device to Host.
|
||||
* - Utilizes singular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbol_SingularValue") {
  int set{42};
  int result{0};

  // Pass the size explicitly: RunBenchmark's default size is 1, which would transfer only a
  // single byte of the integer. The array tests in this file likewise pass sizeof(int) * count.
  RunBenchmark(&set, &result, sizeof(set));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbol` from Device to Host.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbol_ArrayValue") {
  // Element count of the host arrays; the size handed to RunBenchmark is scaled by sizeof(int).
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // Source is filled with 42, destination starts zeroed.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  const size_t byte_count = sizeof(int) * size;
  RunBenchmark(array.data(), result.data(), byte_count);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbol` from Device to Host.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbol_WithOffset") {
  // Element count of the host arrays.
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // Source is filled with 42, destination starts zeroed.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  // Offset in elements: either the start of the array or its midpoint.
  size_t offset = GENERATE_REF(0, size / 2);

  // Convert the element-based tail length and offset into the sizes RunBenchmark is given.
  const size_t byte_count = sizeof(int) * (size - offset);
  const size_t byte_offset = offset * sizeof(int);
  RunBenchmark(array.data() + offset, result.data() + offset, byte_count, byte_offset);
}
|
||||
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
// Device array used as the symbol (via HIP_SYMBOL) by the benchmark functor in this file.
// NOTE(review): the element count is spelled `1_MB`; presumably a user-defined literal from the
// common header - confirm it yields a constant expression.
__device__ int devSymbol[1_MB];
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyFromSymbolAsync` call that reads from
 * `devSymbol` on a caller-provided stream.
 */
class MemcpyFromSymbolAsyncBenchmark : public Benchmark<MemcpyFromSymbolAsyncBenchmark> {
 public:
  /**
   * Enqueues a write of `source` into `devSymbol` (outside the timed region), then enqueues the
   * read of the same `size` / `offset` range into `result` inside
   * `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)`, and finally synchronizes the stream.
   */
  void operator()(const void* source, void* result, size_t size, size_t offset,
                  const hipStream_t& stream) {
    // Untimed setup: fill the symbol range that is about to be read.
    HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
                                     hipMemcpyHostToDevice, stream));

    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyFromSymbolAsync(result, HIP_SYMBOL(devSymbol), size, offset,
                                         hipMemcpyDeviceToHost, stream));
    }

    // The wait for completion is issued outside the timed region.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) {
|
||||
MemcpyFromSymbolAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(std::to_string(offset));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
benchmark.Run(source, result, size, offset, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
|
||||
* - Utilizes singular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_SingularValue") {
  int set{42};
  int result{0};

  // Pass the size explicitly: RunBenchmark's default size is 1, which would transfer only a
  // single byte of the integer. The array tests in this file likewise pass sizeof(int) * count.
  RunBenchmark(&set, &result, sizeof(set));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_ArrayValue") {
  // Element count of the host arrays; the size handed to RunBenchmark is scaled by sizeof(int).
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // Source is filled with 42, destination starts zeroed.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  const size_t byte_count = sizeof(int) * size;
  RunBenchmark(array.data(), result.data(), byte_count);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_WithOffset") {
  // Element count of the host arrays.
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // Source is filled with 42, destination starts zeroed.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  // Offset in elements: either the start of the array or its midpoint.
  size_t offset = GENERATE_REF(0, size / 2);

  // Convert the element-based tail length and offset into the sizes RunBenchmark is given.
  const size_t byte_count = sizeof(int) * (size - offset);
  const size_t byte_offset = offset * sizeof(int);
  RunBenchmark(array.data() + offset, result.data() + offset, byte_count, byte_offset);
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyHtoA` call.
 */
class MemcpyHtoABenchmark : public Benchmark<MemcpyHtoABenchmark> {
 public:
  /**
   * Copies `allocation_size` from host pointer `src` into `dst_array`, always at destination
   * offset 0. Only the copy call sits inside the `TIMED_SECTION(kTimerTypeCpu)` region.
   */
  void operator()(hipArray_t dst_array, const void* src, size_t allocation_size) {
    TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipMemcpyHtoA(dst_array, 0, src, allocation_size)); }
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyHtoA` benchmark for one array width / host-allocation-type combination.
 *
 * `width` is the extent width used to create the array (height and depth are 0); the host
 * buffer and the copy use `width * sizeof(int)`. The section name is built from `width`
 * followed by the host allocation type's name.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
  MemcpyHtoABenchmark benchmark;
  benchmark.AddSectionName(std::to_string(width));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  // Size of one row of `width` ints.
  size_t row_bytes = width * sizeof(int);

  ArrayAllocGuard<int> device_array(make_hipExtent(width, 0, 0), hipArrayDefault);
  LinearAllocGuard<int> host_buffer(host_allocation_type, row_bytes);

  benchmark.Run(device_array.ptr(), host_buffer.ptr(), row_bytes);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyHtoA` from Host to Device array:
|
||||
* -# Allocation size
|
||||
* - Small: 512 B
|
||||
* - Medium: 1024 B
|
||||
* - Large: 4096 B
|
||||
* -# Allocation type
|
||||
* - Host: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyHtoA.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyHtoA") {
  // These values are handed to RunBenchmark as its `width` argument, which it multiplies by
  // sizeof(int) to obtain the copy size.
  const auto width = GENERATE(512, 1024, 4096);
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(host_kind, width);
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyHtoD` call.
 */
class MemcpyHtoDBenchmark : public Benchmark<MemcpyHtoDBenchmark> {
 public:
  /**
   * Copies `size` from host pointer `src` into device pointer `dst`. Only the copy call sits
   * inside the `TIMED_SECTION(kTimerTypeCpu)` region.
   */
  void operator()(hipDeviceptr_t& dst, void* src, size_t size) {
    TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipMemcpyHtoD(dst, src, size)); }
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyHtoD` benchmark for one size / host-allocation-type combination.
 *
 * The section name is built from the size followed by the host allocation type's name.
 * The device buffer is allocated before the host buffer.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type,
                         size_t size) {
  MemcpyHtoDBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  LinearAllocGuard<int> device_buffer(device_allocation_type, size);
  LinearAllocGuard<int> host_buffer(host_allocation_type, size);

  benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_buffer.ptr()), host_buffer.ptr(), size);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyHtoD` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyHtoD.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyHtoD") {
  // Generators are declared in the original order (size first, host type second).
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // The device side always uses hipMalloc.
  const auto device_kind = LinearAllocs::hipMalloc;

  RunBenchmark(host_kind, device_kind, bytes);
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor wrapping a single `hipMemcpyHtoDAsync` call on a caller-provided stream.
 */
class MemcpyHtoDAsyncBenchmark : public Benchmark<MemcpyHtoDAsyncBenchmark> {
 public:
  /**
   * Enqueues the copy inside the `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)` region and
   * then synchronizes the stream after that region has closed.
   */
  void operator()(hipDeviceptr_t& dst, void* src, size_t size, const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyHtoDAsync(dst, src, size, stream));
    }
    // The wait for completion is issued outside the timed region.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
/**
 * Runs the `hipMemcpyHtoDAsync` benchmark for one size / host-allocation-type combination.
 *
 * The section name is built from the size followed by the host allocation type's name.
 * A stream is created first, then the device buffer, then the host buffer.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type,
                         size_t size) {
  MemcpyHtoDAsyncBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  LinearAllocGuard<int> device_buffer(device_allocation_type, size);
  LinearAllocGuard<int> host_buffer(host_allocation_type, size);

  benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_buffer.ptr()), host_buffer.ptr(), size,
                stream);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyHtoDAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyHtoDAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyHtoDAsync") {
  // Generators are declared in the original order (size first, host type second).
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // The device side always uses hipMalloc.
  const auto device_kind = LinearAllocs::hipMalloc;

  RunBenchmark(host_kind, device_kind, bytes);
}
|
||||
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for the synchronous `hipMemcpyParam2D` call.
 *
 * The copy descriptor is built outside the timed section so that only the
 * `hipMemcpyParam2D` call itself is wrapped by `TIMED_SECTION(kTimerTypeCpu)`.
 */
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
 public:
  /**
   * @param dst        destination pointer
   * @param dst_pitch  destination pitch passed to CreateMemcpy2DParam
   * @param src        source pointer
   * @param src_pitch  source pitch passed to CreateMemcpy2DParam
   * @param width      copy width passed to CreateMemcpy2DParam (stored as WidthInBytes)
   * @param height     number of rows to copy
   * @param kind       copy direction used to pick host/device memory types
   */
  void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
                  size_t width, size_t height, hipMemcpyKind kind) {
    hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch,
                                              width, height, kind);
    TIMED_SECTION(kTimerTypeCpu) {
      // Pass the descriptor by address (the rendered source had a mangled "&params").
      HIP_CHECK(hipMemcpyParam2D(&params));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
MemcpyParam2DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(), kind);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(), kind);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
|
||||
src_allocation.ptr(), width * sizeof(int),
|
||||
width * sizeof(int), height, kind);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
kind);
|
||||
}
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToHost") {
  // One benchmark run per generated row width; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyDeviceToHost);
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_HostToDevice") {
  // One benchmark run per generated row width; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyHostToDevice);
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_HostToHost") {
  // One benchmark run per generated row width; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyHostToHost);
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_EnablePeerAccess") {
  // A peer copy needs two devices; skip before any generator is registered.
  const bool has_two_gpus = !(HipTest::getDeviceCount() < 2);
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for the asynchronous `hipMemcpyParam2DAsync` call.
 *
 * The copy is timed with `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)`, the
 * same timed-section form the other asynchronous memcpy benchmark uses. The
 * stream is synchronized afterwards, before the functor returns.
 *
 * NOTE(review): this class shares its name with the synchronous benchmark in
 * hipMemcpyParam2D.cc. If both files are linked into one executable that is an
 * ODR hazard; consider renaming - confirm against the build setup.
 */
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
 public:
  /**
   * @param dst        destination pointer
   * @param dst_pitch  destination pitch passed to CreateMemcpy2DParam
   * @param src        source pointer
   * @param src_pitch  source pitch passed to CreateMemcpy2DParam
   * @param width      copy width passed to CreateMemcpy2DParam (stored as WidthInBytes)
   * @param height     number of rows to copy
   * @param kind       copy direction used to pick host/device memory types
   * @param stream     stream the copy is enqueued on and then synchronized
   */
  void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
                  size_t width, size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
    hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch,
                                              width, height, kind);
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      // Pass the descriptor by address (the rendered source had a mangled "&params").
      HIP_CHECK(hipMemcpyParam2DAsync(&params, stream));
    }
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
MemcpyParam2DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
kind, stream);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
kind, stream);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
|
||||
src_allocation.ptr(), width * sizeof(int),
|
||||
width * sizeof(int), height, kind, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
kind, stream);
|
||||
}
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToHost") {
  // One benchmark run per generated row width; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyDeviceToHost);
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToDevice") {
  // One benchmark run per generated row width; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyHostToDevice);
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToHost") {
  // One benchmark run per generated row width; the row count is fixed at 32.
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyHostToHost);
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_EnablePeerAccess") {
  // A peer copy needs two devices; skip before any generator is registered.
  const bool has_two_gpus = !(HipTest::getDeviceCount() < 2);
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, 32, hipMemcpyDeviceToDevice, true);
}
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
// Device symbol used as the copy destination by the hipMemcpyToSymbol benchmarks
// in this file; it holds 1_MB int elements.
__device__ int devSymbol[1_MB];
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpyToSymbol` targeting `devSymbol`.
 *
 * The call is wrapped in `TIMED_SECTION(kTimerTypeCpu)`.
 */
class MemcpyToSymbolBenchmark : public Benchmark<MemcpyToSymbolBenchmark> {
 public:
  // source, size and offset are forwarded unchanged to hipMemcpyToSymbol.
  void operator()(const void* source, size_t size, size_t offset) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) {
|
||||
MemcpyToSymbolBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(std::to_string(offset));
|
||||
benchmark.Run(source, size, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbol` from Host to Device.
|
||||
* - Utilizes singular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbol_SingularValue") {
  // Uses RunBenchmark's default size and offset arguments.
  const int value = 42;
  RunBenchmark(&value);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbol` from Host to Device.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbol_ArrayValue") {
  // `count` is a number of int elements; the copy size is that many ints in bytes.
  const size_t count = GENERATE(1_KB, 4_KB, 1_MB);
  std::vector<int> values(count, 42);

  RunBenchmark(values.data(), sizeof(int) * count);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbol` from Host to Device.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbol_WithOffset") {
  // `count` and `first` are measured in int elements.
  const size_t count = GENERATE(1_KB, 4_KB, 1_MB);
  std::vector<int> values(count, 42);

  // Start either at the beginning or at the midpoint of the array.
  const size_t first = GENERATE_REF(0, count / 2);

  const int* const src = values.data() + first;
  const size_t byte_count = sizeof(int) * (count - first);
  const size_t byte_offset = first * sizeof(int);
  RunBenchmark(src, byte_count, byte_offset);
}
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
// Device symbol used as the copy destination by the hipMemcpyToSymbolAsync
// benchmarks in this file; it holds 1_MB int elements.
__device__ int devSymbol[1_MB];
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpyToSymbolAsync` targeting `devSymbol`.
 *
 * The host-to-device copy is enqueued inside
 * `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)`; the stream is synchronized
 * after the timed section, before the functor returns.
 */
class MemcpyToSymbolAsyncBenchmark : public Benchmark<MemcpyToSymbolAsyncBenchmark> {
 public:
  // source, size and offset are forwarded unchanged; the copy kind is always
  // hipMemcpyHostToDevice.
  void operator()(const void* source, size_t size, size_t offset, const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
                                       hipMemcpyHostToDevice, stream));
    }

    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
|
||||
|
||||
static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) {
|
||||
MemcpyToSymbolAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(std::to_string(offset));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
benchmark.Run(source, size, offset, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
|
||||
* - Utilizes singular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbolAsync_SingularValue") {
  // Uses RunBenchmark's default size and offset arguments.
  const int value = 42;
  RunBenchmark(&value);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbolAsync_ArrayValue") {
  // `count` is a number of int elements; the copy size is that many ints in bytes.
  const size_t count = GENERATE(1_KB, 4_KB, 1_MB);
  std::vector<int> values(count, 42);

  RunBenchmark(values.data(), sizeof(int) * count);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbolAsync_WithOffset") {
  // `count` and `first` are measured in int elements.
  const size_t count = GENERATE(1_KB, 4_KB, 1_MB);
  std::vector<int> values(count, 42);

  // Start either at the beginning or at the midpoint of the array.
  const size_t first = GENERATE_REF(0, count / 2);

  const int* const src = values.data() + first;
  const size_t byte_count = sizeof(int) * (count - first);
  const size_t byte_offset = first * sizeof(int);
  RunBenchmark(src, byte_count, byte_offset);
}
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor for `hipMemcpyWithStream`.
 *
 * The call is wrapped in `TIMED_SECTION(kTimerTypeCpu)`; no separate stream
 * synchronization is issued by this functor.
 */
class MemcpyWithStreamBenchmark : public Benchmark<MemcpyWithStreamBenchmark> {
 public:
  // All arguments are forwarded unchanged to hipMemcpyWithStream.
  void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, hipStream_t stream) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyWithStream(dst, src, size, kind, stream));
    }
  }
};
|
||||
|
||||
/**
 * Allocates a source and a destination buffer of `size` and runs the
 * hipMemcpyWithStream benchmark between them on a stream created for this run.
 *
 * @param dst_allocation_type  destination allocation kind (section name; also the
 *                             allocator unless kind is device-to-device)
 * @param src_allocation_type  source allocation kind (same remark as above)
 * @param size                 size passed to the allocation guards and to the copy
 * @param kind                 copy direction
 * @param enable_peer_access   forwarded to GetDeviceIds on the device-to-device path
 */
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
                         size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
  MemcpyWithStreamBenchmark bench;
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(src_allocation_type));
  bench.AddSectionName(GetAllocationSectionName(dst_allocation_type));

  const StreamGuard guard(Streams::created);
  const hipStream_t stream = guard.stream();

  if (kind == hipMemcpyDeviceToDevice) {
    const int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
    const int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));

    // Both buffers are hipMalloc allocations here: the source is allocated before
    // hipSetDevice(dst_device), the destination after it, and the current device
    // is then switched back to src_device.
    LinearAllocGuard<int> src_buf(LinearAllocs::hipMalloc, size);
    HIP_CHECK(hipSetDevice(dst_device));
    LinearAllocGuard<int> dst_buf(LinearAllocs::hipMalloc, size);
    HIP_CHECK(hipSetDevice(src_device));
    bench.Run(dst_buf.ptr(), src_buf.ptr(), size, kind, stream);
    return;
  }

  LinearAllocGuard<int> src_buf(src_allocation_type, size);
  LinearAllocGuard<int> dst_buf(dst_allocation_type, size);
  bench.Run(dst_buf.ptr(), src_buf.ptr(), size, kind, stream);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToHost") {
  // Every generated size is benchmarked against every generated host allocation kind.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  // Arguments are (destination kind, source kind, ...); the source is always hipMalloc.
  RunBenchmark(host_kind, LinearAllocs::hipMalloc, bytes, hipMemcpyDeviceToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_HostToDevice") {
  // Every generated size is benchmarked against every generated host allocation kind.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  // Arguments are (destination kind, source kind, ...); the destination is always hipMalloc.
  RunBenchmark(LinearAllocs::hipMalloc, host_kind, bytes, hipMemcpyHostToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_HostToHost") {
  // Generators are declared in the order size, source kind, destination kind.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto src_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  const auto dst_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  RunBenchmark(dst_kind, src_kind, bytes, hipMemcpyHostToHost);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_DisablePeerAccess") {
  // Both endpoints are hipMalloc allocations; enable_peer_access stays at its default (false).
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, bytes, hipMemcpyDeviceToDevice);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_EnablePeerAccess") {
  // A peer copy needs two devices; skip before any generator is registered.
  const bool has_two_gpus = !(HipTest::getDeviceCount() < 2);
  if (!has_two_gpus) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  // Both endpoints are hipMalloc allocations.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, bytes, hipMemcpyDeviceToDevice,
               true);
}
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <performance_common.hh>
|
||||
|
||||
/**
 * Builds the hip_Memcpy2D descriptor for a 2D copy with zero source and destination offsets.
 *
 * @param dst    Destination pointer; stored in dstHost or dstDevice depending on `kind`.
 * @param dpitch Destination pitch, stored in dstPitch.
 * @param src    Source pointer; stored in srcHost or srcDevice depending on `kind`.
 * @param spitch Source pitch, stored in srcPitch.
 * @param width  Stored in WidthInBytes.
 * @param height Stored in Height.
 * @param kind   Copy direction. Only the four explicit host/device directions are accepted;
 *               any other value fails the test through REQUIRE(false).
 * @return The populated descriptor.
 */
static hip_Memcpy2D CreateMemcpy2DParam(void* dst, size_t dpitch, void* src, size_t spitch,
                                        size_t width, size_t height, hipMemcpyKind kind) {
  hip_Memcpy2D params = {};
  memset(&params, 0, sizeof(hip_Memcpy2D));

  const hipExtent src_offset = {0, 0, 0};
  const hipExtent dst_offset = {0, 0, 0};

  // Destination side: host memory for *ToHost copies, device memory for *ToDevice copies.
  params.dstPitch = dpitch;
  switch (kind) {
    case hipMemcpyDeviceToHost:
    case hipMemcpyHostToHost:
      params.dstMemoryType = hipMemoryTypeHost;
      params.dstHost = dst;
      break;
    case hipMemcpyDeviceToDevice:
    case hipMemcpyHostToDevice:
      params.dstMemoryType = hipMemoryTypeDevice;
      params.dstDevice = reinterpret_cast<hipDeviceptr_t>(dst);
      break;
    default:
      REQUIRE(false);
  }

  // Source side: host memory for HostTo* copies, device memory for DeviceTo* copies.
  // The source pitch must come from `spitch`, not from the destination pitch.
  params.srcPitch = spitch;
  switch (kind) {
    case hipMemcpyHostToDevice:
    case hipMemcpyHostToHost:
      params.srcMemoryType = hipMemoryTypeHost;
      params.srcHost = src;
      break;
    case hipMemcpyDeviceToDevice:
    case hipMemcpyDeviceToHost:
      params.srcMemoryType = hipMemoryTypeDevice;
      params.srcDevice = reinterpret_cast<hipDeviceptr_t>(src);
      break;
    default:
      REQUIRE(false);
  }

  params.WidthInBytes = width;
  params.Height = height;
  params.srcXInBytes = src_offset.width;
  params.srcY = src_offset.height;
  params.dstXInBytes = dst_offset.width;
  params.dstY = dst_offset.height;

  return params;
}
|
||||
|
||||
/**
 * Packs the given pointers, positions, extent and copy kind into a zeroed hipMemcpy3DParms.
 *
 * @param dst_ptr Stored in dstPtr.
 * @param dst_pos Stored in dstPos.
 * @param src_ptr Stored in srcPtr.
 * @param src_pos Stored in srcPos.
 * @param extent  Stored in extent.
 * @param kind    Stored in kind.
 * @return The populated descriptor; every field not listed above is left zeroed.
 */
static hipMemcpy3DParms CreateMemcpy3DParam(hipPitchedPtr dst_ptr, hipPos dst_pos,
                                            hipPitchedPtr src_ptr, hipPos src_pos,
                                            hipExtent extent, hipMemcpyKind kind) {
  hipMemcpy3DParms params = {};
  memset(&params, 0, sizeof(hipMemcpy3DParms));
  params.dstPtr = dst_ptr;
  params.dstPos = dst_pos;
  params.srcPtr = src_ptr;
  params.srcPos = src_pos;
  params.extent = extent;
  params.kind = kind;
  return params;
}
|
||||
|
||||
static std::tuple<int, int> GetDeviceIds(bool enable_peer_access) {
|
||||
int src_device = 0;
|
||||
int dst_device = 1;
|
||||
|
||||
if (enable_peer_access) {
|
||||
int can_access_peer = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
|
||||
if (!can_access_peer) {
|
||||
INFO("Peer access cannot be enabled between devices " << src_device << " and " << dst_device);
|
||||
REQUIRE(can_access_peer);
|
||||
}
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
|
||||
} else {
|
||||
dst_device = 0;
|
||||
}
|
||||
|
||||
return {src_device, dst_device};
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
# Source files of the memset performance tests.
set(TEST_SRC
    hipMemset.cc
    hipMemsetAsync.cc
    hipMemsetD8.cc
    hipMemsetD8Async.cc
    hipMemsetD16.cc
    hipMemsetD16Async.cc
    hipMemsetD32.cc
    hipMemsetD32Async.cc
    hipMemset2D.cc
    hipMemset2DAsync.cc
    hipMemset3D.cc
    hipMemset3DAsync.cc
)

# Hand the sources to the project's hip_add_exe_to_target helper under the name
# MemsetPerformance, with build_tests as the test target name and C++17 as the standard.
hip_add_exe_to_target(
  NAME MemsetPerformance
  TEST_SRC ${TEST_SRC}
  TEST_TARGET_NAME build_tests
  COMPILE_OPTIONS -std=c++17
)
|
||||
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for all memset HIP APIs.
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemset` call writing the value 17 over `size` bytes
 * of a buffer obtained through LinearAllocGuard with the requested allocation kind.
 */
class MemsetBenchmark : public Benchmark<MemsetBenchmark> {
  // Data members are listed first; their relative order (and so their initialization order)
  // is the same as before.
  LinearAllocGuard<void> dst_;
  const size_t size_;

 public:
  MemsetBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  // Body of a single measured iteration.
  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemset(dst_.ptr(), 17, size_));
    }
  }
};
|
||||
|
||||
/**
 * Runs the hipMemset benchmark for one (allocation kind, size) combination.
 * The report section is named after the size first and the allocation kind second.
 */
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetBenchmark bench(allocation_type, size);

  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemset`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemset.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemset") {
  // Generator order is kept: sizes first, then allocation kinds.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, bytes);
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemset2D` call writing the value 17 over the whole
 * width x height region of a LinearAllocGuard2D allocation.
 */
class Memset2DBenchmark : public Benchmark<Memset2DBenchmark> {
  LinearAllocGuard2D<char> dst_;

 public:
  Memset2DBenchmark(size_t width, size_t height) : dst_(width, height) {}

  // Body of a single measured iteration; pitch and dimensions come from the allocation guard.
  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemset2D(dst_.ptr(), dst_.pitch(), 17, dst_.width(), dst_.height()));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height) {
|
||||
Memset2DBenchmark benchmark(width, height);
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
benchmark.Run();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemset2D`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemset2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemset2D") {
  // The height is fixed at 32; only the width varies between runs.
  const size_t rows = 32;
  const auto row_bytes = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_bytes, rows);
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memset2DAsyncBenchmark : public Benchmark<Memset2DAsyncBenchmark> {
|
||||
public:
|
||||
Memset2DAsyncBenchmark(size_t width, size_t height)
|
||||
: dst_(width, height), stream_(Streams::created) {}
|
||||
|
||||
void operator()(size_t width, size_t height) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
|
||||
HIP_CHECK(hipMemset2DAsync(dst_.ptr(), dst_.pitch(), 17, dst_.width(), dst_.height(),
|
||||
stream_.stream()));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
LinearAllocGuard2D<char> dst_;
|
||||
StreamGuard stream_;
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height) {
|
||||
Memset2DAsyncBenchmark benchmark(width, height);
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
benchmark.Run(width, height);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemset2DAsync`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemset2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemset2DAsync") {
  // The height is fixed at 32; only the width varies between runs.
  const size_t rows = 32;
  const auto row_bytes = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_bytes, rows);
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemset3D` call writing the value 17 over the full
 * extent of a LinearAllocGuard3D allocation.
 */
class Memset3DBenchmark : public Benchmark<Memset3DBenchmark> {
  LinearAllocGuard3D<char> dst_;

 public:
  Memset3DBenchmark(size_t width, size_t height, size_t depth)
      : dst_(width, height, depth) {}

  // Body of a single measured iteration; pointer and extent come from the allocation guard.
  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemset3D(dst_.pitched_ptr(), 17, dst_.extent()));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, size_t depth) {
|
||||
Memset3DBenchmark benchmark(width, height, depth);
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ", " +
|
||||
std::to_string(depth) + ")");
|
||||
benchmark.Run();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemset3D`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemset3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemset3D") {
  // Height and depth are fixed at 16 and 4; only the width varies between runs.
  const size_t rows = 16;
  const size_t slices = 4;
  const auto row_bytes = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_bytes, rows, slices);
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemset3DAsync` call writing the value 17 over the full
 * extent of a LinearAllocGuard3D allocation, issued on a stream owned by the benchmark object.
 */
class Memset3DAsyncBenchmark : public Benchmark<Memset3DAsyncBenchmark> {
  // Declaration order (allocation, then stream) is the same as before.
  LinearAllocGuard3D<char> dst_;
  StreamGuard stream_;

 public:
  Memset3DAsyncBenchmark(size_t width, size_t height, size_t depth)
      : dst_(width, height, depth), stream_(Streams::created) {}

  // Body of a single measured iteration.
  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemset3DAsync(dst_.pitched_ptr(), 17, dst_.extent(), stream_.stream()));
    }
  }
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, size_t depth) {
|
||||
Memset3DAsyncBenchmark benchmark(width, height, depth);
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ", " +
|
||||
std::to_string(depth) + ")");
|
||||
benchmark.Run();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemset3DAsync`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemset3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemset3DAsync") {
  // Height and depth are fixed at 16 and 4; only the width varies between runs.
  const size_t rows = 16;
  const size_t slices = 4;
  const auto row_bytes = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_bytes, rows, slices);
}
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemsetAsync` call writing the value 17 over `size`
 * bytes of a LinearAllocGuard buffer, issued on a stream owned by the benchmark object.
 */
class MemsetAsyncBenchmark : public Benchmark<MemsetAsyncBenchmark> {
  // Declaration order (buffer, size, stream) is the same as before.
  LinearAllocGuard<void> dst_;
  const size_t size_;
  StreamGuard stream_;

 public:
  MemsetAsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  // Body of a single measured iteration.
  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemsetAsync(dst_.ptr(), 17, size_, stream_.stream()));
    }
  }
};
|
||||
|
||||
/**
 * Runs the hipMemsetAsync benchmark for one (allocation kind, size) combination.
 * The report section is named after the size first and the allocation kind second.
 */
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetAsyncBenchmark bench(allocation_type, size);

  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemsetAsync`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemsetAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemsetAsync") {
  // Generator order is kept: sizes first, then allocation kinds.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, bytes);
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemsetD16` call filling a `size`-byte LinearAllocGuard
 * buffer with the 16-bit value 311.
 */
class MemsetD16Benchmark : public Benchmark<MemsetD16Benchmark> {
 public:
  /**
   * @param allocation_type Allocation kind used for the destination buffer.
   * @param size            Size of the destination buffer in bytes.
   */
  MemsetD16Benchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  // Body of a single measured iteration.
  void operator()() {
    // hipMemsetD16 takes a count of 16-bit elements, not bytes. Passing the byte size would
    // write twice the allocation, so convert before entering the timed section.
    const size_t count = size_ / sizeof(unsigned short);
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemsetD16(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 311, count));
    }
  }

 private:
  LinearAllocGuard<void> dst_;
  const size_t size_;  // Buffer size in bytes.
};
|
||||
|
||||
/**
 * Runs the hipMemsetD16 benchmark for one (allocation kind, size) combination.
 * The report section is named after the size first and the allocation kind second.
 */
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD16Benchmark bench(allocation_type, size);

  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemsetD16`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemsetD16.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemsetD16") {
  // Generator order is kept: sizes first, then allocation kinds.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, bytes);
}
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemsetD16Async` call filling a `size`-byte
 * LinearAllocGuard buffer with the 16-bit value 311, issued on a stream owned by the
 * benchmark object.
 */
class MemsetD16AsyncBenchmark : public Benchmark<MemsetD16AsyncBenchmark> {
 public:
  /**
   * @param allocation_type Allocation kind used for the destination buffer.
   * @param size            Size of the destination buffer in bytes.
   */
  MemsetD16AsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  // Body of a single measured iteration.
  void operator()() {
    // hipMemsetD16Async takes a count of 16-bit elements, not bytes. Passing the byte size
    // would write twice the allocation, so convert before entering the timed section.
    const size_t count = size_ / sizeof(unsigned short);
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemsetD16Async(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 311, count,
                                  stream_.stream()));
    }
  }

 private:
  LinearAllocGuard<void> dst_;
  const size_t size_;  // Buffer size in bytes.
  StreamGuard stream_;
};
|
||||
|
||||
/**
 * Runs the hipMemsetD16Async benchmark for one (allocation kind, size) combination.
 * The report section is named after the size first and the allocation kind second.
 */
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD16AsyncBenchmark bench(allocation_type, size);

  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemsetD16Async`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemsetD16Async.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemsetD16Async") {
  // Generator order is kept: sizes first, then allocation kinds.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, bytes);
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times one `hipMemsetD32` call filling a `size`-byte LinearAllocGuard
 * buffer with the 32-bit value 123'456.
 */
class MemsetD32Benchmark : public Benchmark<MemsetD32Benchmark> {
 public:
  /**
   * @param allocation_type Allocation kind used for the destination buffer.
   * @param size            Size of the destination buffer in bytes.
   */
  MemsetD32Benchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  // Body of a single measured iteration.
  void operator()() {
    // hipMemsetD32 takes a count of 32-bit elements, not bytes. Passing the byte size would
    // write four times the allocation, so convert before entering the timed section.
    const size_t count = size_ / sizeof(int);
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemsetD32(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 123'456, count));
    }
  }

 private:
  LinearAllocGuard<void> dst_;
  const size_t size_;  // Buffer size in bytes.
};
|
||||
|
||||
/**
 * Runs the hipMemsetD32 benchmark for one (allocation kind, size) combination.
 * The report section is named after the size first and the allocation kind second.
 */
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD32Benchmark bench(allocation_type, size);

  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemsetD32`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemsetD32.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemsetD32") {
  // Generator order is kept: sizes first, then allocation kinds.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, bytes);
}
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times a single `hipMemsetD32Async` call on a linear allocation, issued
 * on a stream owned by the benchmark object.
 *
 * The destination buffer is allocated with `size` bytes and the memset covers exactly that
 * buffer using 32-bit values.
 */
class MemsetD32AsyncBenchmark : public Benchmark<MemsetD32AsyncBenchmark> {
 public:
  /**
   * @param allocation_type kind of linear allocation that backs the destination buffer
   * @param size            size of the destination buffer, in bytes
   */
  MemsetD32AsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  /** Runs one timed `hipMemsetD32Async` over the whole destination buffer. */
  void operator()() {
    // hipMemsetD32Async takes the number of 32-bit values to set, not a byte count. Passing the
    // byte size directly would make the call write 4 * size_ bytes into a size_-byte allocation.
    const size_t element_count = size_ / kBytesPerElement;
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemsetD32Async(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 123'456,
                                  element_count, stream_.stream()));
    }
  }

 private:
  // Width of one value written by the D32 memset variants.
  static constexpr size_t kBytesPerElement = 4;

  LinearAllocGuard<void> dst_;
  const size_t size_;  // destination size in bytes
  StreamGuard stream_;
};
|
||||
|
||||
// Builds a hipMemsetD32Async benchmark for the given allocation kind and byte size, labels it
// with the size and the allocation name (in that order), and runs it.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD32AsyncBenchmark bench{allocation_type, size};

  // Section names are appended in this order: size first, allocation kind second.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemsetD32Async`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemsetD32Async.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemsetD32Async") {
  // The size generator is declared first, so it stays the outer loop of the generated cases.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times a single `hipMemsetD8` call on a linear allocation of `size`
 * bytes.
 */
class MemsetD8Benchmark : public Benchmark<MemsetD8Benchmark> {
 public:
  /**
   * @param allocation_type kind of linear allocation that backs the destination buffer
   * @param size            size of the destination buffer, in bytes
   */
  MemsetD8Benchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  /** Runs one timed `hipMemsetD8` over the whole destination buffer. */
  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      const auto destination = reinterpret_cast<hipDeviceptr_t>(dst_.ptr());
      HIP_CHECK(hipMemsetD8(destination, kFillValue, size_));
    }
  }

 private:
  // Arbitrary byte value written to every element of the buffer.
  static constexpr int kFillValue = 17;

  LinearAllocGuard<void> dst_;
  const size_t size_;
};
|
||||
|
||||
// Builds a hipMemsetD8 benchmark for the given allocation kind and byte size, labels it with
// the size and the allocation name (in that order), and runs it.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD8Benchmark bench{allocation_type, size};

  // Section names are appended in this order: size first, allocation kind second.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemsetD8`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemsetD8.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemsetD8") {
  // The size generator is declared first, so it stays the outer loop of the generated cases.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup memset memset
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
/**
 * Benchmark functor that times a single `hipMemsetD8Async` call on a linear allocation of `size`
 * bytes, issued on a stream owned by the benchmark object.
 */
class MemsetD8AsyncBenchmark : public Benchmark<MemsetD8AsyncBenchmark> {
 public:
  /**
   * @param allocation_type kind of linear allocation that backs the destination buffer
   * @param size            size of the destination buffer, in bytes
   */
  MemsetD8AsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  /** Runs one timed `hipMemsetD8Async` over the whole destination buffer. */
  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      const auto destination = reinterpret_cast<hipDeviceptr_t>(dst_.ptr());
      HIP_CHECK(hipMemsetD8Async(destination, kFillValue, size_, stream_.stream()));
    }
  }

 private:
  // Arbitrary byte value written to every element of the buffer.
  static constexpr int kFillValue = 17;

  LinearAllocGuard<void> dst_;
  const size_t size_;
  StreamGuard stream_;
};
|
||||
|
||||
// Builds a hipMemsetD8Async benchmark for the given allocation kind and byte size, labels it
// with the size and the allocation name (in that order), and runs it.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD8AsyncBenchmark bench{allocation_type, size};

  // Section names are appended in this order: size first, allocation kind second.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemsetD8Async`:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - device
|
||||
* - host
|
||||
* - managed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memset/hipMemsetD8Async.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemsetD8Async") {
  // The size generator is declared first, so it stays the outer loop of the generated cases.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
|
||||
@@ -35,9 +35,9 @@ add_subdirectory(multiThread)
|
||||
add_subdirectory(compiler)
|
||||
add_subdirectory(errorHandling)
|
||||
add_subdirectory(cooperativeGrps)
|
||||
add_subdirectory(warp)
|
||||
add_subdirectory(context)
|
||||
add_subdirectory(device_memory)
|
||||
add_subdirectory(warp)
|
||||
add_subdirectory(dynamicLoading)
|
||||
add_subdirectory(g++)
|
||||
add_subdirectory(module)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
thread_block.cc
|
||||
thread_block_tile.cc
|
||||
hipCGThreadBlockType_old.cc
|
||||
hipCGMultiGridGroupType_old.cc
|
||||
hipCGGridGroupType_old.cc
|
||||
|
||||
@@ -31,6 +31,14 @@ constexpr size_t kWarpSize = 64;
|
||||
constexpr int kMaxGPUs = 8;
|
||||
} // namespace
|
||||
|
||||
constexpr int MaxGPUs = 8;
|
||||
|
||||
// Two dim3 values are equal when their x, y and z components all match.
inline bool operator==(const dim3& l, const dim3& r) {
  if (l.x != r.x) {
    return false;
  }
  if (l.y != r.y) {
    return false;
  }
  return l.z == r.z;
}
|
||||
|
||||
// Negation of the component-wise dim3 equality.
inline bool operator!=(const dim3& l, const dim3& r) {
  const bool equal = (l == r);
  return !equal;
}
|
||||
|
||||
__device__ inline unsigned int thread_rank_in_grid() {
|
||||
const auto block_size = blockDim.x * blockDim.y * blockDim.z;
|
||||
const auto block_rank_in_grid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
|
||||
@@ -67,4 +75,4 @@ template <class T> bool CheckDimensions(unsigned int device, T kernel, dim3 bloc
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,350 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cooperative_groups_common.hh"
|
||||
|
||||
#include <cpu_grid.h>
|
||||
#include <optional>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
#include <cmd_options.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup thread_block thread_block
|
||||
* @{
|
||||
* @ingroup DeviceLanguageTest
|
||||
* Contains unit tests for all thread_block APIs
|
||||
*/
|
||||
|
||||
namespace cg = cooperative_groups;
|
||||
|
||||
template <typename BaseType = cg::thread_block>
|
||||
static __global__ void thread_block_size_getter(unsigned int* sizes) {
|
||||
const BaseType group = cg::this_thread_block();
|
||||
sizes[thread_rank_in_grid()] = group.size();
|
||||
}
|
||||
|
||||
template <typename BaseType = cg::thread_block>
|
||||
static __global__ void thread_block_thread_rank_getter(unsigned int* thread_ranks) {
|
||||
const BaseType group = cg::this_thread_block();
|
||||
thread_ranks[thread_rank_in_grid()] = group.thread_rank();
|
||||
}
|
||||
|
||||
// Each thread stores the group_index() of its thread block into the slot selected by
// thread_rank_in_grid().
static __global__ void thread_block_group_indices_getter(dim3* group_indices) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  group_indices[slot] = block.group_index();
}
|
||||
|
||||
// Each thread stores its thread_index() within the thread block into the slot selected by
// thread_rank_in_grid().
static __global__ void thread_block_thread_indices_getter(dim3* thread_indices) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  thread_indices[slot] = block.thread_index();
}
|
||||
|
||||
// Each thread stores the result of the non-member cg::group_size() for its thread block into
// the slot selected by thread_rank_in_grid().
static __global__ void thread_block_non_member_size_getter(unsigned int* sizes) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  sizes[slot] = cg::group_size(block);
}
|
||||
|
||||
// Each thread stores the result of the non-member cg::thread_rank() for its thread block into
// the slot selected by thread_rank_in_grid().
static __global__ void thread_block_non_member_thread_rank_getter(unsigned int* thread_ranks) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  thread_ranks[slot] = cg::thread_rank(block);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels that write the return values of size, thread_rank, group_index, and
|
||||
* thread_index member functions to an output array that is validated on the host side. The kernels
|
||||
* are run sequentially, reusing the output array, to avoid running out of device memory for large
|
||||
* kernel launches.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Thread_Block_Getters_Positive_Basic") {
  const auto blocks = GenerateBlockDimensions();
  const auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
  const CPUGrid grid(blocks, threads);

  // Scalar getters: size() and thread_rank(). The device and host buffers live only inside this
  // scope and are reused by both kernels.
  {
    const auto scalar_bytes = grid.thread_count_ * sizeof(unsigned int);
    LinearAllocGuard<unsigned int> device_values(LinearAllocs::hipMalloc, scalar_bytes);
    LinearAllocGuard<unsigned int> host_values(LinearAllocs::hipHostMalloc, scalar_bytes);

    thread_block_size_getter<<<blocks, threads>>>(device_values.ptr());
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(
        hipMemcpy(host_values.ptr(), device_values.ptr(), scalar_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // The second kernel is launched before the first result is validated on the host, but only
    // after the first result has been copied out of the shared device buffer.
    thread_block_thread_rank_getter<<<blocks, threads>>>(device_values.ptr());
    HIP_CHECK(hipGetLastError());

    // Validate thread_block.size() values
    ArrayAllOf(host_values.ptr(), grid.thread_count_,
               [block_size = grid.threads_in_block_count_](uint32_t) { return block_size; });

    HIP_CHECK(
        hipMemcpy(host_values.ptr(), device_values.ptr(), scalar_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // Validate thread_block.thread_rank() values
    ArrayAllOf(host_values.ptr(), grid.thread_count_,
               [&grid](uint32_t idx) { return grid.thread_rank_in_block(idx).value(); });
  }

  // dim3 getters: group_index() and thread_index(), following the same launch/copy/validate
  // sequence with dim3-sized buffers.
  {
    const auto dim3_bytes = grid.thread_count_ * sizeof(dim3);
    LinearAllocGuard<dim3> device_dims(LinearAllocs::hipMalloc, dim3_bytes);
    LinearAllocGuard<dim3> host_dims(LinearAllocs::hipHostMalloc, dim3_bytes);

    thread_block_group_indices_getter<<<blocks, threads>>>(device_dims.ptr());
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipMemcpy(host_dims.ptr(), device_dims.ptr(), dim3_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    thread_block_thread_indices_getter<<<blocks, threads>>>(device_dims.ptr());
    HIP_CHECK(hipGetLastError());

    // Validate thread_block.group_index() values
    ArrayAllOf(host_dims.ptr(), grid.thread_count_,
               [&grid](uint32_t idx) { return grid.block_idx(idx).value(); });

    HIP_CHECK(hipMemcpy(host_dims.ptr(), device_dims.ptr(), dim3_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // Validate thread_block.thread_index() values
    ArrayAllOf(host_dims.ptr(), grid.thread_count_,
               [&grid](uint32_t idx) { return grid.thread_idx(idx).value(); });
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels that write the return values of size and thread_rank member functions to an
|
||||
* output array that is validated on the host side, while treating the thread block as a thread
|
||||
* group. The kernels are run sequentially, reusing the output array, to avoid running out of device
|
||||
* memory for large kernel launches.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Thread_Block_Getters_Via_Base_Type_Positive_Basic") {
  const auto blocks = GenerateBlockDimensions();
  const auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);

  const CPUGrid grid(blocks, threads);

  // One device/host buffer pair is reused by both kernels.
  const auto scalar_bytes = grid.thread_count_ * sizeof(unsigned int);
  LinearAllocGuard<unsigned int> device_values(LinearAllocs::hipMalloc, scalar_bytes);
  LinearAllocGuard<unsigned int> host_values(LinearAllocs::hipHostMalloc, scalar_bytes);

  // Both kernels access the thread block through the cg::thread_group type.
  thread_block_size_getter<cg::thread_group><<<blocks, threads>>>(device_values.ptr());
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(
      hipMemcpy(host_values.ptr(), device_values.ptr(), scalar_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // The second kernel is launched before the first result is validated on the host, but only
  // after the first result has been copied out of the shared device buffer.
  thread_block_thread_rank_getter<cg::thread_group><<<blocks, threads>>>(device_values.ptr());
  HIP_CHECK(hipGetLastError());

  // Validate thread_block.size() values
  ArrayAllOf(host_values.ptr(), grid.thread_count_,
             [block_size = grid.threads_in_block_count_](uint32_t) { return block_size; });

  HIP_CHECK(
      hipMemcpy(host_values.ptr(), device_values.ptr(), scalar_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // Validate thread_block.thread_rank() values
  ArrayAllOf(host_values.ptr(), grid.thread_count_,
             [&grid](uint32_t idx) { return grid.thread_rank_in_block(idx).value(); });
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels that write the return values of size and thread_rank non-member functions
|
||||
* to an output array that is validated on the host side. The kernels are run sequentially, reusing
|
||||
* the output array, to avoid running out of device memory for large kernel launches.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Thread_Block_Getters_Via_Non_Member_Functions_Positive_Basic") {
  const auto blocks = GenerateBlockDimensions();
  const auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);

  const CPUGrid grid(blocks, threads);

  // One device/host buffer pair is reused by both kernels.
  const auto scalar_bytes = grid.thread_count_ * sizeof(unsigned int);
  LinearAllocGuard<unsigned int> device_values(LinearAllocs::hipMalloc, scalar_bytes);
  LinearAllocGuard<unsigned int> host_values(LinearAllocs::hipHostMalloc, scalar_bytes);

  thread_block_non_member_size_getter<<<blocks, threads>>>(device_values.ptr());
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(
      hipMemcpy(host_values.ptr(), device_values.ptr(), scalar_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // The second kernel is launched before the first result is validated on the host, but only
  // after the first result has been copied out of the shared device buffer.
  thread_block_non_member_thread_rank_getter<<<blocks, threads>>>(device_values.ptr());
  HIP_CHECK(hipGetLastError());

  // Validate thread_block.size() values
  ArrayAllOf(host_values.ptr(), grid.thread_count_,
             [block_size = grid.threads_in_block_count_](uint32_t) { return block_size; });

  HIP_CHECK(
      hipMemcpy(host_values.ptr(), device_values.ptr(), scalar_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // Validate thread_block.thread_rank() values
  ArrayAllOf(host_values.ptr(), grid.thread_count_,
             [&grid](uint32_t idx) { return grid.thread_rank_in_block(idx).value(); });
}
|
||||
|
||||
|
||||
template <bool use_global, typename T>
|
||||
__global__ void thread_block_sync_check(T* global_data, unsigned int* wait_modifiers,
|
||||
unsigned int* read_offsets) {
|
||||
extern __shared__ uint8_t shared_data[];
|
||||
T* const data = use_global ? global_data : reinterpret_cast<T*>(shared_data);
|
||||
const auto block = cg::this_thread_block();
|
||||
constexpr T divisor = 255;
|
||||
const auto tid = block.thread_rank();
|
||||
const auto wait_modifier = wait_modifiers[tid];
|
||||
const auto read_offset = read_offsets[tid];
|
||||
busy_wait(wait_modifier);
|
||||
data[tid] = tid % divisor;
|
||||
block.sync();
|
||||
bool valid = true;
|
||||
for (auto i = 0; i < block.size(); ++i) {
|
||||
const auto offset = block.size() + read_offset;
|
||||
const auto expected = (tid + offset + i) % block.size();
|
||||
if (!(valid &= (data[expected] == expected % divisor))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
block.sync();
|
||||
data[tid] = valid;
|
||||
if constexpr (!use_global) {
|
||||
global_data[tid] = data[tid];
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the function-local Mersenne Twister engine shared by all callers in this file.
static inline std::mt19937& GetRandomGenerator() {
  // A fixed seed keeps the tests consistent between runs while sparing us from writing the sets
  // of modifiers by hand. The sets could be determined at compile time if std::random objects
  // could operate in a constexpr context.
  constexpr unsigned int kFixedSeed = 17;
  static std::mt19937 engine(kFixedSeed);
  return engine;
}
|
||||
|
||||
template <typename T> static inline T GenerateRandomInteger(const T min, const T max) {
|
||||
std::uniform_int_distribution<T> dist(min, max);
|
||||
return dist(GetRandomGenerator());
|
||||
}
|
||||
|
||||
template <bool global_memory, typename T> void ThreadBlockSyncTest() {
|
||||
const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations));
|
||||
INFO("Run number: " << randomized_run_count + 1);
|
||||
const auto blocks = dim3(1, 1, 1);
|
||||
const auto threads = GenerateThreadDimensions();
|
||||
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
|
||||
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
|
||||
CPUGrid grid(blocks, threads);
|
||||
|
||||
const auto alloc_size = grid.thread_count_ * sizeof(T);
|
||||
int max_shared_mem_per_block = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block,
|
||||
hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
|
||||
if (!global_memory && max_shared_mem_per_block < alloc_size) {
|
||||
return;
|
||||
}
|
||||
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
|
||||
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
|
||||
LinearAllocGuard<unsigned int> wait_modifiers_dev(LinearAllocs::hipMalloc,
|
||||
grid.thread_count_ * sizeof(unsigned int));
|
||||
LinearAllocGuard<unsigned int> wait_modifiers(LinearAllocs::hipHostMalloc,
|
||||
grid.thread_count_ * sizeof(unsigned int));
|
||||
std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_,
|
||||
[&] { return GenerateRandomInteger(0u, 1500u); });
|
||||
|
||||
LinearAllocGuard<unsigned int> read_offsets_dev(LinearAllocs::hipMalloc,
|
||||
grid.thread_count_ * sizeof(unsigned int));
|
||||
std::vector<unsigned int> read_offsets(grid.thread_count_, 0u);
|
||||
if (randomized_run_count != 0) {
|
||||
std::generate(read_offsets.begin(), read_offsets.end(),
|
||||
[&] { return GenerateRandomInteger(0u, grid.thread_count_); });
|
||||
}
|
||||
|
||||
const auto shared_memory_size = global_memory ? 0u : alloc_size;
|
||||
HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(),
|
||||
grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(read_offsets_dev.ptr(), read_offsets.data(),
|
||||
grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
|
||||
thread_block_sync_check<global_memory><<<blocks, threads, shared_memory_size>>>(
|
||||
arr_dev.ptr(), wait_modifiers_dev.ptr(), read_offsets_dev.ptr());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
REQUIRE(std::all_of(arr.ptr(), arr.ptr() + grid.thread_count_, [](unsigned int e) { return e; }));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches a kernel wherein every thread writes its grid-wide linear index into an array. The
|
||||
* array is either in global or dynamic shared memory based on a compile time switch, and the test
|
||||
* is run for arrays of 1, 2, and 4 byte elements. Before the write each thread executes a busy wait
|
||||
* loop for a random amount of clock cycles, the amount being read from an input array. After the
|
||||
* write a block-wide sync is performed and each thread validates that it can read the expected
|
||||
* values that other threads have written to their respective array slots. Each thread begins the
|
||||
* validation from a given offset from its own index. For the first run of the test, all the offsets
|
||||
* are zero, so memory reads should be coalesced as adjacent threads read from adjacent memory
|
||||
* locations. On subsequent runs the offsets are randomized for each thread, leading to
|
||||
* non-coalesced reads and cache thrashing.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Thread_Block_Sync_Positive_Basic", "", uint8_t, uint16_t, uint32_t) {
  // Working array placed in global memory.
  SECTION("Global memory") {
    ThreadBlockSyncTest<true, TestType>();
  }

  // Working array placed in dynamic shared memory.
  SECTION("Shared memory") {
    ThreadBlockSyncTest<false, TestType>();
  }
}
|
||||
@@ -0,0 +1,553 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cooperative_groups_common.hh"
|
||||
|
||||
#include <bitset>
|
||||
#include <array>
|
||||
|
||||
#include <cmd_options.hh>
|
||||
#include <cpu_grid.h>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
|
||||
/**
|
||||
* @addtogroup thread_block_tile thread_block_tile
|
||||
* @{
|
||||
* @ingroup DeviceLanguageTest
|
||||
* Contains unit tests for all thread_block_tile APIs and dynamic block partitioning
|
||||
*/
|
||||
|
||||
namespace cg = cooperative_groups;
|
||||
|
||||
template <bool dynamic, unsigned int tile_size>
|
||||
__global__ void thread_block_partition_size_getter(unsigned int* sizes) {
|
||||
const auto group = cg::this_thread_block();
|
||||
if constexpr (dynamic) {
|
||||
sizes[thread_rank_in_grid()] = cg::tiled_partition(group, tile_size).size();
|
||||
} else {
|
||||
cg::thread_block_tile<tile_size> tiled_partition = cg::tiled_partition<tile_size>(group);
|
||||
sizes[thread_rank_in_grid()] = tiled_partition.size();
|
||||
}
|
||||
}
|
||||
|
||||
template <bool dynamic, unsigned int tile_size>
|
||||
__global__ void thread_block_partition_thread_rank_getter(unsigned int* thread_ranks) {
|
||||
const auto group = cg::this_thread_block();
|
||||
if constexpr (dynamic) {
|
||||
thread_ranks[thread_rank_in_grid()] = cg::tiled_partition(group, tile_size).thread_rank();
|
||||
} else {
|
||||
cg::thread_block_tile<tile_size> tiled_partition = cg::tiled_partition<tile_size>(group);
|
||||
thread_ranks[thread_rank_in_grid()] = tiled_partition.thread_rank();
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the size() and thread_rank() getter kernels for one tile size and validates both results
// on the host. One device/host buffer pair is reused by both kernels.
template <bool dynamic, size_t tile_size> void BlockPartitionGettersBasicTestImpl() {
  DYNAMIC_SECTION("Tile size: " << tile_size) {
    auto blocks = GenerateBlockDimensions();
    auto threads = GenerateThreadDimensions();
    INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
    INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
    CPUGrid grid(blocks, threads);

    const auto buffer_bytes = grid.thread_count_ * sizeof(unsigned int);
    LinearAllocGuard<unsigned int> device_values(LinearAllocs::hipMalloc, buffer_bytes);
    LinearAllocGuard<unsigned int> host_values(LinearAllocs::hipHostMalloc, buffer_bytes);

    thread_block_partition_size_getter<dynamic, tile_size>
        <<<blocks, threads>>>(device_values.ptr());
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(
        hipMemcpy(host_values.ptr(), device_values.ptr(), buffer_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // The second kernel is launched before the first result is validated on the host, but only
    // after the first result has been copied out of the shared device buffer.
    thread_block_partition_thread_rank_getter<dynamic, tile_size>
        <<<blocks, threads>>>(device_values.ptr());
    HIP_CHECK(hipGetLastError());

    // Validate tile size() values.
    ArrayAllOf(host_values.ptr(), grid.thread_count_, [&grid](unsigned int idx) {
      // Static partitions are expected to always report the full tile size.
      if constexpr (!dynamic) {
        return tile_size;
      }

      // For dynamic partitions the last tile of a block is expected to be shorter when the
      // block size is not a multiple of the tile size.
      const auto tiles_per_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
      const auto rank = grid.thread_rank_in_block(idx).value();

      const auto missing = tiles_per_block * tile_size - grid.threads_in_block_count_;
      const bool in_last_tile = rank >= (tiles_per_block - 1) * tile_size;
      return tile_size - missing * in_last_tile;
    });

    HIP_CHECK(
        hipMemcpy(host_values.ptr(), device_values.ptr(), buffer_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // Validate tile thread_rank() values: the rank within the block modulo the tile size.
    ArrayAllOf(host_values.ptr(), grid.thread_count_, [&grid](unsigned int idx) {
      return grid.thread_rank_in_block(idx).value() % tile_size;
    });
  }
}
|
||||
|
||||
/** Runs BlockPartitionGettersBasicTestImpl once for every tile size in the pack, in order. */
template <bool dynamic, size_t... tile_sizes> void BlockPartitionGettersBasicTest() {
  (static_cast<void>(BlockPartitionGettersBasicTestImpl<dynamic, tile_sizes>()), ...);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Creates tiled partitions for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} and writes
|
||||
* the return values of size and thread_rank member functions to an output array that is validated
|
||||
* on the host side.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block_tile.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Thread_Block_Tile_Getters_Positive_Basic") {
  // Statically sized tiles: 2 to 32 threads are always covered.
  BlockPartitionGettersBasicTest<false, 2, 4, 8, 16, 32>();
#if HT_AMD
#if __GFX8__ || __GFX9__
  // 64-wide tiles are compiled in only when HT_AMD and one of __GFX8__/__GFX9__ are non-zero.
  // NOTE(review): confirm these GFX macros are defined where this host-side code is compiled.
  BlockPartitionGettersBasicTest<false, 64>();
#endif
#endif
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Creates tiled partitions for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} via the
|
||||
* dynamic tiled partition api and writes the return values of size and thread_rank member functions
|
||||
* to an output array that is validated on host.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block_tile.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic") {
  // Dynamically sized tiles: 2 to 32 threads are always covered.
  BlockPartitionGettersBasicTest<true, 2, 4, 8, 16, 32>();
#if HT_AMD
#if __GFX8__ || __GFX9__
  // 64-wide tiles are compiled in only when HT_AMD and one of __GFX8__/__GFX9__ are non-zero.
  // NOTE(review): confirm these GFX macros are defined where this host-side code is compiled.
  BlockPartitionGettersBasicTest<true, 64>();
#endif
#endif
}
|
||||
|
||||
|
||||
template <typename T, size_t tile_size>
|
||||
__global__ void block_tile_shfl_up(T* const out, const unsigned int delta) {
|
||||
const cg::thread_block_tile<tile_size> partition =
|
||||
cg::tiled_partition<tile_size>(cg::this_thread_block());
|
||||
T var = static_cast<T>(partition.thread_rank());
|
||||
out[thread_rank_in_grid()] = partition.shfl_up(var, delta);
|
||||
}
|
||||
|
||||
/**
 * Runs block_tile_shfl_up for one tile size and every delta in [0, tile_size) and validates the
 * values received by each thread on the host.
 */
template <typename T, size_t tile_size> void BlockTileShflUpTestImpl() {
  DYNAMIC_SECTION("Tile size: " << tile_size) {
    auto grid_dim = GenerateBlockDimensionsForShuffle();
    auto block_dim = GenerateThreadDimensionsForShuffle();
    INFO("Grid dimensions: x " << grid_dim.x << ", y " << grid_dim.y << ", z " << grid_dim.z);
    INFO("Block dimensions: x " << block_dim.x << ", y " << block_dim.y << ", z " << block_dim.z);
    auto delta = GENERATE(range(static_cast<size_t>(0), tile_size));
    INFO("Delta: " << delta);
    CPUGrid grid(grid_dim, block_dim);

    const auto alloc_size = grid.thread_count_ * sizeof(T);
    LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
    LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);

    // A thread whose source lane would fall below rank 0 is expected to get its own value back.
    const auto expected = [delta, &grid](unsigned int i) -> std::optional<T> {
      const int rank_in_partition = grid.thread_rank_in_block(i).value() % tile_size;
      const int target = rank_in_partition - delta;
      if (target < 0) {
        return rank_in_partition;
      }
      return target;
    };

    block_tile_shfl_up<T, tile_size><<<grid_dim, block_dim>>>(arr_dev.ptr(), delta);
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    ArrayAllOf(arr.ptr(), grid.thread_count_, expected);
  }
}
|
||||
|
||||
/** Runs BlockTileShflUpTestImpl once for every tile size in the pack, in order. */
template <typename T, size_t... tile_sizes> void BlockTileShflUpTest() {
  (static_cast<void>(BlockTileShflUpTestImpl<T, tile_sizes>()), ...);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates the shuffle up behavior of thread block tiles of all valid sizes{2, 4, 8, 16, 32,
|
||||
* 64(if AMD)} for delta values of [0, tile size). The test is run for all overloads of shfl_up.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block_tile.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic", "", int, unsigned int, long,
                   unsigned long, long long, unsigned long long, float, double) {
  // Tile sizes 2 to 32 are always covered.
  BlockTileShflUpTest<TestType, 2, 4, 8, 16, 32>();
#if HT_AMD
#if __GFX8__ || __GFX9__
  // 64-wide tiles are compiled in only when HT_AMD and one of __GFX8__/__GFX9__ are non-zero.
  BlockTileShflUpTest<TestType, 64>();
#endif
#endif
}
|
||||
|
||||
|
||||
template <typename T, size_t tile_size>
|
||||
__global__ void block_tile_shfl_down(T* const out, const unsigned int delta) {
|
||||
const cg::thread_block_tile<tile_size> partition =
|
||||
cg::tiled_partition<tile_size>(cg::this_thread_block());
|
||||
T var = static_cast<T>(partition.thread_rank());
|
||||
out[thread_rank_in_grid()] = partition.shfl_down(var, delta);
|
||||
}
|
||||
|
||||
/**
 * Runs block_tile_shfl_down for one tile size and every delta in [0, tile_size) and validates
 * the values received by each thread on the host.
 */
template <typename T, size_t tile_size> void BlockTileShflDownTestImpl() {
  DYNAMIC_SECTION("Tile size: " << tile_size) {
    auto grid_dim = GenerateBlockDimensionsForShuffle();
    auto block_dim = GenerateThreadDimensionsForShuffle();
    INFO("Grid dimensions: x " << grid_dim.x << ", y " << grid_dim.y << ", z " << grid_dim.z);
    INFO("Block dimensions: x " << block_dim.x << ", y " << block_dim.y << ", z " << block_dim.z);
    auto delta = GENERATE(range(static_cast<size_t>(0), tile_size));
    INFO("Delta: " << delta);
    CPUGrid grid(grid_dim, block_dim);

    const auto alloc_size = grid.thread_count_ * sizeof(T);
    LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
    LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);

    const auto expected = [delta, &grid](unsigned int i) -> std::optional<T> {
      const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
      const auto rank_in_block = grid.thread_rank_in_block(i).value();
      const auto rank_in_group = rank_in_block % tile_size;
      const auto target = rank_in_group + delta;

      if (rank_in_block >= (partitions_in_block - 1) * tile_size) {
        // Last tile of the block: when the block size is not an integer multiple of tile_size
        // this tile contains inactive threads. Shuffling from an inactive thread returns an
        // undefined value, so threads that would read from one are skipped (nullopt).
        const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_;
        return target < tile_size - tail ? std::optional(target) : std::nullopt;
      }

      // Fully populated tile: a source lane past the end of the tile yields the thread's own
      // value.
      return target < tile_size ? target : rank_in_group;
    };

    block_tile_shfl_down<T, tile_size><<<grid_dim, block_dim>>>(arr_dev.ptr(), delta);
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    ArrayAllOf(arr.ptr(), grid.thread_count_, expected);
  }
}
|
||||
|
||||
/** Runs BlockTileShflDownTestImpl once for every tile size in the pack, in order. */
template <typename T, size_t... tile_sizes> void BlockTileShflDownTest() {
  (static_cast<void>(BlockTileShflDownTestImpl<T, tile_sizes>()), ...);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates the shuffle down behavior of thread block tiles of all valid sizes{2, 4, 8, 16,
|
||||
* 32, 64(if AMD)} for delta values of [0, tile size). The test is run for all overloads of
|
||||
* shfl_down.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block_tile.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic", "", int, unsigned int, long,
                   unsigned long, long long, unsigned long long, float, double) {
  // Tile sizes 2 to 32 are always covered.
  BlockTileShflDownTest<TestType, 2, 4, 8, 16, 32>();
#if HT_AMD
#if __GFX8__ || __GFX9__
  // 64-wide tiles are compiled in only when HT_AMD and one of __GFX8__/__GFX9__ are non-zero.
  BlockTileShflDownTest<TestType, 64>();
#endif
#endif
}
|
||||
|
||||
|
||||
template <typename T, size_t tile_size>
|
||||
__global__ void block_tile_shfl_xor(T* const out, const unsigned mask) {
|
||||
const cg::thread_block_tile<tile_size> partition =
|
||||
cg::tiled_partition<tile_size>(cg::this_thread_block());
|
||||
T var = static_cast<T>(partition.thread_rank());
|
||||
out[thread_rank_in_grid()] = partition.shfl_xor(var, mask);
|
||||
}
|
||||
|
||||
/**
 * Runs block_tile_shfl_xor for one tile size and every mask in [0, tile_size) and validates the
 * values received by each thread on the host.
 */
template <typename T, size_t tile_size> void BlockTileShflXORTestImpl() {
  DYNAMIC_SECTION("Tile size: " << tile_size) {
    auto grid_dim = GenerateBlockDimensionsForShuffle();
    auto block_dim = GenerateThreadDimensionsForShuffle();
    INFO("Grid dimensions: x " << grid_dim.x << ", y " << grid_dim.y << ", z " << grid_dim.z);
    INFO("Block dimensions: x " << block_dim.x << ", y " << block_dim.y << ", z " << block_dim.z);
    const auto mask = GENERATE(range(static_cast<size_t>(0), tile_size));
    INFO("Mask: 0x" << std::hex << mask);
    CPUGrid grid(grid_dim, block_dim);

    const auto alloc_size = grid.thread_count_ * sizeof(T);
    LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
    LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);

    block_tile_shfl_xor<T, tile_size><<<grid_dim, block_dim>>>(arr_dev.ptr(), mask);
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    ArrayAllOf(arr.ptr(), grid.thread_count_, [mask, &grid](unsigned int i) -> std::optional<T> {
      const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
      const auto rank_in_block = grid.thread_rank_in_block(i).value();
      const int rank_in_partition = rank_in_block % tile_size;
      const auto target = rank_in_partition ^ mask;

      if (rank_in_block < (partitions_in_block - 1) * tile_size) {
        // Any tile except the last one of the block: every source lane is checked.
        return target;
      } else {
        // Last tile of the block: source lanes beyond the threads that exist are not checked.
        const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_;
        return target < tile_size - tail ? std::optional(target) : std::nullopt;
      }
    });
  }
}
|
||||
|
||||
/** Runs BlockTileShflXORTestImpl once for every tile size in the pack, in order. */
template <typename T, size_t... tile_sizes> void BlockTileShflXORTest() {
  (static_cast<void>(BlockTileShflXORTestImpl<T, tile_sizes>()), ...);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates the shuffle xor behavior of thread block tiles of all valid sizes{2, 4, 8, 16, 32,
|
||||
* 64(if AMD)} for mask values of [0, tile size). The test is run for all overloads of shfl_xor.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block_tile.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic", "", int, unsigned int, long,
                   unsigned long, long long, unsigned long long, float, double) {
  // Tile sizes 2 to 32 are always covered.
  BlockTileShflXORTest<TestType, 2, 4, 8, 16, 32>();
#if HT_AMD
#if __GFX8__ || __GFX9__
  // 64-wide tiles are compiled in only when HT_AMD and one of __GFX8__/__GFX9__ are non-zero.
  BlockTileShflXORTest<TestType, 64>();
#endif
#endif
}
|
||||
|
||||
template <typename T, size_t tile_size>
|
||||
__global__ void block_tile_shfl(T* const out, uint8_t* target_lanes) {
|
||||
const cg::thread_block_tile<tile_size> partition =
|
||||
cg::tiled_partition<tile_size>(cg::this_thread_block());
|
||||
T var = static_cast<T>(partition.thread_rank());
|
||||
out[thread_rank_in_grid()] = partition.shfl(var, target_lanes[partition.thread_rank()]);
|
||||
}
|
||||
|
||||
/**
 * Returns the random engine shared by the tests in this file.
 * The engine is a function-local static seeded with the fixed value 11, so the generated
 * sequence is the same on every run.
 */
static inline std::mt19937& GetRandomGenerator() {
  constexpr std::mt19937::result_type kSeed = 11;
  static std::mt19937 engine(kSeed);
  return engine;
}
|
||||
|
||||
template <typename T> static inline T GenerateRandomInteger(const T min, const T max) {
|
||||
std::uniform_int_distribution<T> dist(min, max);
|
||||
return dist(GetRandomGenerator());
|
||||
}
|
||||
|
||||
/**
 * Runs block_tile_shfl for one tile size with randomly generated source lanes and validates the
 * values received by each thread on the host.
 *
 * Source lanes are drawn from [0, 2 * tile_size]; the host-side expectation reduces them modulo
 * tile_size.
 */
template <typename T, size_t tile_size> void BlockTileShflTestImpl() {
  DYNAMIC_SECTION("Tile size: " << tile_size) {
    auto grid_dim = GenerateBlockDimensionsForShuffle();
    auto block_dim = GenerateThreadDimensionsForShuffle();
    INFO("Grid dimensions: x " << grid_dim.x << ", y " << grid_dim.y << ", z " << grid_dim.z);
    INFO("Block dimensions: x " << block_dim.x << ", y " << block_dim.y << ", z " << block_dim.z);
    CPUGrid grid(grid_dim, block_dim);

    const auto alloc_size = grid.thread_count_ * sizeof(T);
    LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
    LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);

    // One source lane per intra-tile rank.
    const auto lanes_bytes = tile_size * sizeof(uint8_t);
    LinearAllocGuard<uint8_t> target_lanes_dev(LinearAllocs::hipMalloc, lanes_bytes);
    LinearAllocGuard<uint8_t> target_lanes(LinearAllocs::hipHostMalloc, lanes_bytes);
    for (uint8_t* lane = target_lanes.ptr(); lane != target_lanes.ptr() + tile_size; ++lane) {
      *lane = GenerateRandomInteger(0, static_cast<int>(2 * tile_size));
    }

    HIP_CHECK(hipMemcpy(target_lanes_dev.ptr(), target_lanes.ptr(), tile_size * sizeof(uint8_t),
                        hipMemcpyHostToDevice));
    block_tile_shfl<T, tile_size><<<grid_dim, block_dim>>>(arr_dev.ptr(), target_lanes_dev.ptr());
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    ArrayAllOf(arr.ptr(), grid.thread_count_,
               [&target_lanes, &grid](unsigned int i) -> std::optional<T> {
      const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
      const auto rank_in_block = grid.thread_rank_in_block(i).value();
      const int rank_in_partition = rank_in_block % tile_size;
      const auto target = target_lanes.ptr()[rank_in_partition] % tile_size;

      if (rank_in_block < (partitions_in_block - 1) * tile_size) {
        // Any tile except the last one of the block: every source lane is checked.
        return target;
      } else {
        // Last tile of the block: source lanes beyond the threads that exist are not checked.
        const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_;
        return target < tile_size - tail ? std::optional(target) : std::nullopt;
      }
    });
  }
}
|
||||
|
||||
/** Runs BlockTileShflTestImpl once for every tile size in the pack, in order. */
template <typename T, size_t... tile_sizes> void BlockTileShflTest() {
  (static_cast<void>(BlockTileShflTestImpl<T, tile_sizes>()), ...);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates the shuffle behavior of thread block tiles of all valid sizes{2, 4, 8, 16, 32,
|
||||
* 64(if AMD)} for generated shuffle target lanes. The test is run for all overloads of shfl.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block_tile.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Positive_Basic", "", int, unsigned int, long,
                   unsigned long, long long, unsigned long long, float, double) {
  // Tile sizes 2 to 32 are always covered.
  BlockTileShflTest<TestType, 2, 4, 8, 16, 32>();
#if HT_AMD
#if __GFX8__ || __GFX9__
  // 64-wide tiles are compiled in only when HT_AMD and one of __GFX8__/__GFX9__ are non-zero.
  BlockTileShflTest<TestType, 64>();
#endif
#endif
}
|
||||
|
||||
|
||||
template <bool use_global, size_t tile_size, typename T>
|
||||
__global__ void block_tile_sync_check(T* global_data, unsigned int* wait_modifiers) {
|
||||
extern __shared__ uint8_t shared_data[];
|
||||
T* const data = use_global ? global_data : reinterpret_cast<T*>(shared_data);
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto block = cg::this_thread_block();
|
||||
const cg::thread_block_tile<tile_size> partition =
|
||||
cg::tiled_partition<tile_size>(cg::this_thread_block());
|
||||
|
||||
const auto data_idx = [&block](unsigned int i) { return use_global ? i : (i % block.size()); };
|
||||
|
||||
const auto partitions_in_block = (block.size() + partition.size() - 1) / partition.size();
|
||||
const auto partition_rank = block.thread_rank() / partition.size();
|
||||
const auto tail = partitions_in_block * partition.size() - block.size();
|
||||
const auto window_size = partition.size() - tail * (partition_rank == partitions_in_block - 1);
|
||||
|
||||
const auto block_base_idx = tid / block.size() * block.size();
|
||||
const auto tile_base_idx = block_base_idx + partition_rank * partition.size();
|
||||
|
||||
const auto wait_modifier = wait_modifiers[tid];
|
||||
busy_wait(wait_modifier);
|
||||
data[data_idx(tid)] = partition.thread_rank();
|
||||
partition.sync();
|
||||
bool valid = true;
|
||||
for (auto i = 0; i < window_size; ++i) {
|
||||
const auto expected = (partition.thread_rank() + i) % window_size;
|
||||
|
||||
if (!(valid &= (data[data_idx(tile_base_idx + expected)] == expected))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
partition.sync();
|
||||
data[data_idx(tid)] = valid;
|
||||
if constexpr (!use_global) {
|
||||
global_data[tid] = data[data_idx(tid)];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs block_tile_sync_check for one tile size, cmd_options.cg_iterations times, and requires
 * every thread to report success.
 *
 * The first run uses a wait modifier of 0 for every thread; later runs use random modifiers in
 * [0, 1500]. The shared-memory variant returns without testing when one block's worth of
 * elements does not fit into hipDeviceAttributeMaxSharedMemoryPerBlock of device 0.
 */
template <bool global_memory, typename T, size_t tile_size> void BlockTileSyncTestImpl() {
  DYNAMIC_SECTION("Tile size: " << tile_size) {
    const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations));
    INFO("Run number: " << randomized_run_count + 1);
    auto grid_dim = GenerateBlockDimensions();
    auto block_dim = GenerateThreadDimensions();
    INFO("Grid dimensions: x " << grid_dim.x << ", y " << grid_dim.y << ", z " << grid_dim.z);
    INFO("Block dimensions: x " << block_dim.x << ", y " << block_dim.y << ", z " << block_dim.z);
    CPUGrid grid(grid_dim, block_dim);

    const auto alloc_size = grid.thread_count_ * sizeof(T);
    const auto alloc_size_per_block = alloc_size / grid.block_count_;
    int max_shared_mem_per_block = 0;
    HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block,
                                    hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
    if (!global_memory && (max_shared_mem_per_block < alloc_size_per_block)) {
      return;
    }

    LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
    LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
    LinearAllocGuard<unsigned int> wait_modifiers_dev(LinearAllocs::hipMalloc,
                                                      grid.thread_count_ * sizeof(unsigned int));
    LinearAllocGuard<unsigned int> wait_modifiers(LinearAllocs::hipHostMalloc,
                                                  grid.thread_count_ * sizeof(unsigned int));
    if (randomized_run_count == 0) {
      // First run: no artificial delay for any thread.
      std::fill_n(wait_modifiers.ptr(), grid.thread_count_, 0u);
    } else {
      std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_,
                    [] { return GenerateRandomInteger(0u, 1500u); });
    }

    // Dynamic shared memory is only requested by the shared-memory variant.
    const auto shared_memory_size = global_memory ? 0u : alloc_size_per_block;
    HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(),
                        grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));

    block_tile_sync_check<global_memory, tile_size>
        <<<grid_dim, block_dim, shared_memory_size>>>(arr_dev.ptr(), wait_modifiers_dev.ptr());
    HIP_CHECK(hipGetLastError());

    HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // Every thread must have stored a non-zero (valid) flag.
    REQUIRE(
        std::all_of(arr.ptr(), arr.ptr() + grid.thread_count_, [](unsigned int e) { return e; }));
  }
}
|
||||
|
||||
/** Runs BlockTileSyncTestImpl once for every tile size in the pack, in order. */
template <bool global_memory, typename T, size_t... tile_sizes> void BlockTileSyncTest() {
  (static_cast<void>(BlockTileSyncTestImpl<global_memory, T, tile_sizes>()), ...);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches a kernel wherein blocks are divided into tiled partitions(size of 2, 4, 8, 16, 32,
|
||||
* 64 if AMD) and every thread writes its intra-tile rank into an array slot determined by its
|
||||
* grid-wide linear index. The array is either in global or dynamic shared memory based on a compile
|
||||
* time switch, and the test is run for arrays of 1, 2, and 4 byte elements. Before the write each
|
||||
* thread executes a busy wait loop for a random amount of clock cycles, the amount being read from
|
||||
* an input array. After the write a tile-wide sync is performed and each thread validates that it
|
||||
* can read the expected values that other threads within the same tile have written to their
|
||||
* respective array slots.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/thread_block_tile.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Sync_Positive_Basic", "", uint8_t, uint16_t, uint32_t) {
  // First template argument of BlockTileSyncTest: true places the checked array in global
  // memory, false places it in dynamic shared memory.
  SECTION("Global memory") {
    BlockTileSyncTest<true, TestType, 2, 4, 8, 16, 32>();
#if HT_AMD && (__GFX8__ || __GFX9__)
    BlockTileSyncTest<true, TestType, 64>();
#endif
  }
  SECTION("Shared memory") {
    BlockTileSyncTest<false, TestType, 2, 4, 8, 16, 32>();
#if HT_AMD && (__GFX8__ || __GFX9__)
    // The 64-wide tile must use the shared-memory variant here as well; it previously repeated
    // the global-memory run, leaving shared memory untested for this tile size.
    BlockTileSyncTest<false, TestType, 64>();
#endif
  }
}
|
||||
@@ -3,9 +3,19 @@ set(TEST_SRC
|
||||
hipFuncSetCacheConfig.cc
|
||||
hipFuncSetSharedMemConfig.cc
|
||||
hipFuncSetAttribute.cc
|
||||
hipFuncGetAttributes.cc
|
||||
hipLaunchCooperativeKernel.cc
|
||||
hipLaunchCooperativeKernelMultiDevice.cc
|
||||
)
|
||||
|
||||
# The hipExt* launch APIs are only built when HIP_PLATFORM matches "amd".
if(HIP_PLATFORM MATCHES "amd")
  list(APPEND TEST_SRC
       hipExtLaunchKernel.cc
       hipExtLaunchMultiKernelMultiDevice.cc)
endif()
|
||||
|
||||
# Builds the ExecutionControlTest executable from TEST_SRC as part of the build_tests target.
# The sources are compiled with -std=c++17.
hip_add_exe_to_target(NAME ExecutionControlTest
                      TEST_SRC ${TEST_SRC}
                      TEST_TARGET_NAME build_tests
                      COMPILE_OPTIONS -std=c++17)
|
||||
|
||||
@@ -23,5 +23,15 @@ THE SOFTWARE.
|
||||
#include "execution_control_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
|
||||
// Empty kernel used where only the launch itself is under test.
// (Defined exactly once; a second identical definition would be a redefinition error.)
__global__ void kernel() {}

// Second empty kernel, distinct from kernel().
__global__ void kernel2() {}

// Writes the value 42 through val.
__global__ void kernel_42(int* val) { *val = 42; }

// Kernel that only performs a grid-wide synchronization through cooperative groups.
__global__ void coop_kernel() {
  cooperative_groups::grid_group grid = cooperative_groups::this_grid();
  grid.sync();
}
|
||||
@@ -22,4 +22,10 @@ THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Empty kernel used where only the launch itself is under test.
__global__ void kernel();

// Second empty kernel, distinct from kernel().
__global__ void kernel2();

// Writes the value 42 through val.
__global__ void kernel_42(int* val);

// Kernel that only performs a grid-wide synchronization through cooperative groups.
__global__ void coop_kernel();
|
||||
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "execution_control_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
// Basic launches through hipExtLaunchKernel: a kernel without arguments, and a kernel that
// receives its single pointer argument through the kernelParams array.
TEST_CASE("Unit_hipExtLaunchKernel_Positive_Basic") {
  SECTION("Kernel with no arguments") {
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, 1, 1},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
    HIP_CHECK(hipDeviceSynchronize());
  }

  SECTION("Kernel with arguments using kernelParams") {
    int result = 0;
    LinearAllocGuard<int> result_dev(LinearAllocs::hipMalloc, sizeof(int));
    HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr())));

    // kernelParams holds the address of each argument, here the address of the device pointer.
    int* device_result = result_dev.ptr();
    void* kernel_args[1] = {&device_result};
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel_42), dim3{1, 1, 1}, dim3{1, 1, 1},
                                 kernel_args, 0, nullptr, nullptr, nullptr, 0u));

    HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault));
    REQUIRE(result == 42);
  }
}
|
||||
|
||||
// Launches the empty kernel with each block dimension, one at a time, set to the maximum the
// device reports for that dimension. The other two dimensions stay at 1.
TEST_CASE("Unit_hipExtLaunchKernel_Positive_Parameters") {
  SECTION("blockDim.x == maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX);
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{x, 1, 1},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
  }

  SECTION("blockDim.y == maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY);
    // The limit goes into the y component (it was previously passed as blockDim.x).
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, y, 1},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
  }

  SECTION("blockDim.z == maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ);
    // The limit goes into the z component (it was previously passed as blockDim.x).
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, 1, z},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
  }
}
|
||||
|
||||
// Passes one invalid argument at a time to hipExtLaunchKernel and checks the returned error code.
TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") {
  SECTION("f == nullptr") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(nullptr, dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, 0, nullptr,
                                       nullptr, nullptr, 0u),
                    hipErrorInvalidDeviceFunction);
  }

  // A zero in any grid dimension is rejected.
  SECTION("gridDim.x == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{0, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("gridDim.y == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 0, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("gridDim.z == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 0},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  // A zero in any block dimension is rejected.
  SECTION("blockDim.x == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{0, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("blockDim.y == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 0, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("blockDim.z == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 0}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  // One past the per-dimension block limit reported by the device.
  SECTION("blockDim.x > maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.y > maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, y, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.z > maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, z}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
    // Smallest cube edge whose volume strictly exceeds the limit. An integer search is used
    // because ceil(cbrt(max)) gives dim^3 == max when max is a perfect cube, which would be a
    // valid launch configuration.
    unsigned int dim = 1u;
    while (dim * dim * dim <= max) {
      ++dim;
    }
    HIP_CHECK_ERROR(
        hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim},
                           nullptr, 0, nullptr, nullptr, nullptr, 0u),
        hipErrorInvalidConfiguration);
  }

  SECTION("sharedMemBytes > maxSharedMemoryPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, max, nullptr, nullptr, nullptr, 0u),
                    hipErrorOutOfMemory);
  }

  // The handles below are made invalid by creating and immediately destroying them.
  SECTION("Invalid stream") {
    hipStream_t stream = nullptr;
    HIP_CHECK(hipStreamCreate(&stream));
    HIP_CHECK(hipStreamDestroy(stream));
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, stream, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("Invalid startEvent") {
    hipEvent_t event = nullptr;
    HIP_CHECK(hipEventCreate(&event));
    HIP_CHECK(hipEventDestroy(event));
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, event, nullptr, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("Invalid endEvent") {
    hipEvent_t event = nullptr;
    HIP_CHECK(hipEventCreate(&event));
    HIP_CHECK(hipEventDestroy(event));
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, event, 0u),
                    hipErrorInvalidValue);
  }
}
|
||||
+144
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "execution_control_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Builds one hipLaunchParams entry per visible device, each with its own stream, and launches
 *    `kernel` on all of them with a single hipExtLaunchMultiKernelMultiDevice call. The launch,
 *    the per-stream synchronization and the stream teardown are all expected to succeed.
 */
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic") {
  const auto device_count = HipTest::getDeviceCount();

  std::vector<hipLaunchParams> params_list(device_count);

  // Each stream is created right after its device has been made current.
  int device = 0;
  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipSetDevice(device++));
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  HIP_CHECK(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u));

  // The descriptors are only read from here on, so iterate by const reference.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamSynchronize(params.stream));
  }

  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }
}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Starts from a valid per-device launch list and, one section at a time, passes an invalid
 *    argument or makes the second entry disagree with the first. Every section expects
 *    hipExtLaunchMultiKernelMultiDevice to return hipErrorInvalidValue.
 *  - The mismatch sections need a second entry and therefore only run when more than one device
 *    is present.
 */
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters") {
  const auto device_count = HipTest::getDeviceCount();

  std::vector<hipLaunchParams> params_list(device_count);

  // Each stream is created right after its device has been made current.
  int device = 0;
  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipSetDevice(device++));
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  SECTION("launchParamsList == nullptr") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(nullptr, device_count, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices == 0") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), 0, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices > device count") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count + 1, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("invalid flags") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 999),
                    hipErrorInvalidValue);
  }

  if (device_count > 1) {
    SECTION("launchParamsList.func doesn't match across all devices") {
      params_list[1].func = reinterpret_cast<void*>(kernel2);
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.gridDim doesn't match across all kernels") {
      params_list[1].gridDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.blockDim doesn't match across all kernels") {
      params_list[1].blockDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.sharedMem doesn't match across all kernels") {
      params_list[1].sharedMem = 1024;
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }
  }

  // The descriptors are only read here, so iterate by const reference.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }
}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Creates two launch entries whose streams are both created while device 0 is current and
 *    expects hipExtLaunchMultiKernelMultiDevice to reject the list with hipErrorInvalidValue.
 */
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_MultiKernelSameDevice") {
  HIP_CHECK(hipSetDevice(0));

  std::vector<hipLaunchParams> params_list(2);

  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), 2, 0u),
                  hipErrorInvalidValue);

  // The descriptors are only read here, so iterate by const reference.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }
}
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <utils.hh>
|
||||
|
||||
constexpr size_t kConstSizeBytes = 128;
|
||||
__constant__ char const_data[kConstSizeBytes];
|
||||
|
||||
__global__ void attribute_test_kernel() {}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Queries the attributes of attribute_test_kernel once, then validates each reported field in
 *    a dedicated section.
 */
TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") {
  hipFuncAttributes attr;
  HIP_CHECK(hipFuncGetAttributes(&attr, reinterpret_cast<void*>(attribute_test_kernel)));

  SECTION("binaryVersion") {
#if HT_NVIDIA
    // Expected value is <compute capability major><minor>, e.g. 8.6 -> 86.
    const auto cc_major = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMajor);
    const auto cc_minor = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMinor);
    REQUIRE(attr.binaryVersion == cc_major * 10 + cc_minor);
#elif HT_AMD
    REQUIRE(attr.binaryVersion > 0);
#endif
  }

  SECTION("cacheModeCA") {
    // Only the values 0 and 1 are accepted.
    REQUIRE((attr.cacheModeCA == 0 || attr.cacheModeCA == 1));
  }

  SECTION("constSizeBytes") {
    REQUIRE(attr.constSizeBytes == kConstSizeBytes);
  }

  SECTION("maxThreadsPerBlock") {
    const auto device_limit = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
    REQUIRE(attr.maxThreadsPerBlock == device_limit);
  }

  SECTION("numRegs") {
    REQUIRE(attr.numRegs >= 0);
  }

  SECTION("ptxVersion") {
    REQUIRE(attr.ptxVersion > 0);
  }

  SECTION("sharedSizeBytes") {
    const auto shared_limit = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock);
    REQUIRE(attr.sharedSizeBytes <= shared_limit);
  }
}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Passes a null output pointer and expects hipErrorInvalidValue.
 *  - Passes a null function pointer and expects hipErrorInvalidDeviceFunction.
 */
TEST_CASE("Unit_hipFuncGetAttributes_Negative_Parameters") {
  SECTION("attr == nullptr") {
    void* const func = reinterpret_cast<void*>(attribute_test_kernel);
    HIP_CHECK_ERROR(hipFuncGetAttributes(nullptr, func), hipErrorInvalidValue);
  }

  SECTION("func == nullptr") {
    hipFuncAttributes attr;
    HIP_CHECK_ERROR(hipFuncGetAttributes(&attr, nullptr), hipErrorInvalidDeviceFunction);
  }
}
|
||||
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "execution_control_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Skipped when device 0 does not report cooperative launch support.
 *  - Launches coop_kernel without arguments on a 2x2x1 grid and waits for the device.
 *  - Launches kernel_42 with a device int pointer passed through kernelParams, copies the value
 *    back and expects it to be 42.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Basic") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  SECTION("Cooperative kernel with no arguments") {
    const dim3 grid{2, 2, 1};
    const dim3 block{1, 1, 1};
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(coop_kernel), grid, block,
                                         nullptr, 0, nullptr));
    HIP_CHECK(hipDeviceSynchronize());
  }

  SECTION("Kernel with arguments using kernelParams") {
    LinearAllocGuard<int> result_dev(LinearAllocs::hipMalloc, sizeof(int));
    HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr())));

    // kernelParams holds the address of each argument, hence the pointer-to-pointer.
    int* result_ptr = result_dev.ptr();
    void* kernel_args[1] = {&result_ptr};

    const dim3 single{1, 1, 1};
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel_42), single, single,
                                         kernel_args, 0, nullptr));

    int host_result = 0;
    HIP_CHECK(hipMemcpy(&host_result, result_dev.ptr(), sizeof(host_result), hipMemcpyDefault));
    REQUIRE(host_result == 42);
  }
}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Skipped when device 0 does not report cooperative launch support.
 *  - Launches `kernel` with one block dimension set to the maximum the device reports for that
 *    axis and the other two set to 1; each launch is expected to succeed.
 *  - The y and z limits are placed in the y and z components of blockDim respectively, so that
 *    each section exercises the axis named in its title.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Parameters") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  SECTION("blockDim.x == maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX);
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                         dim3{x, 1, 1}, nullptr, 0, nullptr));
  }

  SECTION("blockDim.y == maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY);
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                         dim3{1, y, 1}, nullptr, 0, nullptr));
  }

  SECTION("blockDim.z == maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ);
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                         dim3{1, 1, z}, nullptr, 0, nullptr));
  }
}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Skipped when device 0 does not report cooperative launch support.
 *  - Each section passes one invalid argument to hipLaunchCooperativeKernel and checks the
 *    specific error code that is returned: null function, zero or over-limit grid/block
 *    dimensions, too many threads per block, too many blocks for a cooperative launch, too much
 *    dynamic shared memory, and a stream handle that has already been destroyed.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  // Smallest edge length whose cube is strictly greater than `limit`. Exact integer arithmetic
  // is used so that a limit which is a perfect cube still produces an over-limit configuration.
  const auto cube_edge_exceeding = [](unsigned int limit) {
    unsigned int edge = 1u;
    while (static_cast<unsigned long long>(edge) * edge * edge <= limit) {
      ++edge;
    }
    return edge;
  };

  SECTION("f == nullptr") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(static_cast<void*>(nullptr), dim3{1, 1, 1},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidDeviceFunction);
  }

  SECTION("gridDim.x == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{0, 1, 1},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("gridDim.y == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 0, 1},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("gridDim.z == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 0},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.x == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{0, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.y == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 0, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.z == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, 0}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.x > maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{x, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.y > maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, y, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.z > maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, z}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
    // A cubic block whose thread count is guaranteed to be above the limit.
    const unsigned int dim = cube_edge_exceeding(max);
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{dim, dim, dim}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }

  SECTION(
      "gridDim.x * gridDim.y * gridDim.z > maxActiveBlocksPerMultiprocessor * "
      "multiProcessorCount") {
    int max_blocks = 0;
    HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks,
                                                           reinterpret_cast<void*>(kernel), 1, 0));
    const unsigned int multiproc_count =
        GetDeviceAttribute(0, hipDeviceAttributeMultiprocessorCount);
    // A cubic grid whose block count is guaranteed to be above the product of the two limits.
    const unsigned int dim = cube_edge_exceeding(max_blocks * multiproc_count);
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{dim, dim, dim},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorCooperativeLaunchTooLarge);
  }

  SECTION("sharedMemBytes > maxSharedMemoryPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, 1}, nullptr, max, nullptr),
                    hipErrorCooperativeLaunchTooLarge);
  }

  SECTION("Invalid stream") {
    // The handle is destroyed before the launch, so the launch receives a stale stream.
    hipStream_t stream = nullptr;
    HIP_CHECK(hipStreamCreate(&stream));
    HIP_CHECK(hipStreamDestroy(stream));
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, 1}, nullptr, 0, stream),
                    hipErrorContextIsDestroyed);
  }
}
|
||||
+159
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "execution_control_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Skipped when device 0 does not report cooperative launch support.
 *  - Builds one hipLaunchParams entry per visible device, each with its own stream, and launches
 *    coop_kernel on all of them with a single hipLaunchCooperativeKernelMultiDevice call. The
 *    launch, the per-stream synchronization and the stream teardown are expected to succeed.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic") {
  // NOTE(review): only device 0 and the single-device cooperative launch attribute are checked;
  // confirm whether hipDeviceAttributeCooperativeMultiDeviceLaunch should gate this test instead.
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  const auto device_count = HipTest::getDeviceCount();

  std::vector<hipLaunchParams> params_list(device_count);

  // Each stream is created right after its device has been made current.
  int device = 0;
  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(coop_kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipSetDevice(device++));
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  HIP_CHECK(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u));

  // The descriptors are only read from here on, so iterate by const reference.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamSynchronize(params.stream));
  }

  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }
}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Skipped when device 0 does not report cooperative launch support.
 *  - Starts from a valid per-device launch list and, one section at a time, passes an invalid
 *    argument or makes the second entry disagree with the first. Every section expects
 *    hipLaunchCooperativeKernelMultiDevice to return hipErrorInvalidValue.
 *  - The mismatch sections need a second entry and therefore only run when more than one device
 *    is present.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters") {
  // NOTE(review): only device 0 and the single-device cooperative launch attribute are checked;
  // confirm whether hipDeviceAttributeCooperativeMultiDeviceLaunch should gate this test instead.
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  const auto device_count = HipTest::getDeviceCount();

  std::vector<hipLaunchParams> params_list(device_count);

  // Each stream is created right after its device has been made current.
  int device = 0;
  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(coop_kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipSetDevice(device++));
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  SECTION("launchParamsList == nullptr") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(nullptr, device_count, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), 0, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices > device count") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count + 1, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("invalid flags") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 999),
                    hipErrorInvalidValue);
  }

  if (device_count > 1) {
    SECTION("launchParamsList.func doesn't match across all devices") {
      params_list[1].func = reinterpret_cast<void*>(kernel);
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.gridDim doesn't match across all kernels") {
      params_list[1].gridDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.blockDim doesn't match across all kernels") {
      params_list[1].blockDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.sharedMem doesn't match across all kernels") {
      params_list[1].sharedMem = 1024;
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }
  }

  // The descriptors are only read here, so iterate by const reference.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }
}
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Skipped when device 0 does not report cooperative launch support.
 *  - Creates two launch entries whose streams are both created while device 0 is current and
 *    expects hipLaunchCooperativeKernelMultiDevice to reject the list with hipErrorInvalidValue.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  HIP_CHECK(hipSetDevice(0));

  std::vector<hipLaunchParams> params_list(2);

  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(coop_kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), 2, 0u),
                  hipErrorInvalidValue);

  // The descriptors are only read here, so iterate by const reference.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }
}
|
||||
@@ -32,6 +32,7 @@ set(TEST_SRC
|
||||
hipGraph.cc
|
||||
hipSimpleGraphWithKernel.cc
|
||||
hipGraphAddMemcpyNode.cc
|
||||
hipGraphAddMemcpyNode_old.cc
|
||||
hipGraphClone.cc
|
||||
hipGraphInstantiateWithFlags.cc
|
||||
hipGraphAddHostNode.cc
|
||||
@@ -54,6 +55,7 @@ set(TEST_SRC
|
||||
hipGraphAddMemcpyNode1D.cc
|
||||
hipGraphAddChildGraphNode.cc
|
||||
hipGraphNodeGetType.cc
|
||||
hipGraphExecMemcpyNodeSetParams1D_old.cc
|
||||
hipGraphExecMemcpyNodeSetParams1D.cc
|
||||
hipGraphGetEdges.cc
|
||||
hipGraphGetEdges_old.cc
|
||||
@@ -71,7 +73,10 @@ set(TEST_SRC
|
||||
hipGraphEventRecordNodeSetEvent.cc
|
||||
hipGraphEventWaitNodeGetEvent.cc
|
||||
hipGraphExecMemcpyNodeSetParams.cc
|
||||
hipGraphExecMemcpyNodeSetParams_old.cc
|
||||
hipStreamBeginCapture.cc
|
||||
hipGraphAddMemcpyNode1D_old.cc
|
||||
hipGraphAddMemcpyNode1D.cc
|
||||
hipStreamBeginCapture_old.cc
|
||||
hipStreamIsCapturing.cc
|
||||
hipStreamIsCapturing_old.cc)
|
||||
@@ -98,13 +103,16 @@ set(TEST_SRC
|
||||
hipGraphAddMemsetNode.cc
|
||||
hipGraphAddKernelNode.cc
|
||||
hipGraphMemcpyNodeGetParams.cc
|
||||
hipGraphMemcpyNodeGetParams_old.cc
|
||||
hipGraphMemcpyNodeSetParams.cc
|
||||
hipGraphMemcpyNodeSetParams_old.cc
|
||||
hipGraphKernelNodeGetParams.cc
|
||||
hipGraphKernelNodeSetParams.cc
|
||||
hipGraphExecKernelNodeSetParams.cc
|
||||
hipGraphLaunch.cc
|
||||
hipGraphLaunch_old.cc
|
||||
hipGraphMemcpyNodeSetParams1D.cc
|
||||
hipGraphMemcpyNodeSetParams1D_old.cc
|
||||
hipGraphExecMemcpyNodeSetParamsToSymbol_old.cc
|
||||
hipGraphExecMemcpyNodeSetParamsToSymbol.cc
|
||||
hipGraphNodeGetDependentNodes.cc
|
||||
|
||||
@@ -1,576 +1,287 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios : Negative
|
||||
1) Pass pGraphNode as nullptr and check if api returns error.
|
||||
2) When graph is un-initialized argument(skipping graph creation),
|
||||
api should return error code.
|
||||
3) Passing pDependencies as nullptr, api should return success.
|
||||
4) When numDependencies is max(size_t) and pDependencies is not valid ptr,
|
||||
api expected to return error code.
|
||||
5) When pDependencies is nullptr, but numDependencies is non-zero,
|
||||
api expected to return error.
|
||||
6) When pCopyParams is nullptr, api expected to return error code.
|
||||
7) API expects atleast one memcpy src pointer to be set.
|
||||
When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both
|
||||
are nullptr, api expected to return error code.
|
||||
8) API expects atleast one memcpy dst pointer to be set.
|
||||
When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both
|
||||
are nullptr, api expected to return error code.
|
||||
9) Passing different element size for hipMemcpy3DParms::srcArray and
|
||||
hipMemcpy3DParms::dstArray is expected to return error code.
|
||||
|
||||
Testcase Scenarios : Functional
|
||||
1) Add memcpy node to graph and verify memcpy operation is success for all
|
||||
memcpy kinds(H2D, D2H and D2D).
|
||||
Memcpy nodes are added and assigned to default device.
|
||||
2) Perform memcpy operation for 1D, 2D and 3D arrays on default device and
|
||||
verify the results.
|
||||
3) Add memcpy node to graph and verify memcpy operation is success for all
|
||||
memcpy kinds(H2D, D2H and D2D).
|
||||
Memcpy nodes are added and assigned to Peer device.
|
||||
4) Perform memcpy operation for 1D, 2D and 3D arrays on Peer device and
|
||||
verify the results.
|
||||
5) Create two host pointers, copy the data between them by the api
|
||||
hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost.
|
||||
Validate the output.
|
||||
*/
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <memcpy3d_tests_common.hh>
|
||||
|
||||
#define ZSIZE 32
|
||||
#define YSIZE 32
|
||||
#define XSIZE 32
|
||||
#include "graph_tests_common.hh"
|
||||
|
||||
/* Test verifies hipGraphAddMemcpyNode API Negative scenarios.
|
||||
/**
|
||||
* @addtogroup hipGraphAddMemcpyNode hipGraphAddMemcpyNode
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const
|
||||
* hipGraphNode_t *pDependencies, size_t numDependencies, const hipMemcpy3DParms
|
||||
* *pCopyParams)` - Creates a memcpy node and adds it to a graph
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify basic API behavior. A Memcpy node is created with parameters set according to the
|
||||
* test run, after which the graph is run and the memcpy results are verified.
|
||||
* The test is run for all possible memcpy directions, with both the corresponding memcpy
|
||||
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphAddMemcpyNode.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_Positive_Basic") {
|
||||
constexpr bool async = false;
|
||||
|
||||
constexpr int width{10}, height{10}, depth{10};
|
||||
hipArray_t devArray1;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
hipStream_t streamForGraph;
|
||||
hipError_t ret;
|
||||
SECTION("Device to host") { Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true>); }
|
||||
|
||||
int *hData = reinterpret_cast<int*>(malloc(size));
|
||||
int *hOutputData = reinterpret_cast<int *>(malloc(size));
|
||||
SECTION("Device to host with default kind") {
|
||||
Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true>);
|
||||
}
|
||||
|
||||
REQUIRE(hData != nullptr);
|
||||
REQUIRE(hOutputData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
memset(hOutputData, 0, size);
|
||||
SECTION("Host to device") { Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true>); }
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
SECTION("Host to device with default kind") {
|
||||
Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true>);
|
||||
}
|
||||
|
||||
// Initialize host buffer
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
SECTION("Host to host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true>); }
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true>);
|
||||
}
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true>);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true>);
|
||||
}
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, height, depth), hipArrayDefault));
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width , height, depth);
|
||||
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
SECTION("Pass pGraphNode as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode(nullptr, graph, nullptr, 0, &myparams);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("When graph is nullptr") {
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, nullptr, nullptr, 0, &myparams);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Passing pDependencies as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("When numDependencies is max and pDependencies is not valid ptr") {
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, graph,
|
||||
nullptr, INT_MAX, &myparams);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("When pDependencies is nullptr, but numDependencies is non-zero") {
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 11, &myparams);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass pCopyParams as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("API expects atleast one memcpy src pointer to be set") {
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width , height, depth);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("API expects atleast one memcpy dst pointer to be set") {
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width , height, depth);
|
||||
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Passing different element size for hipMemcpy3DParms::srcArray"
|
||||
"and hipMemcpy3DParms::dstArray") {
|
||||
myparams.srcArray = devArray1;
|
||||
hipArray_t devArray2;
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width+1, height+1, depth+1), hipArrayDefault));
|
||||
myparams.dstArray = devArray2;
|
||||
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
free(hData);
|
||||
free(hOutputData);
|
||||
}
|
||||
|
||||
static void validateMemcpyNode3DArray(bool peerAccess = false) {
|
||||
constexpr int width{10}, height{10}, depth{10};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int *hData = reinterpret_cast<int*>(malloc(size));
|
||||
int *hOutputData = reinterpret_cast<int *>(malloc(size));
|
||||
|
||||
REQUIRE(hData != nullptr);
|
||||
REQUIRE(hOutputData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
|
||||
// Initialize host buffer
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true>);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true>);
|
||||
}
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, height, depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, height, depth), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
SECTION("Array from/to Host") { Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async, true>); }
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width , height, depth);
|
||||
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, height, depth);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, height, depth);
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Check result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
free(hData);
|
||||
free(hOutputData);
|
||||
}
|
||||
|
||||
static void validateMemcpyNode2DArray(bool peerAccess = false) {
|
||||
int harray2D[YSIZE][XSIZE]{};
|
||||
int harray2Dres[YSIZE][XSIZE]{};
|
||||
constexpr int width{XSIZE}, height{YSIZE};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Initialize 2D object
|
||||
for (int i = 0; i < YSIZE; i++) {
|
||||
for (int j = 0; j < XSIZE; j++) {
|
||||
harray2D[i][j] = i + j + 1;
|
||||
}
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
// Allocate 2D device array by passing depth(0)
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, height, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, height, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, height, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(harray2D, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, height, 1);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, height, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray2Dres, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < YSIZE; i++) {
|
||||
for (int j = 0; j < XSIZE; j++) {
|
||||
if (harray2D[i][j] != harray2Dres[i][j]) {
|
||||
INFO("harray2D: " << harray2D[i][j] << "harray2Dres: "
|
||||
<< harray2Dres[i][j] << " mismatch at (i,j) : " << i << j);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
}
|
||||
|
||||
static void validateMemcpyNode1DArray(bool peerAccess = false) {
|
||||
int harray1D[XSIZE]{};
|
||||
int harray1Dres[XSIZE]{};
|
||||
constexpr int width{XSIZE};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Initialize 1D object
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
harray1D[i] = i + 1;
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
// Allocate 1D device array by passing depth(0), height(0)
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
if (harray1D[i] != harray1Dres[i]) {
|
||||
INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
|
||||
<< " mismatch at : " << i);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-220
|
||||
SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async, true>); }
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic Functional Tests adds memcpy nodes of types H2D, D2D and D2H to graph
|
||||
* and verifies execution sequence by launching graph on default device.
|
||||
* Tests also verify memcpy node addition with 1D, 2D and 3D objects.
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify API behaviour with invalid arguments:
|
||||
* -# node is nullptr
|
||||
* -# graph is nullptr
|
||||
* -# pDependencies is nullptr when numDependencies is not zero
|
||||
* -# A node in pDependencies originates from a different graph
|
||||
* -# numDependencies is invalid
|
||||
* -# A node is duplicated in pDependencies
|
||||
* -# dst is nullptr
|
||||
* -# src is nullptr
|
||||
* -# kind is an invalid enum value
|
||||
* -# count is zero
|
||||
* -# count is larger than dst allocation size
|
||||
* -# count is larger than src allocation size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphAddMemcpyNode.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative_Parameters") {
|
||||
using namespace std::placeholders;
|
||||
|
||||
SECTION("Memcpy with 3D array on default device") {
|
||||
validateMemcpyNode3DArray();
|
||||
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
|
||||
|
||||
constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
GraphAddNodeCommonNegativeTests(std::bind(hipGraphAddMemcpyNode, _1, _2, _3, _4, ¶ms),
|
||||
graph);
|
||||
|
||||
SECTION("dst_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("dst_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.x = dst_ptr.pitch - extent.width + 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("extent.width + src_pos.x > src_ptr.pitch") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.x = src_ptr.pitch - extent.width + 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.y out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.y = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.y out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.y = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.z out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.z = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.z out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.z = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Invalid MemcpyKind") {
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent,
|
||||
static_cast<hipMemcpyKind>(-1));
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms),
|
||||
hipErrorInvalidMemcpyDirection);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
};
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 2D array on default device") {
|
||||
validateMemcpyNode2DArray();
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
|
||||
hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 1D array on default device") {
|
||||
validateMemcpyNode1DArray();
|
||||
SECTION("Host to Host") {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Peer access tests adds and assigns memcpy nodes of types H2D, D2D and D2H
|
||||
* to peer device. Memory allocations happen on device(0) and memcpy operations
|
||||
* are performed from device(1).
|
||||
* Tests also verify memcpy node addition with 1D, 2D and 3D objects.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
int numDevices{}, peerAccess{};
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
}
|
||||
|
||||
if (!peerAccess) {
|
||||
WARN("Skipping test as peer device access is not found!");
|
||||
return;
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 3D array on peer device") {
|
||||
validateMemcpyNode3DArray(true);
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 2D array on peer device") {
|
||||
validateMemcpyNode2DArray(true);
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 1D array on peer device") {
|
||||
validateMemcpyNode1DArray(true);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Create two host pointers, copy the data between them by the api
|
||||
* hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost.
|
||||
* Validate the output.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_HostToHost") {
|
||||
constexpr size_t size = 1024;
|
||||
size_t numW = size * sizeof(int);
|
||||
// Host Vectors
|
||||
std::vector<int> A_h(numW);
|
||||
std::vector<int> B_h(numW);
|
||||
// Initialization
|
||||
std::iota(A_h.begin(), A_h.end(), 0);
|
||||
std::fill_n(B_h.begin(), size, 0);
|
||||
|
||||
hipGraph_t graph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
hipGraphNode_t memcpyH2H;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
|
||||
hipMemcpy3DParms myparms{};
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.srcPtr = make_hipPitchedPtr(A_h.data(), numW, numW, 1);
|
||||
myparms.dstPtr = make_hipPitchedPtr(B_h.data(), numW, numW, 1);
|
||||
myparms.extent = make_hipExtent(numW, 1, 1);
|
||||
myparms.kind = hipMemcpyHostToHost;
|
||||
|
||||
// Host to Host
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyH2H, graph, nullptr,
|
||||
0, &myparms));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
|
||||
// Validation
|
||||
REQUIRE(memcmp(A_h.data(), B_h.data(), numW) == 0);
|
||||
}
|
||||
|
||||
@@ -6,237 +6,179 @@ in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Functional -
|
||||
1) Add 1D memcpy node to graph and verify that the memcpy operation succeeds for all memcpy kinds (H2D, D2H and D2D).
|
||||
Memcpy nodes are added and assigned to default device.
|
||||
2) Allocate memory on default device(Dev 0), Perform memcpy operation for 1D arrays on Peer device(Dev 1) and
|
||||
verify the results.
|
||||
3) Create two host pointers, copy the data between them by the api hipGraphAddMemcpyNode1D with data transfer
|
||||
kind hipMemcpyHostToHost. Validate the output.
|
||||
|
||||
Negative -
|
||||
1) Pass pGraphNode as nullptr and check if api returns error.
|
||||
2) When the graph argument is uninitialized (graph creation skipped), the api should return an error code.
|
||||
3) Passing pDependencies as nullptr, api should return success.
|
||||
4) When numDependencies is max(size_t) and pDependencies is not valid ptr, api expected to return error code.
|
||||
5) When pDependencies is nullptr, but numDependencies is non-zero, api expected to return error.
|
||||
6) When destination ptr is nullptr, api expected to return error code.
|
||||
7) When source ptr is nullptr, api expected to return error code.
|
||||
8) If count is more than allocated size for source and destination ptr, error code is returned.
|
||||
9) If count is less than or equal to allocated size of source and destination ptr, api should return success.
|
||||
*/
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
static void validateMemcpyNode1DArray(bool peerAccess) {
|
||||
constexpr int SIZE{32};
|
||||
int harray1D[SIZE]{};
|
||||
int harray1Dres[SIZE]{};
|
||||
hipGraph_t graph;
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipGraphNode_t memcpyH2D, memcpyD2H, memcpyD2D;
|
||||
constexpr int numBytes{SIZE * sizeof(int)};
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipMalloc(&devArray1, numBytes));
|
||||
HIP_CHECK(hipMalloc(&devArray2, numBytes));
|
||||
|
||||
// Initialize 1D object
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
harray1D[i] = i + 1;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device (harray1D -> devArray1)
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0,
|
||||
devArray1, harray1D, numBytes, hipMemcpyHostToDevice));
|
||||
|
||||
// Device to Device (devArray1 -> devArray2)
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, graph, &memcpyH2D, 1,
|
||||
devArray2, devArray1, numBytes, hipMemcpyDeviceToDevice));
|
||||
|
||||
// Device to host (devArray2 -> harray1Dres)
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &memcpyD2D, 1,
|
||||
harray1Dres, devArray2, numBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
if (harray1D[i] != harray1Dres[i]) {
|
||||
INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
|
||||
<< " mismatch at : " << i);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFree(devArray1));
|
||||
HIP_CHECK(hipFree(devArray2));
|
||||
}
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
|
||||
#include "graph_tests_common.hh"
|
||||
|
||||
/**
|
||||
* Functional Tests adds memcpy 1D nodes of types H2D, D2D and D2H to graph
|
||||
* and verifies execution sequence by launching graph.
|
||||
*
|
||||
* For Default device test: Memory allocations and memory operations
|
||||
* are performed from device(0).
|
||||
* For Peer device test: Memory allocations happen on device(0) and memcpy operations
|
||||
* are performed from device(1).
|
||||
* @addtogroup hipGraphAddMemcpyNode1D hipGraphAddMemcpyNode1D
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphAddMemcpyNode1D(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t
|
||||
* *pDependencies, size_t numDependencies, void *dst, const void *src, size_t count, hipMemcpyKind
|
||||
* kind)` - Creates a 1D memcpy node and adds it to a graph
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") {
  // Runs the H2D -> D2D -> D2H memcpy graph once with device 0 current and, when
  // device 1 reports peer access to device 0, once more with device 1 current.
  SECTION("Memcpy with 1D array on default device") {
    validateMemcpyNode1DArray(false);
  }

  SECTION("Memcpy with 1D array on peer device") {
    int deviceCount = 0;
    int canAccessPeer = 0;
    HIP_CHECK(hipGetDeviceCount(&deviceCount));

    // Only query peer access when a second device is actually present.
    if (deviceCount >= 2) {
      HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 1, 0));
    }

    if (canAccessPeer) {
      validateMemcpyNode1DArray(true);
    } else {
      WARN("Skipping test as peer device access is not found!");
    }
  }
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Negative Test for API hipGraphAddMemcpyNode1D
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify basic API behavior. A Memcpy1D node is created with parameters set according to the
|
||||
* test run, after which the graph is run and the memcpy results are verified.
|
||||
* The test is run for all possible memcpy directions, with both the corresponding memcpy
|
||||
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphAddMemcpyNode1D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
int *A_d, *A_h;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode{};
|
||||
hipError_t ret;
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Positive_Basic") {
|
||||
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, count, direction));
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&A_h, Nbytes));
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
return hipSuccess;
|
||||
};
|
||||
|
||||
#if HT_NVIDIA
|
||||
MemcpyWithDirectionCommonTests<false>(f);
|
||||
#else
|
||||
using namespace std::placeholders;
|
||||
|
||||
SECTION("Device to host") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
SECTION("Device to host with default kind") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
|
||||
SECTION("Host to device") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
SECTION("Host to device with default kind") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-209
|
||||
#if 0
|
||||
SECTION("Host to host") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
|
||||
}
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify API behaviour with invalid arguments:
|
||||
* -# node is nullptr
|
||||
* -# graph is nullptr
|
||||
* -# pDependencies is nullptr when numDependencies is not zero
|
||||
* -# A node in pDependencies originates from a different graph
|
||||
* -# numDependencies is invalid
|
||||
* -# A node is duplicated in pDependencies
|
||||
* -# dst is nullptr
|
||||
* -# src is nullptr
|
||||
* -# kind is an invalid enum value
|
||||
* -# count is zero
|
||||
* -# count is larger than dst allocation size
|
||||
* -# count is larger than src allocation size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphAddMemcpyNode1D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative_Parameters") {
|
||||
using namespace std::placeholders;
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
int src[2] = {}, dst[2] = {};
|
||||
|
||||
SECTION("Pass pGraphNode as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(nullptr, graph,
|
||||
nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
GraphAddNodeCommonNegativeTests(
|
||||
std::bind(hipGraphAddMemcpyNode1D, _1, _2, _3, _4, dst, src, sizeof(dst), hipMemcpyDefault),
|
||||
graph);
|
||||
|
||||
MemcpyWithDirectionCommonNegativeTests(
|
||||
std::bind(hipGraphAddMemcpyNode1D, &node, graph, nullptr, 0, _1, _2, _3, _4), dst, src,
|
||||
sizeof(dst), hipMemcpyDefault);
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-211
|
||||
#if HT_NVIDIA
|
||||
SECTION("count == 0") {
|
||||
HIP_CHECK_ERROR(
|
||||
hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, 0, hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass graph as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, nullptr,
|
||||
nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
#endif
|
||||
|
||||
SECTION("count larger than dst allocation size") {
|
||||
LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dev_dst.ptr(), src,
|
||||
sizeof(src), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass pDependencies as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
|
||||
nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
|
||||
SECTION("count larger than src allocation size") {
|
||||
LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
|
||||
HIP_CHECK_ERROR(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, dev_src.ptr(),
|
||||
sizeof(dst), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass numDependencies is max and pDependencies is not valid ptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
|
||||
nullptr, INT_MAX, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass pDependencies as nullptr, but numDependencies is non-zero") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
|
||||
nullptr, 9, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass destination ptr as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
|
||||
nullptr, 0, nullptr, A_h, Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass source ptr as nullptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
|
||||
nullptr, 0, A_d, nullptr, Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass count as more than allocated size for source ptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
|
||||
nullptr, 0, A_d, A_h, Nbytes+10, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass count as less than allocated size for destination ptr") {
|
||||
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
|
||||
nullptr, 0, A_d, A_h, Nbytes-10, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(A_h));
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
/*
|
||||
* Create two host pointers, copy the data between them by the api
|
||||
* hipGraphAddMemcpyNode1D with data transfer kind hipMemcpyHostToHost.
|
||||
* Validate the output.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_HostToHost") {
  // Copies one host vector into another through a single hipMemcpyHostToHost graph
  // node and checks that the destination ends up equal to the source.
  constexpr size_t kElementCount = 1024;
  const size_t byteCount = kElementCount * sizeof(int);

  // Source holds 0..kElementCount-1; destination starts out zeroed.
  std::vector<int> source(kElementCount);
  std::iota(source.begin(), source.end(), 0);
  std::vector<int> destination(kElementCount, 0);

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Host to Host
  hipGraphNode_t hostCopyNode;
  HIP_CHECK(hipGraphAddMemcpyNode1D(&hostCopyNode, graph, nullptr, 0, destination.data(),
                                    source.data(), byteCount, hipMemcpyHostToHost));

  // Instantiate and launch the graph, then wait for it to finish
  hipGraphExec_t executable;
  HIP_CHECK(hipGraphInstantiate(&executable, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(executable, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  HIP_CHECK(hipGraphExecDestroy(executable));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(stream));

  // Validation: both vectors have the same length, so an element-wise compare suffices.
  const bool copiedIntact = std::equal(source.begin(), source.end(), destination.begin());
  REQUIRE(copiedIntact);
}
|
||||
|
||||
@@ -0,0 +1,242 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Functional -
|
||||
1) Add 1D memcpy node to graph and verify memcpy operation is success for all memcpy kinds(H2D, D2H and D2D).
|
||||
Memcpy nodes are added and assigned to default device.
|
||||
2) Allocate memory on default device(Dev 0), Perform memcpy operation for 1D arrays on Peer device(Dev 1) and
|
||||
verify the results.
|
||||
3) Create two host pointers, copy the data between them by the api hipGraphAddMemcpyNode1D with data transfer
|
||||
kind hipMemcpyHostToHost. Validate the output.
|
||||
|
||||
Negative -
|
||||
1) Pass pGraphNode as nullptr and check if api returns error.
|
||||
2) When graph is un-initialized argument(skipping graph creation), api should return error code.
|
||||
3) Passing pDependencies as nullptr, api should return success.
|
||||
4) When numDependencies is max(size_t) and pDependencies is not valid ptr, api expected to return error code.
|
||||
5) When pDependencies is nullptr, but numDependencies is non-zero, api expected to return error.
|
||||
6) When destination ptr is nullptr, api expected to return error code.
|
||||
7) When source ptr is nullptr, api expected to return error code.
|
||||
8) If count is more than allocated size for source and destination ptr, error code is returned.
|
||||
9) If count is less than or equal to allocated size of source and destination ptr, api should return success.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
static void validateMemcpyNode1DArray(bool peerAccess) {
|
||||
constexpr int SIZE{32};
|
||||
int harray1D[SIZE]{};
|
||||
int harray1Dres[SIZE]{};
|
||||
hipGraph_t graph;
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipGraphNode_t memcpyH2D, memcpyD2H, memcpyD2D;
|
||||
constexpr int numBytes{SIZE * sizeof(int)};
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipMalloc(&devArray1, numBytes));
|
||||
HIP_CHECK(hipMalloc(&devArray2, numBytes));
|
||||
|
||||
// Initialize 1D object
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
harray1D[i] = i + 1;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device (harray1D -> devArray1)
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0,
|
||||
devArray1, harray1D, numBytes, hipMemcpyHostToDevice));
|
||||
|
||||
// Device to Device (devArray1 -> devArray2)
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, graph, &memcpyH2D, 1,
|
||||
devArray2, devArray1, numBytes, hipMemcpyDeviceToDevice));
|
||||
|
||||
// Device to host (devArray2 -> harray1Dres)
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &memcpyD2D, 1,
|
||||
harray1Dres, devArray2, numBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
if (harray1D[i] != harray1Dres[i]) {
|
||||
INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
|
||||
<< " mismatch at : " << i);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFree(devArray1));
|
||||
HIP_CHECK(hipFree(devArray2));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Functional Tests adds memcpy 1D nodes of types H2D, D2D and D2H to graph
|
||||
* and verifies execution sequence by launching graph.
|
||||
*
|
||||
* For Default device test: Memory allocations and memory operations
|
||||
* are performed from device(0).
|
||||
* For Peer device test: Memory allocations happen on device(0) and memcpy operations
|
||||
* are performed from device(1).
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") {
  // Runs the H2D -> D2D -> D2H memcpy graph once with device 0 current and, when
  // device 1 reports peer access to device 0, once more with device 1 current.
  SECTION("Memcpy with 1D array on default device") {
    validateMemcpyNode1DArray(false);
  }

  SECTION("Memcpy with 1D array on peer device") {
    int deviceCount = 0;
    int canAccessPeer = 0;
    HIP_CHECK(hipGetDeviceCount(&deviceCount));

    // Only query peer access when a second device is actually present.
    if (deviceCount >= 2) {
      HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 1, 0));
    }

    if (canAccessPeer) {
      validateMemcpyNode1DArray(true);
    } else {
      WARN("Skipping test as peer device access is not found!");
    }
  }
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Negative Test for API hipGraphAddMemcpyNode1D
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative") {
  // Exercises hipGraphAddMemcpyNode1D with invalid (and two valid) argument
  // combinations and checks the returned error code for each.
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);

  // Note: both buffers come from hipMalloc; A_h is only used as the "source" argument.
  int* A_d = nullptr;
  int* A_h = nullptr;
  HIP_CHECK(hipMalloc(&A_d, Nbytes));
  HIP_CHECK(hipMalloc(&A_h, Nbytes));

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));
  hipGraphNode_t memcpyNode{};

  // Every case below passes a null dependency list and hipMemcpyHostToDevice; only the
  // remaining arguments vary, so they are the parameters of this helper.
  const auto addH2DNode = [](hipGraphNode_t* pNode, hipGraph_t targetGraph, size_t numDeps,
                             void* dst, const void* src, size_t count) {
    return hipGraphAddMemcpyNode1D(pNode, targetGraph, nullptr, numDeps, dst, src, count,
                                   hipMemcpyHostToDevice);
  };

  SECTION("Pass pGraphNode as nullptr") {
    REQUIRE(hipErrorInvalidValue == addH2DNode(nullptr, graph, 0, A_d, A_h, Nbytes));
  }

  SECTION("Pass graph as nullptr") {
    REQUIRE(hipErrorInvalidValue == addH2DNode(&memcpyNode, nullptr, 0, A_d, A_h, Nbytes));
  }

  SECTION("Pass pDependencies as nullptr") {
    REQUIRE(hipSuccess == addH2DNode(&memcpyNode, graph, 0, A_d, A_h, Nbytes));
  }

  SECTION("Pass numDependencies is max and pDependencies is not valid ptr") {
    REQUIRE(hipErrorInvalidValue ==
            addH2DNode(&memcpyNode, graph, INT_MAX, A_d, A_h, Nbytes));
  }

  SECTION("Pass pDependencies as nullptr, but numDependencies is non-zero") {
    REQUIRE(hipErrorInvalidValue == addH2DNode(&memcpyNode, graph, 9, A_d, A_h, Nbytes));
  }

  SECTION("Pass destination ptr as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            addH2DNode(&memcpyNode, graph, 0, nullptr, A_h, Nbytes));
  }

  SECTION("Pass source ptr as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            addH2DNode(&memcpyNode, graph, 0, A_d, nullptr, Nbytes));
  }

  SECTION("Pass count as more than allocated size for source ptr") {
    REQUIRE(hipErrorInvalidValue ==
            addH2DNode(&memcpyNode, graph, 0, A_d, A_h, Nbytes + 10));
  }

  SECTION("Pass count as less than allocated size for destination ptr") {
    REQUIRE(hipSuccess == addH2DNode(&memcpyNode, graph, 0, A_d, A_h, Nbytes - 10));
  }

  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipFree(A_h));
  HIP_CHECK(hipGraphDestroy(graph));
}
|
||||
/*
|
||||
* Create two host pointers, copy the data between them by the api
|
||||
* hipGraphAddMemcpyNode1D with data transfer kind hipMemcpyHostToHost.
|
||||
* Validate the output.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_HostToHost") {
  // Copies one host vector into another through a single hipMemcpyHostToHost graph
  // node and checks that the destination ends up equal to the source.
  constexpr size_t kElementCount = 1024;
  const size_t byteCount = kElementCount * sizeof(int);

  // Source holds 0..kElementCount-1; destination starts out zeroed.
  std::vector<int> source(kElementCount);
  std::iota(source.begin(), source.end(), 0);
  std::vector<int> destination(kElementCount, 0);

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Host to Host
  hipGraphNode_t hostCopyNode;
  HIP_CHECK(hipGraphAddMemcpyNode1D(&hostCopyNode, graph, nullptr, 0, destination.data(),
                                    source.data(), byteCount, hipMemcpyHostToHost));

  // Instantiate and launch the graph, then wait for it to finish
  hipGraphExec_t executable;
  HIP_CHECK(hipGraphInstantiate(&executable, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(executable, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  HIP_CHECK(hipGraphExecDestroy(executable));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(stream));

  // Validation: both vectors have the same length, so an element-wise compare suffices.
  const bool copiedIntact = std::equal(source.begin(), source.end(), destination.begin());
  REQUIRE(copiedIntact);
}
|
||||
@@ -0,0 +1,576 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios : Negative
|
||||
1) Pass pGraphNode as nullptr and check if api returns error.
|
||||
2) When graph is un-initialized argument(skipping graph creation),
|
||||
api should return error code.
|
||||
3) Passing pDependencies as nullptr, api should return success.
|
||||
4) When numDependencies is max(size_t) and pDependencies is not valid ptr,
|
||||
api expected to return error code.
|
||||
5) When pDependencies is nullptr, but numDependencies is non-zero,
|
||||
api expected to return error.
|
||||
6) When pCopyParams is nullptr, api expected to return error code.
|
||||
7) API expects at least one memcpy src pointer to be set.
|
||||
When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both
|
||||
are nullptr, api expected to return error code.
|
||||
8) API expects at least one memcpy dst pointer to be set.
|
||||
When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both
|
||||
are nullptr, api expected to return error code.
|
||||
9) Passing different element size for hipMemcpy3DParms::srcArray and
|
||||
hipMemcpy3DParms::dstArray is expected to return error code.
|
||||
|
||||
Testcase Scenarios : Functional
|
||||
1) Add memcpy node to graph and verify memcpy operation is success for all
|
||||
memcpy kinds(H2D, D2H and D2D).
|
||||
Memcpy nodes are added and assigned to default device.
|
||||
2) Perform memcpy operation for 1D, 2D and 3D arrays on default device and
|
||||
verify the results.
|
||||
3) Add memcpy node to graph and verify memcpy operation is success for all
|
||||
memcpy kinds(H2D, D2H and D2D).
|
||||
Memcpy nodes are added and assigned to Peer device.
|
||||
4) Perform memcpy operation for 1D, 2D and 3D arrays on Peer device and
|
||||
verify the results.
|
||||
5) Create two host pointers, copy the data between them by the api
|
||||
hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost.
|
||||
Validate the output.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#define ZSIZE 32
|
||||
#define YSIZE 32
|
||||
#define XSIZE 32
|
||||
|
||||
/* Test verifies hipGraphAddMemcpyNode API Negative scenarios.
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative") {
  // Calls hipGraphAddMemcpyNode with invalid (and one valid) argument combinations,
  // starting from a host -> 3D-array copy description, and checks each return code.
  CHECK_IMAGE_SUPPORT

  constexpr int width{10}, height{10}, depth{10};
  hipArray_t devArray1;
  hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
  hipMemcpy3DParms myparams;
  hipGraph_t graph;
  hipGraphNode_t memcpyNode;
  hipStream_t streamForGraph;
  hipError_t ret;

  // Host source buffer owned by a vector so it is released even when a failing
  // REQUIRE/HIP_CHECK aborts the test early. Element n holds the value n, which is
  // what the original depth/height/width loop nest produced.
  std::vector<int> hData(width * height * depth);
  std::iota(hData.begin(), hData.end(), 0);

  HIP_CHECK(hipStreamCreate(&streamForGraph));
  HIP_CHECK(hipGraphCreate(&graph, 0));

  hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
                                                          0, 0, 0, formatKind);
  HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
                             make_hipExtent(width, height, depth), hipArrayDefault));

  // Host to Device: baseline parameters shared by the sections below
  memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
  myparams.srcPos = make_hipPos(0, 0, 0);
  myparams.dstPos = make_hipPos(0, 0, 0);
  myparams.extent = make_hipExtent(width , height, depth);
  myparams.srcPtr = make_hipPitchedPtr(hData.data(), width * sizeof(int),
                                       width, height);
  myparams.dstArray = devArray1;
  myparams.kind = hipMemcpyHostToDevice;

  SECTION("Pass pGraphNode as nullptr") {
    ret = hipGraphAddMemcpyNode(nullptr, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }
  SECTION("When graph is nullptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, nullptr, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }
  SECTION("Passing pDependencies as nullptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipSuccess == ret);
  }
  SECTION("When numDependencies is max and pDependencies is not valid ptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph,
                                nullptr, INT_MAX, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }
  SECTION("When pDependencies is nullptr, but numDependencies is non-zero") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 11, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }
  SECTION("Pass pCopyParams as nullptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, nullptr);
    REQUIRE(hipErrorInvalidValue == ret);
  }
  SECTION("API expects atleast one memcpy src pointer to be set") {
    // Rebuild the parameters without srcPtr/srcArray.
    memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
    myparams.srcPos = make_hipPos(0, 0, 0);
    myparams.dstPos = make_hipPos(0, 0, 0);
    myparams.extent = make_hipExtent(width , height, depth);
    myparams.dstArray = devArray1;
    myparams.kind = hipMemcpyHostToDevice;
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }
  SECTION("API expects atleast one memcpy dst pointer to be set") {
    // Rebuild the parameters without dstPtr/dstArray.
    memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
    myparams.srcPos = make_hipPos(0, 0, 0);
    myparams.dstPos = make_hipPos(0, 0, 0);
    myparams.extent = make_hipExtent(width , height, depth);
    myparams.srcPtr = make_hipPitchedPtr(hData.data(), width * sizeof(int),
                                         width, height);
    myparams.kind = hipMemcpyHostToDevice;
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }
  // The name was previously built from two adjacent literals with no separating space
  // ("...srcArrayand ..."); the space is now part of the first literal.
  SECTION("Passing different element size for hipMemcpy3DParms::srcArray "
          "and hipMemcpy3DParms::dstArray") {
    // NOTE(review): devArray2 uses the same channel descriptor as devArray1 and differs
    // only in extent; srcPtr from the baseline parameters is also still set here.
    myparams.srcArray = devArray1;
    hipArray_t devArray2;
    HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
                               make_hipExtent(width+1, height+1, depth+1), hipArrayDefault));
    myparams.dstArray = devArray2;
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
    HIP_CHECK(hipFreeArray(devArray2));
  }

  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));
  HIP_CHECK(hipFreeArray(devArray1));
}
|
||||
|
||||
static void validateMemcpyNode3DArray(bool peerAccess = false) {
|
||||
constexpr int width{10}, height{10}, depth{10};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int *hData = reinterpret_cast<int*>(malloc(size));
|
||||
int *hOutputData = reinterpret_cast<int *>(malloc(size));
|
||||
|
||||
REQUIRE(hData != nullptr);
|
||||
REQUIRE(hOutputData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
|
||||
// Initialize host buffer
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, height, depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, height, depth), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width , height, depth);
|
||||
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, height, depth);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, height, depth);
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Check result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
free(hData);
|
||||
free(hOutputData);
|
||||
}
|
||||
|
||||
static void validateMemcpyNode2DArray(bool peerAccess = false) {
|
||||
int harray2D[YSIZE][XSIZE]{};
|
||||
int harray2Dres[YSIZE][XSIZE]{};
|
||||
constexpr int width{XSIZE}, height{YSIZE};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Initialize 2D object
|
||||
for (int i = 0; i < YSIZE; i++) {
|
||||
for (int j = 0; j < XSIZE; j++) {
|
||||
harray2D[i][j] = i + j + 1;
|
||||
}
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
// Allocate 2D device array by passing depth(0)
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, height, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, height, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, height, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(harray2D, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, height, 1);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, height, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray2Dres, width * sizeof(int),
|
||||
width, height);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < YSIZE; i++) {
|
||||
for (int j = 0; j < XSIZE; j++) {
|
||||
if (harray2D[i][j] != harray2Dres[i][j]) {
|
||||
INFO("harray2D: " << harray2D[i][j] << "harray2Dres: "
|
||||
<< harray2Dres[i][j] << " mismatch at (i,j) : " << i << j);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
}
|
||||
|
||||
static void validateMemcpyNode1DArray(bool peerAccess = false) {
|
||||
int harray1D[XSIZE]{};
|
||||
int harray1Dres[XSIZE]{};
|
||||
constexpr int width{XSIZE};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Initialize 1D object
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
harray1D[i] = i + 1;
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
// Allocate 1D device array by passing depth(0), height(0)
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// For peer access test, Memory is allocated on device(0)
|
||||
// while memcpy nodes are allocated and assigned to peer device(1)
|
||||
if (peerAccess) {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
}
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
if (harray1D[i] != harray1Dres[i]) {
|
||||
INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
|
||||
<< " mismatch at : " << i);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic Functional Tests adds memcpy nodes of types H2D, D2D and D2H to graph
|
||||
* and verifies execution sequence by launching graph on default device.
|
||||
* Tests also verify memcpy node addition with 1D, 2D and 3D objects.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
SECTION("Memcpy with 3D array on default device") {
|
||||
validateMemcpyNode3DArray();
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 2D array on default device") {
|
||||
validateMemcpyNode2DArray();
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 1D array on default device") {
|
||||
validateMemcpyNode1DArray();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Peer access tests adds and assigns memcpy nodes of types H2D, D2D and D2H
|
||||
* to peer device. Memory allocations happen on device(0) and memcpy operations
|
||||
* are performed from device(1).
|
||||
* Tests also verify memcpy node addition with 1D, 2D and 3D objects.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
int numDevices{}, peerAccess{};
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
}
|
||||
|
||||
if (!peerAccess) {
|
||||
WARN("Skipping test as peer device access is not found!");
|
||||
return;
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 3D array on peer device") {
|
||||
validateMemcpyNode3DArray(true);
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 2D array on peer device") {
|
||||
validateMemcpyNode2DArray(true);
|
||||
}
|
||||
|
||||
SECTION("Memcpy with 1D array on peer device") {
|
||||
validateMemcpyNode1DArray(true);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Create two host pointers, copy the data between them by the api
|
||||
* hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost.
|
||||
* Validate the output.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNode_HostToHost") {
|
||||
constexpr size_t size = 1024;
|
||||
size_t numW = size * sizeof(int);
|
||||
// Host Vectors
|
||||
std::vector<int> A_h(numW);
|
||||
std::vector<int> B_h(numW);
|
||||
// Initialization
|
||||
std::iota(A_h.begin(), A_h.end(), 0);
|
||||
std::fill_n(B_h.begin(), size, 0);
|
||||
|
||||
hipGraph_t graph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
hipGraphNode_t memcpyH2H;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
|
||||
hipMemcpy3DParms myparms{};
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.srcPtr = make_hipPitchedPtr(A_h.data(), numW, numW, 1);
|
||||
myparms.dstPtr = make_hipPitchedPtr(B_h.data(), numW, numW, 1);
|
||||
myparms.extent = make_hipExtent(numW, 1, 1);
|
||||
myparms.kind = hipMemcpyHostToHost;
|
||||
|
||||
// Host to Host
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyH2H, graph, nullptr,
|
||||
0, &myparms));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
|
||||
// Validation
|
||||
REQUIRE(memcmp(A_h.data(), B_h.data(), numW) == 0);
|
||||
}
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,247 +20,248 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Functional-
|
||||
1) Instantiate a graph with memcpy node, obtain executable graph and update the hipMemcpy3DParms node params with set. Make sure they are taking effect.
|
||||
Negative-
|
||||
1) Pass hGraphExec as nullptr and verify api returns error code.
|
||||
2) Pass node as nullptr and verify api returns error code.
|
||||
3) Pass pNodeParams as nullptr and verify api returns error code.
|
||||
4) Pass pNodeParams as empty structure object and verify api returns error code.
|
||||
5) API expects at least one memcpy src pointer to be set. When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr are both nullptr, the API is expected to return an error code.
|
||||
6) API expects at least one memcpy dst pointer to be set. When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr are both nullptr, the API is expected to return an error code.
|
||||
7) Passing different element size for hipMemcpy3DParms::srcArray and hipMemcpy3DParms::dstArray is expected to return error code.
|
||||
8) Pass node of different graph and verify api returns error code.
|
||||
*/
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
#include <memcpy3d_tests_common.hh>
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams API Negative scenarios.
|
||||
#include "graph_tests_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipGraphExecMemcpyNodeSetParams hipGraphExecMemcpyNodeSetParams
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipMemcpy3DParms
|
||||
* *pNodeParams)` - Sets the parameters for a memcpy node in the given graphExec
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr int width{10}, height{10}, depth{10};
|
||||
hipArray_t devArray, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparms;
|
||||
hipError_t ret;
|
||||
int* hData;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
hData = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify that node parameters get updated correctly by creating a node with valid but
|
||||
* incorrect parameters, and then setting them to the correct values in the executable graph. The
|
||||
* executable graph is run and the results of the memcpy verified. The test is run for all possible
|
||||
* memcpy directions, with both the corresponding memcpy kind and hipMemcpyDefault, as well as half
|
||||
* page and full page allocation sizes.
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphExecMemcpyNodeSetParams.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Positive_Basic") {
|
||||
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
const auto offset_src = reinterpret_cast<uint8_t*>(src) + 1;
|
||||
const auto offset_dst = reinterpret_cast<uint8_t*>(dst) + 1;
|
||||
auto params =
|
||||
GetMemcpy3DParms(make_hipPitchedPtr(offset_dst, 0, count - 1, 0), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(offset_src, 0, count - 1, 0), make_hipPos(0, 0, 0),
|
||||
make_hipExtent(count - 1, 1, 1), direction);
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms));
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, count, 0), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src, 0, count, 0), make_hipPos(0, 0, 0),
|
||||
make_hipExtent(count, 1, 1), direction);
|
||||
HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
return hipSuccess;
|
||||
};
|
||||
|
||||
#if HT_NVIDIA
|
||||
MemcpyWithDirectionCommonTests<false>(f);
|
||||
#else
|
||||
using namespace std::placeholders;
|
||||
|
||||
SECTION("Device to host") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
SECTION("Host to device") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
|
||||
height, depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, make_hipExtent(width+1,
|
||||
height+1, depth+1), hipArrayDefault));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparms.dstArray = devArray;
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
|
||||
|
||||
// Instantiate the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
||||
SECTION("Pass hGraphExec as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams(nullptr, memcpyNode, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass node as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, nullptr, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass pNodeParams as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass pNodeParams as empty structure object") {
|
||||
hipMemcpy3DParms temp{};
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("API expects atleast one memcpy src pointer to be set") {
|
||||
hipMemcpy3DParms temp;
|
||||
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
|
||||
temp.srcPos = make_hipPos(0, 0, 0);
|
||||
temp.dstPos = make_hipPos(0, 0, 0);
|
||||
temp.extent = make_hipExtent(width , height, depth);
|
||||
temp.dstArray = devArray;
|
||||
temp.kind = hipMemcpyHostToDevice;
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("API expects atleast one memcpy dst pointer to be set") {
|
||||
hipMemcpy3DParms temp;
|
||||
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
|
||||
temp.srcPos = make_hipPos(0, 0, 0);
|
||||
temp.dstPos = make_hipPos(0, 0, 0);
|
||||
temp.extent = make_hipExtent(width , height, depth);
|
||||
temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
temp.kind = hipMemcpyHostToDevice;
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Passing different element size for hipMemcpy3DParms::srcArray"
|
||||
"and hipMemcpy3DParms::dstArray") {
|
||||
hipMemcpy3DParms temp;
|
||||
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
|
||||
temp.srcPos = make_hipPos(0, 0, 0);
|
||||
temp.dstPos = make_hipPos(0, 0, 0);
|
||||
temp.extent = make_hipExtent(width , height, depth);
|
||||
temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
temp.kind = hipMemcpyHostToDevice;
|
||||
temp.srcArray = devArray;
|
||||
temp.dstArray = devArray2;
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Check with other graph node") {
|
||||
hipGraph_t graph1;
|
||||
hipGraphNode_t memcpyNode1;
|
||||
HIP_CHECK(hipGraphCreate(&graph1, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode1, graph1, NULL, 0, &myparms));
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode1, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
HIP_CHECK(hipGraphDestroy(graph1));
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipFreeArray(devArray));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
free(hData);
|
||||
}
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams API Functional scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Functional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr int XSIZE = 1024;
|
||||
int harray1D[XSIZE]{};
|
||||
int harray1Dres[XSIZE]{};
|
||||
constexpr int width{XSIZE};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Initialize 1D object
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
harray1D[i] = i + 1;
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
// Allocate 1D device array by passing depth(0), height(0)
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
int harray1Dupdate[XSIZE]{};
|
||||
hipArray_t devArray3;
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray3, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
|
||||
// D2H updated with different pointer harray1Dres -> harray1Dupdate
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray1Dupdate, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &myparams));
|
||||
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
if (harray1D[i] != harray1Dupdate[i]) {
|
||||
INFO("harray1D: " << harray1D[i] << " harray1Dupdate: " <<
|
||||
harray1Dupdate[i] << " mismatch at : " << i);
|
||||
REQUIRE(false);
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-209
|
||||
#if 0
|
||||
SECTION("Host to host") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
|
||||
}
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-210
|
||||
#if 0
|
||||
SECTION("Device to host with default kind") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
|
||||
SECTION("Host to device with default kind") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify API behaviour with invalid arguments:
|
||||
* -# pGraphExec is nullptr
|
||||
* -# node is nullptr
|
||||
* -# graph is nullptr
|
||||
* -# pDependencies is nullptr when numDependencies is not zero
|
||||
* -# A node in pDependencies originates from a different graph
|
||||
* -# numDependencies is invalid
|
||||
* -# A node is duplicated in pDependencies
|
||||
* -# dst is nullptr
|
||||
* -# src is nullptr
|
||||
* -# kind is an invalid enum value
|
||||
* -# count is zero
|
||||
* -# count is larger than dst allocation size
|
||||
* -# count is larger than src allocation size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphExecMemcpyNodeSetParams.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative_Parameters") {
|
||||
using namespace std::placeholders;
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
int src[2] = {}, dst[2] = {};
|
||||
|
||||
auto params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, sizeof(dst), 0), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src, 0, sizeof(src), 0), make_hipPos(0, 0, 0),
|
||||
make_hipExtent(sizeof(dst), 1, 1), hipMemcpyDefault);
|
||||
|
||||
hipGraphNode_t node = nullptr;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms));
|
||||
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
|
||||
SECTION("pGraphExec == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(nullptr, node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("node == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, nullptr, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
auto f = [&](void* dst, void* src, size_t count, hipMemcpyKind kind) {
|
||||
auto params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, sizeof(dst), 0), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src, 0, sizeof(src), 0), make_hipPos(0, 0, 0),
|
||||
make_hipExtent(count, 1, 1), kind);
|
||||
return hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms);
|
||||
};
|
||||
MemcpyWithDirectionCommonNegativeTests(f, dst, src, sizeof(dst), hipMemcpyDefault);
|
||||
|
||||
SECTION("count == 0") {
|
||||
HIP_CHECK_ERROR(
|
||||
hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, 0, hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("count larger than dst allocation size") {
|
||||
LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
|
||||
params.dstPtr = make_hipPitchedPtr(dev_dst.ptr(), 0, sizeof(int), 0);
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("count larger than src allocation size") {
|
||||
LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
|
||||
params.dstPtr = make_hipPitchedPtr(dev_src.ptr(), 0, sizeof(int), 0);
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify that memcpy direction cannot be altered in an executable graph. The test is run for
|
||||
* all memcpy directions with appropriate memory allocations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphExecMemcpyNodeSetParams.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative_Changing_Memcpy_Direction") {
|
||||
int host;
|
||||
LinearAllocGuard<int> dev(LinearAllocs::hipMalloc, sizeof(int));
|
||||
|
||||
const auto [dir, src, dst] =
|
||||
GENERATE_REF(std::make_tuple(hipMemcpyHostToHost, &host, &host),
|
||||
std::make_tuple(hipMemcpyHostToDevice, &host, dev.ptr()),
|
||||
std::make_tuple(hipMemcpyDeviceToHost, dev.ptr(), &host),
|
||||
std::make_tuple(hipMemcpyDeviceToDevice, dev.ptr(), dev.ptr()));
|
||||
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
auto params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, sizeof(int), 0), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src, 0, sizeof(int), 0), make_hipPos(0, 0, 0),
|
||||
make_hipExtent(sizeof(int), 1, 1), dir);
|
||||
|
||||
hipGraphNode_t node = nullptr;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms));
|
||||
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
|
||||
const auto set_dir = GENERATE(hipMemcpyHostToHost, hipMemcpyHostToDevice, hipMemcpyDeviceToHost,
|
||||
hipMemcpyDeviceToDevice, hipMemcpyDefault);
|
||||
if (dir == set_dir) {
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
return;
|
||||
}
|
||||
|
||||
params.kind = set_dir;
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms), hipErrorInvalidValue);
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
+206
-151
@@ -6,8 +6,10 @@ in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,182 +19,235 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Testcase Scenarios :
|
||||
Functional-
|
||||
1) Instantiate a graph with memcpy node, obtain executable graph and update the
|
||||
node params with set exec api call. Make sure they are taking effect.
|
||||
Negative-
|
||||
1) Pass hGraphExec as nullptr and check if api returns error.
|
||||
2) Pass GraphNode as nullptr and check if api returns error.
|
||||
3) Pass destination ptr is nullptr, api expected to return error code.
|
||||
4) Pass source ptr is nullptr, api expected to return error code.
|
||||
5) Pass count as zero, api expected to return error code.
|
||||
6) Pass same pointer as source ptr and destination ptr, api expected to return error code.
|
||||
7) Pass overlap memory address as source ptr and destination ptr, api expected to return error code.
|
||||
7) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code.
|
||||
8) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code.
|
||||
9) If count is more than allocated size for source and destination ptr, api should return error code.
|
||||
10) If count is less than allocated size for source and destination ptr, api should return error code.
|
||||
11) Change the hipMemcpyKind from H2D to D2H but allocate pointer memory for H2D, api should return error code.
|
||||
*/
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Negative scenarios.
|
||||
#include "graph_tests_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipGraphExecMemcpyNodeSetParams1D hipGraphExecMemcpyNodeSetParams1D
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, void *dst,
|
||||
* const void *src, size_t count, hipMemcpyKind kind)` - Sets the parameters for a memcpy node in
|
||||
* the given graphExec to perform a 1-dimensional copy
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
|
||||
int *A_d;
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
int *A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
memset(A_h, 0, Nbytes);
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify that node parameters get updated correctly by creating a node with valid but
* incorrect parameters, and then setting them to the correct values in the executable graph. The
* executable graph is run and the results of the memcpy are verified. The test is run for all
* possible memcpy directions, with both the corresponding memcpy kind and hipMemcpyDefault, as
* well as half page and full page allocation sizes.
* Test source
* ------------------------
|
||||
* - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Positive_Basic") {
|
||||
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
const auto offset_src = reinterpret_cast<uint8_t*>(src) + 1;
|
||||
const auto offset_dst = reinterpret_cast<uint8_t*>(dst) + 1;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, offset_dst, offset_src, count - 1,
|
||||
direction));
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, count, direction));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
hipError_t ret;
|
||||
hipGraphNode_t memcpyH2D;
|
||||
hipGraph_t graph;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
return hipSuccess;
|
||||
};
|
||||
|
||||
#if HT_NVIDIA
|
||||
MemcpyWithDirectionCommonTests<false>(f);
|
||||
#else
|
||||
using namespace std::placeholders;
|
||||
|
||||
SECTION("Device to host") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
|
||||
}
|
||||
|
||||
SECTION("Host to device") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
}
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-209
|
||||
#if 0
|
||||
SECTION("Host to host") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
|
||||
}
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
#endif
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-210
|
||||
#if 0
|
||||
SECTION("Device to host with default kind") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
|
||||
SECTION("Host to device with default kind") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify API behaviour with invalid arguments:
|
||||
* -# pGraphExec is nullptr
|
||||
* -# node is nullptr
|
||||
* -# graph is nullptr
|
||||
* -# pDependencies is nullptr when numDependencies is not zero
|
||||
* -# A node in pDependencies originates from a different graph
|
||||
* -# numDependencies is invalid
|
||||
* -# A node is duplicated in pDependencies
|
||||
* -# dst is nullptr
|
||||
* -# src is nullptr
|
||||
* -# kind is an invalid enum value
|
||||
* -# count is zero
|
||||
* -# count is larger than dst allocation size
|
||||
* -# count is larger than src allocation size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Parameters") {
|
||||
using namespace std::placeholders;
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
// Instantiate the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
||||
|
||||
SECTION("Pass hGraphExec as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(nullptr, memcpyH2D, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
int src[2] = {}, dst[2] = {};
|
||||
|
||||
hipGraphNode_t node = nullptr;
|
||||
HIP_CHECK(
|
||||
hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(dst), hipMemcpyDefault));
|
||||
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
|
||||
SECTION("pGraphExec == nullptr") {
|
||||
HIP_CHECK_ERROR(
|
||||
hipGraphExecMemcpyNodeSetParams1D(nullptr, node, dst, src, sizeof(dst), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass GraphNode as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, nullptr, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("node == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, nullptr, dst, src, sizeof(dst),
|
||||
hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass destination ptr is nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, nullptr, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
MemcpyWithDirectionCommonNegativeTests(
|
||||
std::bind(hipGraphExecMemcpyNodeSetParams1D, graph_exec, node, _1, _2, _3, _4), dst, src,
|
||||
sizeof(dst), hipMemcpyDefault);
|
||||
|
||||
SECTION("count == 0") {
|
||||
HIP_CHECK_ERROR(
|
||||
hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, 0, hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass source ptr is nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, nullptr,
|
||||
Nbytes, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("count larger than dst allocation size") {
|
||||
LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dev_dst.ptr(), src,
|
||||
sizeof(src), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass count as zero") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
|
||||
0, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("count larger than src allocation size") {
|
||||
LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, dev_src.ptr(),
|
||||
sizeof(dst), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass same pointer as source ptr and destination ptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_d,
|
||||
Nbytes, hipMemcpyDeviceToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_d-5,
|
||||
Nbytes, hipMemcpyDeviceToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d+5, A_d,
|
||||
Nbytes, hipMemcpyDeviceToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Copy more than allocated memory") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
|
||||
Nbytes+8, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Copy less than allocated memory") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
|
||||
Nbytes-8, hipMemcpyHostToDevice);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Change the hipMemcpyKind from H2D to D2H") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
|
||||
Nbytes, hipMemcpyDeviceToHost);
|
||||
REQUIRE(hipSuccess != ret);
|
||||
}
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Functional scenarios.
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify that memcpy direction cannot be altered in an executable graph. The test is run for
|
||||
* all memcpy directions with appropriate memory allocations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Changing_Memcpy_Direction") {
|
||||
int host;
|
||||
LinearAllocGuard<int> dev(LinearAllocs::hipMalloc, sizeof(int));
|
||||
|
||||
int *hData = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, Nbytes);
|
||||
|
||||
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C;
|
||||
hipGraphNode_t kernel_vecAdd;
|
||||
hipKernelNodeParams kernelNodeParams{};
|
||||
hipGraph_t graph;
|
||||
hipGraphExec_t graphExec;
|
||||
hipStream_t streamForGraph;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
const auto [dir, src, dst] =
|
||||
GENERATE_REF(std::make_tuple(hipMemcpyHostToHost, &host, &host),
|
||||
std::make_tuple(hipMemcpyHostToDevice, &host, dev.ptr()),
|
||||
std::make_tuple(hipMemcpyDeviceToHost, dev.ptr(), &host),
|
||||
std::make_tuple(hipMemcpyDeviceToDevice, dev.ptr(), dev.ptr()));
|
||||
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
hipGraphNode_t node = nullptr;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(int), dir));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
const auto set_dir = GENERATE(hipMemcpyHostToHost, hipMemcpyHostToDevice, hipMemcpyDeviceToHost,
|
||||
hipMemcpyDeviceToDevice, hipMemcpyDefault);
|
||||
if (dir == set_dir) {
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
return;
|
||||
}
|
||||
|
||||
void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kernelNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kernelNodeParams.gridDim = dim3(blocks);
|
||||
kernelNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kernelNodeParams.sharedMemBytes = 0;
|
||||
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
|
||||
kernelNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kernelNodeParams));
|
||||
HIP_CHECK_ERROR(
|
||||
hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, sizeof(int), set_dir),
|
||||
hipErrorInvalidValue);
|
||||
|
||||
// Create dependencies
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1));
|
||||
|
||||
// Instantiate the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyD2H_C, hData,
|
||||
C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verify graph execution result
|
||||
HipTest::checkVectorADD(A_h, B_h, hData, N);
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
free(hData);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
Testcase Scenarios :
Functional-
1) Instantiate a graph with memcpy node, obtain executable graph and update the
   node params with set exec api call. Make sure they are taking effect.
Negative-
1) Pass hGraphExec as nullptr and check if api returns error.
2) Pass GraphNode as nullptr and check if api returns error.
3) Pass destination ptr as nullptr, api expected to return error code.
4) Pass source ptr as nullptr, api expected to return error code.
5) Pass count as zero, api expected to return error code.
6) Pass same pointer as source ptr and destination ptr, api expected to return error code.
7) Pass overlap memory address as source ptr and destination ptr, api expected to return error code.
8) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code.
9) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code.
10) If count is more than allocated size for source and destination ptr, api should return error code.
11) If count is less than allocated size for source and destination ptr, api should return error code.
12) Change the hipMemcpyKind from H2D to D2H but allocate pointer memory for H2D, api should return error code.
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Functional scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
|
||||
int *hData = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, Nbytes);
|
||||
|
||||
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C;
|
||||
hipGraphNode_t kernel_vecAdd;
|
||||
hipKernelNodeParams kernelNodeParams{};
|
||||
hipGraph_t graph;
|
||||
hipGraphExec_t graphExec;
|
||||
hipStream_t streamForGraph;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kernelNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kernelNodeParams.gridDim = dim3(blocks);
|
||||
kernelNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kernelNodeParams.sharedMemBytes = 0;
|
||||
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
|
||||
kernelNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kernelNodeParams));
|
||||
|
||||
// Create dependencies
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1));
|
||||
|
||||
// Instantiate the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyD2H_C, hData,
|
||||
C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verify graph execution result
|
||||
HipTest::checkVectorADD(A_h, B_h, hData, N);
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
free(hData);
|
||||
}
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Negative scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
|
||||
LinearAllocGuard<int> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
LinearAllocGuard<int> A_h(LinearAllocs::malloc, Nbytes);
|
||||
memset(A_h.ptr(), 0, Nbytes);
|
||||
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t memcpyH2D;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, A_d.ptr(), A_h.ptr(),
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
// Instantiate the graph
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
||||
|
||||
SECTION("Pass hGraphExec as nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(nullptr, memcpyH2D, A_d.ptr(),
|
||||
A_h.ptr(), Nbytes,
|
||||
hipMemcpyHostToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass GraphNode as nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, nullptr, A_d.ptr(),
|
||||
A_h.ptr(), Nbytes,
|
||||
hipMemcpyHostToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass destination ptr is nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D,
|
||||
nullptr, A_h.ptr(), Nbytes,
|
||||
hipMemcpyHostToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass source ptr is nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
|
||||
nullptr, Nbytes,
|
||||
hipMemcpyHostToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass count as zero") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
|
||||
A_h.ptr(), 0,
|
||||
hipMemcpyHostToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass same pointer as source ptr and destination ptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
|
||||
A_d.ptr(), Nbytes,
|
||||
hipMemcpyDeviceToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
|
||||
A_d.ptr() - 5, Nbytes,
|
||||
hipMemcpyDeviceToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D,
|
||||
A_d.ptr() + 5, A_d.ptr(), Nbytes,
|
||||
hipMemcpyDeviceToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Copy more than allocated memory") {
|
||||
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
|
||||
A_h.ptr(), Nbytes + 8,
|
||||
hipMemcpyHostToDevice),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
@@ -0,0 +1,263 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Functional-
|
||||
1) Instantiate a graph with memcpy node, obtain executable graph and update the hipMemcpy3DParms node params with set. Make sure they are taking effect.
|
||||
Negative-
|
||||
1) Pass hGraphExec as nullptr and verify api returns error code.
|
||||
2) Pass node as nullptr and verify api returns error code.
|
||||
3) Pass pNodeParams as nullptr and verify api returns error code.
|
||||
4) Pass pNodeParams as empty structure object and verify api returns error code.
|
||||
5) API expects at least one memcpy src pointer to be set. When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr are both nullptr, the API is expected to return an error code.
|
||||
6) API expects at least one memcpy dst pointer to be set. When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr are both nullptr, the API is expected to return an error code.
|
||||
7) Passing different element size for hipMemcpy3DParms::srcArray and hipMemcpy3DParms::dstArray is expected to return error code.
|
||||
8) Pass node of different graph and verify api returns error code.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams API Negative scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr int width{10}, height{10}, depth{10};
|
||||
hipArray_t devArray, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparms;
|
||||
hipError_t ret;
|
||||
int* hData;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
hData = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
|
||||
height, depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, make_hipExtent(width+1,
|
||||
height+1, depth+1), hipArrayDefault));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparms.dstArray = devArray;
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
|
||||
|
||||
// Instantiate the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
||||
SECTION("Pass hGraphExec as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams(nullptr, memcpyNode, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass node as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, nullptr, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass pNodeParams as nullptr") {
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass pNodeParams as empty structure object") {
|
||||
hipMemcpy3DParms temp{};
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("API expects atleast one memcpy src pointer to be set") {
|
||||
hipMemcpy3DParms temp;
|
||||
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
|
||||
temp.srcPos = make_hipPos(0, 0, 0);
|
||||
temp.dstPos = make_hipPos(0, 0, 0);
|
||||
temp.extent = make_hipExtent(width , height, depth);
|
||||
temp.dstArray = devArray;
|
||||
temp.kind = hipMemcpyHostToDevice;
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("API expects atleast one memcpy dst pointer to be set") {
|
||||
hipMemcpy3DParms temp;
|
||||
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
|
||||
temp.srcPos = make_hipPos(0, 0, 0);
|
||||
temp.dstPos = make_hipPos(0, 0, 0);
|
||||
temp.extent = make_hipExtent(width , height, depth);
|
||||
temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
temp.kind = hipMemcpyHostToDevice;
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Passing different element size for hipMemcpy3DParms::srcArray"
|
||||
"and hipMemcpy3DParms::dstArray") {
|
||||
hipMemcpy3DParms temp;
|
||||
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
|
||||
temp.srcPos = make_hipPos(0, 0, 0);
|
||||
temp.dstPos = make_hipPos(0, 0, 0);
|
||||
temp.extent = make_hipExtent(width , height, depth);
|
||||
temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
temp.kind = hipMemcpyHostToDevice;
|
||||
temp.srcArray = devArray;
|
||||
temp.dstArray = devArray2;
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Check with other graph node") {
|
||||
hipGraph_t graph1;
|
||||
hipGraphNode_t memcpyNode1;
|
||||
HIP_CHECK(hipGraphCreate(&graph1, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode1, graph1, NULL, 0, &myparms));
|
||||
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode1, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
HIP_CHECK(hipGraphDestroy(graph1));
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipFreeArray(devArray));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
free(hData);
|
||||
}
|
||||
|
||||
/* Test verifies hipGraphExecMemcpyNodeSetParams API Functional scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Functional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr int XSIZE = 1024;
|
||||
int harray1D[XSIZE]{};
|
||||
int harray1Dres[XSIZE]{};
|
||||
constexpr int width{XSIZE};
|
||||
hipArray_t devArray1, devArray2;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparams;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Initialize 1D object
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
harray1D[i] = i + 1;
|
||||
}
|
||||
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
// Allocate 1D device array by passing depth(0), height(0)
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
// Host to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.dstArray = devArray1;
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to Device
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.srcArray = devArray1;
|
||||
myparams.dstArray = devArray2;
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
dependencies.clear();
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparams));
|
||||
|
||||
// Instantiate the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
int harray1Dupdate[XSIZE]{};
|
||||
hipArray_t devArray3;
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray3, &channelDesc,
|
||||
make_hipExtent(width, 0, 0), hipArrayDefault));
|
||||
|
||||
// D2H updated with different pointer harray1Dres -> harray1Dupdate
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(width, 1, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(harray1Dupdate, width * sizeof(int),
|
||||
width, 1);
|
||||
myparams.srcArray = devArray2;
|
||||
myparams.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &myparams));
|
||||
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Validate result
|
||||
for (int i = 0; i < XSIZE; i++) {
|
||||
if (harray1D[i] != harray1Dupdate[i]) {
|
||||
INFO("harray1D: " << harray1D[i] << " harray1Dupdate: " <<
|
||||
harray1Dupdate[i] << " mismatch at : " << i);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
HIP_CHECK(hipFreeArray(devArray2));
|
||||
}
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -18,178 +21,66 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup hipGraphKernelNodeGetAttribute hipGraphKernelNodeGetAttribute
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode,
|
||||
* hipKernelNodeAttrID attr, hipKernelNodeAttrValue* value_out )` -
|
||||
* Queries node attribute.
|
||||
*/
|
||||
#define THREADS_PER_BLOCK 512
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Functional Test for API - hipGraphKernelNodeGetAttribute
|
||||
* 1) GetKernelAttribute for ID hipKernelNodeAttributeCooperative
|
||||
* 2) GetKernelAttribute for ID hipKernelNodeAttributeAccessPolicyWindow
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphKernelNodeGetAttribute.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters") {
|
||||
constexpr int N = 1024;
|
||||
|
||||
TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Functional") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
hipGraph_t graph;
|
||||
hipGraphExec_t graphExec;
|
||||
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
|
||||
hipKernelNodeParams kNodeParams{};
|
||||
hipStream_t stream;
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kNodeParams.gridDim = dim3(blocks);
|
||||
kNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kNodeParams.sharedMemBytes = 0;
|
||||
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kNodeParams));
|
||||
|
||||
// Create dependencies
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1));
|
||||
|
||||
hipKernelNodeAttrValue value_out;
|
||||
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
|
||||
|
||||
SECTION("GetKernelAttribute for hipKernelNodeAttributeCooperative") {
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeCooperative, &value_out));
|
||||
}
|
||||
SECTION("GetKernelAttribute for hipKernelNodeAttributeAccessPolicyWindow") {
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
|
||||
}
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Verify graph execution result
|
||||
HipTest::checkVectorADD<int>(A_h, B_h, C_h, N);
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Negative Test for API - hipGraphKernelNodeGetAttribute
|
||||
* 1) Pass kernel node as nullptr for Get attribute api & verify
|
||||
* 2) Pass KernelNodeAttrID as negative value for Get attribute api & verify
|
||||
* 3) Pass KernelNodeAttrID as INT_MAX value for Get attribute api & verify
|
||||
* 4) Pass KernelNodeAttrValue as nullptr for Get attribute api & verify
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphKernelNodeGetAttribute.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Negative") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
|
||||
hipKernelNodeParams kNodeParams{};
|
||||
hipStream_t stream;
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
hipError_t ret;
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kNodeParams.gridDim = dim3(blocks);
|
||||
kNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kNodeParams.sharedMemBytes = 0;
|
||||
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kNodeParams));
|
||||
hipKernelNodeParams node_params{};
|
||||
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
||||
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
|
||||
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
|
||||
|
||||
hipKernelNodeAttrValue value_out;
|
||||
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
|
||||
size_t N_elem{N};
|
||||
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
|
||||
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
|
||||
|
||||
SECTION("Pass kernel node as nullptr for Get attribute api") {
|
||||
ret = hipGraphKernelNodeGetAttribute(nullptr,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_out);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
hipGraphNode_t graph_node;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
|
||||
|
||||
hipKernelNodeAttrValue node_attribute;
|
||||
|
||||
SECTION("node == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(
|
||||
nullptr, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as negative value for Get attribute api") {
|
||||
ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttrID(-1), &value_out);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("node is not a kernel node") {
|
||||
hipGraphNode_t empty_node;
|
||||
HIP_CHECK(hipGraphAddEmptyNode(&empty_node, graph, nullptr, 0));
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(
|
||||
empty_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as INT_MAX value for Get attribute api") {
|
||||
ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttrID(INT_MAX), &value_out);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("invalid attribute") {
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(graph_node, static_cast<hipKernelNodeAttrID>(-1),
|
||||
&node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#if HT_AMD // getting SIGSEGV error in Cuda Setup
|
||||
SECTION("Pass KernelNodeAttrValue as nullptr for Get attribute api") {
|
||||
ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
#if HT_AMD // segfaults on NVIDIA
|
||||
SECTION("value == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(
|
||||
graph_node, hipKernelNodeAttributeAccessPolicyWindow, nullptr),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -18,353 +21,214 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup hipGraphKernelNodeSetAttribute hipGraphKernelNodeSetAttribute
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode,
|
||||
* hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value )` -
|
||||
* Sets node attribute.
|
||||
*/
|
||||
#define THREADS_PER_BLOCK 512
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Functional Test for API - hipGraphKernelNodeSetAttribute
|
||||
* 1) Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow attributes
|
||||
* 2) Check hipGraphKernelNodeSetAttribute for cooperative attributes
|
||||
* 3) Check hipGraphKernelNodeSetAttribute for window and cooperative attributes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphKernelNodeSetAttribute.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
namespace {
|
||||
// Access properties used to parameterize the access-policy-window test.
// The order of the entries matters: a generator in this file iterates up to
// end(kAccessProperties) - 1 for the miss property, which leaves out the last
// entry (hipAccessPropertyPersisting).
constexpr std::array<hipAccessProperty, 3> kAccessProperties{
|
||||
hipAccessPropertyNormal, hipAccessPropertyStreaming, hipAccessPropertyPersisting};
|
||||
} // anonymous namespace
|
||||
|
||||
static bool validateKernelNodeAttrValue(hipKernelNodeAttrValue in,
|
||||
hipKernelNodeAttrValue out) {
|
||||
if ((in.accessPolicyWindow.base_ptr != out.accessPolicyWindow.base_ptr) ||
|
||||
(in.accessPolicyWindow.hitProp != out.accessPolicyWindow.hitProp) ||
|
||||
(in.accessPolicyWindow.hitRatio != out.accessPolicyWindow.hitRatio) ||
|
||||
(in.accessPolicyWindow.missProp != out.accessPolicyWindow.missProp) ||
|
||||
(in.accessPolicyWindow.num_bytes != out.accessPolicyWindow.num_bytes) ||
|
||||
(in.cooperative != out.cooperative)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
static bool CompareAccessPolicyWindow(const hipKernelNodeAttrValue& lhs,
|
||||
const hipKernelNodeAttrValue& rhs) {
|
||||
return lhs.accessPolicyWindow.base_ptr == rhs.accessPolicyWindow.base_ptr &&
|
||||
lhs.accessPolicyWindow.num_bytes == rhs.accessPolicyWindow.num_bytes &&
|
||||
lhs.accessPolicyWindow.hitRatio == rhs.accessPolicyWindow.hitRatio &&
|
||||
lhs.accessPolicyWindow.hitProp == rhs.accessPolicyWindow.hitProp &&
|
||||
lhs.accessPolicyWindow.missProp == rhs.accessPolicyWindow.missProp;
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Functional") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
hipGraph_t graph;
|
||||
hipGraphExec_t graphExec;
|
||||
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
|
||||
hipKernelNodeParams kNodeParams{};
|
||||
hipStream_t stream;
|
||||
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Positive_AccessPolicyWindow") {
|
||||
constexpr int N = 1024;
|
||||
|
||||
const auto hit_prop = GENERATE(from_range(begin(kAccessProperties), end(kAccessProperties)));
|
||||
const auto miss_prop = GENERATE(from_range(begin(kAccessProperties), end(kAccessProperties) - 1));
|
||||
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
|
||||
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kNodeParams.gridDim = dim3(blocks);
|
||||
kNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kNodeParams.sharedMemBytes = 0;
|
||||
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kNodeParams));
|
||||
hipKernelNodeParams node_params{};
|
||||
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
||||
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
|
||||
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
|
||||
|
||||
// Create dependencies
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1));
|
||||
size_t N_elem{N};
|
||||
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
|
||||
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
|
||||
|
||||
hipKernelNodeAttrValue value_in, value_out;
|
||||
hipGraphNode_t graph_node;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
|
||||
|
||||
SECTION("Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow") {
|
||||
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
|
||||
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
|
||||
int max_window_size;
|
||||
HIP_CHECK(
|
||||
hipDeviceGetAttribute(&max_window_size, hipDeviceAttributeAccessPolicyMaxWindowSize, 0));
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
hipKernelNodeAttrValue node_attribute_1;
|
||||
node_attribute_1.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(A_d);
|
||||
node_attribute_1.accessPolicyWindow.num_bytes =
|
||||
std::min<unsigned long>(static_cast<unsigned long>(max_window_size), sizeof(int) * N);
|
||||
node_attribute_1.accessPolicyWindow.hitRatio = 0.6;
|
||||
node_attribute_1.accessPolicyWindow.hitProp = hit_prop;
|
||||
node_attribute_1.accessPolicyWindow.missProp = miss_prop;
|
||||
|
||||
value_in.accessPolicyWindow.hitRatio = 0.8;
|
||||
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
|
||||
value_in.accessPolicyWindow.missProp = hipAccessPropertyStreaming;
|
||||
HIP_CHECK(hipGraphKernelNodeSetAttribute(graph_node, hipKernelNodeAttributeAccessPolicyWindow,
|
||||
&node_attribute_1));
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
hipKernelNodeAttrValue node_attribute_2;
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(graph_node, hipKernelNodeAttributeAccessPolicyWindow,
|
||||
&node_attribute_2));
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
|
||||
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
|
||||
}
|
||||
SECTION("Check hipGraphKernelNodeSetAttribute for cooperative") {
|
||||
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
|
||||
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
|
||||
REQUIRE(CompareAccessPolicyWindow(node_attribute_1, node_attribute_2));
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
|
||||
value_in.cooperative = 2;
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
|
||||
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
|
||||
}
|
||||
|
||||
SECTION("Check hipGraphKernelNodeSetAttribute for window and cooperative") {
|
||||
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
|
||||
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
|
||||
value_in.cooperative = 8;
|
||||
value_in.accessPolicyWindow.hitRatio = 0.1;
|
||||
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
|
||||
value_in.accessPolicyWindow.missProp = hipAccessPropertyNormal;
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
|
||||
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
|
||||
}
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Verify graph execution result
|
||||
HipTest::checkVectorADD<int>(A_h, B_h, C_h, N);
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Negative/argument Test for API - hipGraphKernelNodeSetAttribute
|
||||
* 1) Pass kernel node as nullptr for Set attribute api and verify
|
||||
* 2) Pass KernelNodeAttrID as invalid value for Set attribute api and verify
|
||||
* 3) Pass KernelNodeAttrID as INT_MAX value for Set attribute api and verify
|
||||
* 4) Pass KernelNodeAttrValue as nullptr for Set attribute api and verify
|
||||
* 5) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value missProp as hipAccessPropertyPersisting
|
||||
* 6) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value hitProp as hipAccessPropertyPersisting
|
||||
* 7) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as 1.4
|
||||
* 8) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as 0
|
||||
* 9) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as 1
|
||||
* 10) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as -1.8
|
||||
* 11) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as -0.6
|
||||
* 12) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6
|
||||
* 13) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6
|
||||
* 14) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.num_bytes as 1 MB
|
||||
* 15) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value base_ptr as nullptr
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphKernelNodeSetAttribute.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Positive_Cooperative") {
|
||||
constexpr int N = 1024;
|
||||
|
||||
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
|
||||
hipKernelNodeParams kNodeParams{};
|
||||
hipStream_t stream;
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
hipError_t ret;
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
|
||||
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kNodeParams.gridDim = dim3(blocks);
|
||||
kNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kNodeParams.sharedMemBytes = 0;
|
||||
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kNodeParams));
|
||||
hipKernelNodeParams node_params{};
|
||||
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
||||
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
|
||||
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
|
||||
|
||||
hipKernelNodeAttrValue value_in, value_out;
|
||||
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
|
||||
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
memcpy(&value_out, &value_in, sizeof(hipKernelNodeAttrValue));
|
||||
size_t N_elem{N};
|
||||
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
|
||||
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
|
||||
|
||||
SECTION("Pass kernel node as nullptr for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(nullptr,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
hipGraphNode_t graph_node;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
|
||||
|
||||
hipKernelNodeAttrValue node_attribute_1;
|
||||
node_attribute_1.cooperative = 2;
|
||||
|
||||
HIP_CHECK(hipGraphKernelNodeSetAttribute(graph_node, hipKernelNodeAttributeCooperative,
|
||||
&node_attribute_1));
|
||||
|
||||
hipKernelNodeAttrValue node_attribute_2;
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(graph_node, hipKernelNodeAttributeCooperative,
|
||||
&node_attribute_2));
|
||||
|
||||
REQUIRE(node_attribute_1.cooperative == node_attribute_2.cooperative);
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative_Parameters") {
|
||||
constexpr int N = 1024;
|
||||
|
||||
int *A_d, *B_d, *C_d;
|
||||
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
|
||||
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
|
||||
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
hipKernelNodeParams node_params{};
|
||||
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
||||
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
|
||||
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
|
||||
|
||||
size_t N_elem{N};
|
||||
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
|
||||
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
|
||||
|
||||
hipGraphNode_t graph_node;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
|
||||
|
||||
int max_window_size;
|
||||
HIP_CHECK(
|
||||
hipDeviceGetAttribute(&max_window_size, hipDeviceAttributeAccessPolicyMaxWindowSize, 0));
|
||||
|
||||
hipKernelNodeAttrValue node_attribute;
|
||||
node_attribute.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(A_d);
|
||||
node_attribute.accessPolicyWindow.num_bytes =
|
||||
std::min<unsigned long>(static_cast<unsigned long>(max_window_size), sizeof(int) * N);
|
||||
node_attribute.accessPolicyWindow.hitRatio = 0.6;
|
||||
node_attribute.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
|
||||
node_attribute.accessPolicyWindow.missProp = hipAccessPropertyStreaming;
|
||||
|
||||
SECTION("node == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
|
||||
nullptr, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as invalid value for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttrID(-1), &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("node is not a kernel node") {
|
||||
hipGraphNode_t empty_node;
|
||||
HIP_CHECK(hipGraphAddEmptyNode(&empty_node, graph, nullptr, 0));
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
|
||||
empty_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as INT_MAX value for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttrID(INT_MAX), &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("invalid attribute") {
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(graph_node, static_cast<hipKernelNodeAttrID>(-1),
|
||||
&node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#if HT_AMD // getting SIGSEGV error in Cuda Setup
|
||||
SECTION("Pass KernelNodeAttrValue as nullptr for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
#if HT_AMD // segfaults on NVIDIA
|
||||
SECTION("value == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
|
||||
graph_node, hipKernelNodeAttributeAccessPolicyWindow, nullptr),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value missProp as hipAccessPropertyPersisting") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.missProp = hipAccessPropertyPersisting;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value hitProp as hipAccessPropertyPersisting") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as 1.4") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = 1.4;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as 0") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = 0;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as 1") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = 1;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as -1.8") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = -1.8;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as -0.6") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = -0.6;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" & pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.num_bytes = 1024;
|
||||
value_in.accessPolicyWindow.hitRatio = 0.6;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" & pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.num_bytes = 1024 * 1024 * 1024;
|
||||
value_in.accessPolicyWindow.hitRatio = -0.6;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.num_bytes as 1 MB") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.num_bytes = 1024 * 1024;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value base_ptr as nullptr") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.base_ptr = nullptr;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
|
||||
SECTION("accessPolicyWindow.num_bytes > accessPolicyMaxWindowSize") {
|
||||
node_attribute.accessPolicyWindow.num_bytes = max_window_size + 1;
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
|
||||
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("accessPolicyWindow.hitRatio < 0") {
|
||||
node_attribute.accessPolicyWindow.hitRatio = -0.6;
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
|
||||
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("accessPolicyWindow.hitRatio > 1.0") {
|
||||
node_attribute.accessPolicyWindow.hitRatio = 1.1;
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
|
||||
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("accessPolicyWindow.missProp == hipAccessPropertyPersisting") {
|
||||
node_attribute.accessPolicyWindow.missProp = hipAccessPropertyPersisting;
|
||||
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
|
||||
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
@@ -0,0 +1,370 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup hipGraphKernelNodeSetAttribute hipGraphKernelNodeSetAttribute
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode,
|
||||
* hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value )` -
|
||||
* Sets node attribute.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Functional Test for API - hipGraphKernelNodeSetAttribute
|
||||
* 1) Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow attributes
|
||||
* 2) Check hipGraphKernelNodeSetAttribute for cooperative attributes
|
||||
* 3) Check hipGraphKernelNodeSetAttribute for window and cooperative attributes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphKernelNodeSetAttribute.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
/**
 * Field-by-field comparison of two kernel node attribute values.
 *
 * Compares the accessPolicyWindow members (base_ptr, hitProp, hitRatio,
 * missProp, num_bytes) and the cooperative member of `in` against `out`.
 *
 * @param in   value that was handed to the Set call
 * @param out  value that was read back by the Get call
 * @return true when every compared field is equal, false otherwise
 */
static bool validateKernelNodeAttrValue(hipKernelNodeAttrValue in,
                                        hipKernelNodeAttrValue out) {
  const auto& expected = in.accessPolicyWindow;
  const auto& actual = out.accessPolicyWindow;

  return (expected.base_ptr == actual.base_ptr) &&
         (expected.hitProp == actual.hitProp) &&
         (expected.hitRatio == actual.hitRatio) &&
         (expected.missProp == actual.missProp) &&
         (expected.num_bytes == actual.num_bytes) &&
         (in.cooperative == out.cooperative);
}
|
||||
|
||||
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Functional") {
  // Builds a graph of two host-to-device copies feeding a vectorADD kernel
  // node followed by a device-to-host copy. Each section reads the kernel
  // node's attribute value, edits some fields, writes it back and checks that
  // a second read returns the same values. The graph is then instantiated,
  // launched and its result verified.
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;

  // Device (dev*) and host (host*) buffers handed to HipTest::initArrays.
  int *devA, *devB, *devC;
  int *hostA, *hostB, *hostC;
  HipTest::initArrays(&devA, &devB, &devC, &hostA, &hostB, &hostC, N, false);
  unsigned gridBlocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  hipGraph_t vecAddGraph;
  HIP_CHECK(hipGraphCreate(&vecAddGraph, 0));
  hipStream_t launchStream;
  HIP_CHECK(hipStreamCreate(&launchStream));

  // Copy nodes: both inputs host -> device, the result device -> host.
  hipGraphNode_t copyInA, copyInB, copyOutC;
  HIP_CHECK(hipGraphAddMemcpyNode1D(&copyInA, vecAddGraph, nullptr, 0, devA, hostA,
                                    Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&copyInB, vecAddGraph, nullptr, 0, devB, hostB,
                                    Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&copyOutC, vecAddGraph, nullptr, 0, hostC, devC,
                                    Nbytes, hipMemcpyDeviceToHost));

  // Kernel node running HipTest::vectorADD<int> over the three device buffers.
  size_t elemCount{N};
  void* launchArgs[] = {&devA, &devB, &devC, reinterpret_cast<void*>(&elemCount)};
  hipKernelNodeParams vecAddParams{};
  vecAddParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
  vecAddParams.gridDim = dim3(gridBlocks);
  vecAddParams.blockDim = dim3(threadsPerBlock);
  vecAddParams.sharedMemBytes = 0;
  vecAddParams.kernelParams = reinterpret_cast<void**>(launchArgs);
  vecAddParams.extra = nullptr;
  hipGraphNode_t vecAddNode;
  HIP_CHECK(hipGraphAddKernelNode(&vecAddNode, vecAddGraph, nullptr, 0, &vecAddParams));

  // Create dependencies: copyInA, copyInB -> vecAddNode -> copyOutC
  HIP_CHECK(hipGraphAddDependencies(vecAddGraph, &copyInA, &vecAddNode, 1));
  HIP_CHECK(hipGraphAddDependencies(vecAddGraph, &copyInB, &vecAddNode, 1));
  HIP_CHECK(hipGraphAddDependencies(vecAddGraph, &vecAddNode, &copyOutC, 1));

  // Every section below sets and reads back this same attribute id.
  const auto attrId = hipKernelNodeAttributeAccessPolicyWindow;
  hipKernelNodeAttrValue requested, observed;

  SECTION("Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow") {
    memset(&requested, 0, sizeof(requested));
    memset(&observed, 0, sizeof(observed));

    // Start from the node's current value and change only the policy fields.
    HIP_CHECK(hipGraphKernelNodeGetAttribute(vecAddNode, attrId, &requested));
    requested.accessPolicyWindow.hitRatio = 0.8;
    requested.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
    requested.accessPolicyWindow.missProp = hipAccessPropertyStreaming;

    HIP_CHECK(hipGraphKernelNodeSetAttribute(vecAddNode, attrId, &requested));
    HIP_CHECK(hipGraphKernelNodeGetAttribute(vecAddNode, attrId, &observed));
    REQUIRE(true == validateKernelNodeAttrValue(requested, observed));
  }
  SECTION("Check hipGraphKernelNodeSetAttribute for cooperative") {
    memset(&requested, 0, sizeof(requested));
    memset(&observed, 0, sizeof(observed));

    // NOTE(review): the cooperative member is written, but the value is still
    // set/read through the AccessPolicyWindow attribute id - confirm intent.
    HIP_CHECK(hipGraphKernelNodeGetAttribute(vecAddNode, attrId, &requested));
    requested.cooperative = 2;

    HIP_CHECK(hipGraphKernelNodeSetAttribute(vecAddNode, attrId, &requested));
    HIP_CHECK(hipGraphKernelNodeGetAttribute(vecAddNode, attrId, &observed));
    REQUIRE(true == validateKernelNodeAttrValue(requested, observed));
  }

  SECTION("Check hipGraphKernelNodeSetAttribute for window and cooperative") {
    memset(&requested, 0, sizeof(requested));
    memset(&observed, 0, sizeof(observed));

    HIP_CHECK(hipGraphKernelNodeGetAttribute(vecAddNode, attrId, &requested));
    requested.cooperative = 8;
    requested.accessPolicyWindow.hitRatio = 0.1;
    requested.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
    requested.accessPolicyWindow.missProp = hipAccessPropertyNormal;

    HIP_CHECK(hipGraphKernelNodeSetAttribute(vecAddNode, attrId, &requested));
    HIP_CHECK(hipGraphKernelNodeGetAttribute(vecAddNode, attrId, &observed));
    REQUIRE(true == validateKernelNodeAttrValue(requested, observed));
  }

  // Instantiate and launch the graph
  hipGraphExec_t vecAddExec;
  HIP_CHECK(hipGraphInstantiate(&vecAddExec, vecAddGraph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(vecAddExec, launchStream));
  HIP_CHECK(hipStreamSynchronize(launchStream));

  // Verify graph execution result
  HipTest::checkVectorADD<int>(hostA, hostB, hostC, N);

  // Release buffers, then the executable graph, the graph and the stream.
  HipTest::freeArrays(devA, devB, devC, hostA, hostB, hostC, false);
  HIP_CHECK(hipGraphExecDestroy(vecAddExec));
  HIP_CHECK(hipGraphDestroy(vecAddGraph));
  HIP_CHECK(hipStreamDestroy(launchStream));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Negative/argument Test for API - hipGraphKernelNodeSetAttribute
|
||||
* 1) Pass kernel node as nullptr for Set attribute api and verify
|
||||
* 2) Pass KernelNodeAttrID as invalid value for Set attribute api and verify
|
||||
* 3) Pass KernelNodeAttrID as INT_MAX value for Set attribute api and verify
|
||||
* 4) Pass KernelNodeAttrValue as nullptr for Set attribute api and verify
|
||||
* 5) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value missProp as hipAccessPropertyPersisting
|
||||
* 6) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value hitProp as hipAccessPropertyPersisting
|
||||
* 7) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as 1.4
|
||||
* 8) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as 0
|
||||
* 9) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as 1
|
||||
* 10) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as -1.8
|
||||
* 11) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.hitRatio as -0.6
|
||||
* 12) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6
|
||||
* 13) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6
|
||||
* 14) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value accessPolicyWindow.num_bytes as 1 MB
|
||||
* 15) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
|
||||
* and pass value base_ptr as nullptr
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphKernelNodeSetAttribute.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
|
||||
hipKernelNodeParams kNodeParams{};
|
||||
hipStream_t stream;
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
hipError_t ret;
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kNodeParams.gridDim = dim3(blocks);
|
||||
kNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kNodeParams.sharedMemBytes = 0;
|
||||
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kNodeParams));
|
||||
|
||||
hipKernelNodeAttrValue value_in, value_out;
|
||||
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
|
||||
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
|
||||
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
|
||||
memcpy(&value_out, &value_in, sizeof(hipKernelNodeAttrValue));
|
||||
|
||||
SECTION("Pass kernel node as nullptr for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(nullptr,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as invalid value for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttrID(-1), &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as INT_MAX value for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttrID(INT_MAX), &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
#if HT_AMD // getting SIGSEGV error in Cuda Setup
|
||||
SECTION("Pass KernelNodeAttrValue as nullptr for Set attribute api") {
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
#endif
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value missProp as hipAccessPropertyPersisting") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.missProp = hipAccessPropertyPersisting;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value hitProp as hipAccessPropertyPersisting") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as 1.4") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = 1.4;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as 0") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = 0;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as 1") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = 1;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as -1.8") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = -1.8;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.hitRatio as -0.6") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.hitRatio = -0.6;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" & pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.num_bytes = 1024;
|
||||
value_in.accessPolicyWindow.hitRatio = 0.6;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" & pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.num_bytes = 1024 * 1024 * 1024;
|
||||
value_in.accessPolicyWindow.hitRatio = -0.6;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value accessPolicyWindow.num_bytes as 1 MB") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.num_bytes = 1024 * 1024;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
|
||||
" and pass value base_ptr as nullptr") {
|
||||
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
|
||||
value_in.accessPolicyWindow.base_ptr = nullptr;
|
||||
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
|
||||
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,220 +20,69 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Negative -
|
||||
1) Pass node as nullptr and verify api returns error code.
|
||||
2) Pass an uninitialized node and verify api returns error code.
|
||||
3) Pass pNodeParams as nullptr and verify api returns error code.
|
||||
Functional -
|
||||
1) Create a graph, add Memcpy node to graph with desired node params.
|
||||
Verify api fetches the node params mentioned while adding Memcpy node.
|
||||
2) Set Memcpy node params with hipGraphMemcpyNodeSetParams,
|
||||
now get the params and verify both are same.
|
||||
*/
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
#define SIZE 10
|
||||
#define UPDATESIZE 8
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeGetParams API Negative scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative") {
  CHECK_IMAGE_SUPPORT

  /* Builds a graph holding one host-to-array memcpy node, then checks that
   * hipGraphMemcpyNodeGetParams reports hipErrorInvalidValue for each
   * invalid argument combination exercised by the sections below.
   */
  constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
  constexpr int elemCount = width * height * depth;
  const uint32_t byteCount = elemCount * sizeof(int);

  // Host source buffer: zeroed, then every element set to its linear index.
  int* hostBuf = static_cast<int*>(malloc(byteCount));
  REQUIRE(hostBuf != nullptr);
  memset(hostBuf, 0, byteCount);
  for (int idx = 0; idx < elemCount; ++idx) {
    hostBuf[idx] = idx;
  }

  // Destination 3D array with a single signed channel of int width.
  hipArray_t dstArr;
  hipChannelFormatDesc fmtDesc = hipCreateChannelDesc(
      sizeof(int) * 8, 0, 0, 0, hipChannelFormatKindSigned);
  HIP_CHECK(hipMalloc3DArray(&dstArr, &fmtDesc,
                             make_hipExtent(width, height, depth),
                             hipArrayDefault));

  // Copy description: whole host buffer into the array, starting at origin.
  hipMemcpy3DParms copyParams;
  memset(&copyParams, 0x0, sizeof(copyParams));
  copyParams.srcPtr = make_hipPitchedPtr(hostBuf, width * sizeof(int),
                                         width, height);
  copyParams.srcPos = make_hipPos(0, 0, 0);
  copyParams.dstArray = dstArr;
  copyParams.dstPos = make_hipPos(0, 0, 0);
  copyParams.extent = make_hipExtent(width, height, depth);
  copyParams.kind = hipMemcpyHostToDevice;

  hipGraph_t testGraph;
  hipGraphNode_t copyNode;
  HIP_CHECK(hipGraphCreate(&testGraph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode(&copyNode, testGraph, nullptr, 0,
                                  &copyParams));

  SECTION("Pass node as nullptr") {
    const hipError_t status = hipGraphMemcpyNodeGetParams(nullptr,
                                                          &copyParams);
    REQUIRE(status == hipErrorInvalidValue);
  }
  SECTION("Pass un-initilize node") {
    // Value-initialized handle that was never produced by an add-node call.
    hipGraphNode_t blankNode{};
    const hipError_t status = hipGraphMemcpyNodeGetParams(blankNode,
                                                          &copyParams);
    REQUIRE(status == hipErrorInvalidValue);
  }
  SECTION("Pass GetNodeParams as nullptr") {
    const hipError_t status = hipGraphMemcpyNodeGetParams(copyNode, nullptr);
    REQUIRE(status == hipErrorInvalidValue);
  }

  // Release the array, the host buffer and finally the graph.
  HIP_CHECK(hipFreeArray(dstArr));
  free(hostBuf);
  HIP_CHECK(hipGraphDestroy(testGraph));
}
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeGetParams API Functional scenarios.
|
||||
/**
|
||||
* @addtogroup hipGraphMemcpyNodeGetParams hipGraphMemcpyNodeGetParams
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms *pNodeParams)` -
|
||||
* Gets a memcpy node's parameters
|
||||
* ________________________
|
||||
* Test cases from other APIs:
|
||||
* - @ref Unit_hipGraphMemcpyNodeSetParams_Positive_Basic
|
||||
*/
|
||||
|
||||
/* Returns true when the two positions have equal x, y and z components. */
static bool compareHipPos(hipPos hPos1, hipPos hPos2) {
  return (hPos1.x == hPos2.x) && (hPos1.y == hPos2.y) &&
         (hPos1.z == hPos2.z);
}
|
||||
/* Returns true when the two extents have equal width, height and depth. */
static bool compareHipExtent(hipExtent hExt1, hipExtent hExt2) {
  return (hExt1.width == hExt2.width) && (hExt1.height == hExt2.height) &&
         (hExt1.depth == hExt2.depth);
}
|
||||
/* Returns true when the two pitched pointers refer to the same address and
 * carry the same pitch and ysize (and, on AMD builds, the same xsize).
 */
static bool compareHipPitchedPtr(hipPitchedPtr hpPtr1, hipPitchedPtr hpPtr2) {
  if (reinterpret_cast<int *>(hpPtr1.ptr) !=
      reinterpret_cast<int *>(hpPtr2.ptr)) {
    return false;
  }
  if (hpPtr1.pitch != hpPtr2.pitch) {
    return false;
  }
#if HT_AMD
  /* The xsize comparison is compiled for AMD only: on nvidia the xsize value
   * is not being updated properly due to an issue with the CUDA api. */
  if (hpPtr1.xsize != hpPtr2.xsize) {
    return false;
  }
#endif
  return hpPtr1.ysize == hpPtr2.ysize;
}
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify API behaviour with invalid arguments:
|
||||
* -# node is nullptr
|
||||
* -# pNodeParams is nullptr
|
||||
* -# node is destroyed
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphMemcpyNodeGetParams.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative_Parameters") {
|
||||
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
|
||||
|
||||
static bool memcpyNodeCompare(hipMemcpy3DParms *mNode1,
|
||||
hipMemcpy3DParms *mNode2) {
|
||||
if (mNode1->srcArray != mNode2->srcArray)
|
||||
return false;
|
||||
if (!compareHipPos(mNode1->srcPos, mNode2->srcPos))
|
||||
return false;
|
||||
if (!compareHipPitchedPtr(mNode1->srcPtr, mNode2->srcPtr))
|
||||
return false;
|
||||
if (mNode1->dstArray != mNode2->dstArray)
|
||||
return false;
|
||||
if (!compareHipPos(mNode1->dstPos, mNode2->dstPos))
|
||||
return false;
|
||||
if (!compareHipPitchedPtr(mNode1->dstPtr, mNode2->dstPtr))
|
||||
return false;
|
||||
if (!compareHipExtent(mNode1->extent, mNode2->extent))
|
||||
return false;
|
||||
if (mNode1->kind != mNode2->kind)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Functional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
hipMemcpy3DParms params = {};
|
||||
params.srcPtr = src_alloc.pitched_ptr();
|
||||
params.srcPos = make_hipPos(0, 0, 0);
|
||||
params.dstPtr = dst_alloc.pitched_ptr();
|
||||
params.dstPos = make_hipPos(0, 0, 0);
|
||||
params.extent = extent;
|
||||
params.kind = hipMemcpyDeviceToDevice;
|
||||
|
||||
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
|
||||
hipArray_t devArray;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparms;
|
||||
int* hData;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
hData = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
|
||||
height, depth), hipArrayDefault));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparms.dstArray = devArray;
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
hipGraph_t graph = nullptr;
|
||||
hipGraphNode_t node = nullptr;
|
||||
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
|
||||
|
||||
SECTION("Get Memcpy Param and verify.") {
|
||||
hipMemcpy3DParms m3DGetParams;
|
||||
REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode,
|
||||
&m3DGetParams));
|
||||
// Validating the result
|
||||
REQUIRE(true == memcpyNodeCompare(&myparms, &m3DGetParams));
|
||||
SECTION("node == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(nullptr, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Set memcpy params and Get param and verify.") {
|
||||
hipMemcpy3DParms myparms1, m3DGetParams1;
|
||||
constexpr int width1{UPDATESIZE}, height1{UPDATESIZE}, depth1{UPDATESIZE};
|
||||
hipArray_t devArray1;
|
||||
hipChannelFormatKind formatKind1 = hipChannelFormatKindSigned;
|
||||
int* hData1;
|
||||
uint32_t size1 = width1 * height1 * depth1 * sizeof(int);
|
||||
hData1 = reinterpret_cast<int*>(malloc(size1));
|
||||
REQUIRE(hData1 != nullptr);
|
||||
memset(hData1, 0, size1);
|
||||
for (int i = 0; i < depth1; i++) {
|
||||
for (int j = 0; j < height1; j++) {
|
||||
for (int k = 0; k < width1; k++) {
|
||||
hData1[i*width1*height1 + j*width1 + k] = i*width1*height1 +
|
||||
j*width1 + k;
|
||||
}
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind1);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc1,
|
||||
make_hipExtent(width1, height1, depth1), hipArrayDefault));
|
||||
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms1.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms1.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms1.extent = make_hipExtent(width1 , height1, depth1);
|
||||
myparms1.srcPtr = make_hipPitchedPtr(hData1, width1 * sizeof(int),
|
||||
width1, height1);
|
||||
myparms1.dstArray = devArray1;
|
||||
myparms1.kind = hipMemcpyHostToDevice;
|
||||
|
||||
REQUIRE(hipSuccess == hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));
|
||||
REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode,
|
||||
&m3DGetParams1));
|
||||
REQUIRE(true == memcpyNodeCompare(&myparms1, &m3DGetParams1));
|
||||
|
||||
HIP_CHECK(hipFreeArray(devArray1));
|
||||
free(hData1);
|
||||
SECTION("pNodeParams == nullptr") {
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms));
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(node, nullptr), hipErrorInvalidValue);
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
HIP_CHECK(hipFreeArray(devArray));
|
||||
free(hData);
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-208
|
||||
SECTION("Node is destroyed") {
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Negative -
|
||||
1) Pass node as nullptr and verify api returns error code.
|
||||
2) Pass an uninitialized node and verify api returns error code.
|
||||
3) Pass pNodeParams as nullptr and verify api returns error code.
|
||||
Functional -
|
||||
1) Create a graph, add Memcpy node to graph with desired node params.
|
||||
Verify api fetches the node params mentioned while adding Memcpy node.
|
||||
2) Set Memcpy node params with hipGraphMemcpyNodeSetParams,
|
||||
now get the params and verify both are same.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#define SIZE 10
|
||||
#define UPDATESIZE 8
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeGetParams API Negative scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative") {
  CHECK_IMAGE_SUPPORT

  /* Builds a graph holding one host-to-array memcpy node, then checks that
   * hipGraphMemcpyNodeGetParams reports hipErrorInvalidValue for each
   * invalid argument combination exercised by the sections below.
   */
  constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
  constexpr int elemCount = width * height * depth;
  const uint32_t byteCount = elemCount * sizeof(int);

  // Host source buffer: zeroed, then every element set to its linear index.
  int* hostBuf = static_cast<int*>(malloc(byteCount));
  REQUIRE(hostBuf != nullptr);
  memset(hostBuf, 0, byteCount);
  for (int idx = 0; idx < elemCount; ++idx) {
    hostBuf[idx] = idx;
  }

  // Destination 3D array with a single signed channel of int width.
  hipArray_t dstArr;
  hipChannelFormatDesc fmtDesc = hipCreateChannelDesc(
      sizeof(int) * 8, 0, 0, 0, hipChannelFormatKindSigned);
  HIP_CHECK(hipMalloc3DArray(&dstArr, &fmtDesc,
                             make_hipExtent(width, height, depth),
                             hipArrayDefault));

  // Copy description: whole host buffer into the array, starting at origin.
  hipMemcpy3DParms copyParams;
  memset(&copyParams, 0x0, sizeof(copyParams));
  copyParams.srcPtr = make_hipPitchedPtr(hostBuf, width * sizeof(int),
                                         width, height);
  copyParams.srcPos = make_hipPos(0, 0, 0);
  copyParams.dstArray = dstArr;
  copyParams.dstPos = make_hipPos(0, 0, 0);
  copyParams.extent = make_hipExtent(width, height, depth);
  copyParams.kind = hipMemcpyHostToDevice;

  hipGraph_t testGraph;
  hipGraphNode_t copyNode;
  HIP_CHECK(hipGraphCreate(&testGraph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode(&copyNode, testGraph, nullptr, 0,
                                  &copyParams));

  SECTION("Pass node as nullptr") {
    const hipError_t status = hipGraphMemcpyNodeGetParams(nullptr,
                                                          &copyParams);
    REQUIRE(status == hipErrorInvalidValue);
  }
  SECTION("Pass un-initilize node") {
    // Value-initialized handle that was never produced by an add-node call.
    hipGraphNode_t blankNode{};
    const hipError_t status = hipGraphMemcpyNodeGetParams(blankNode,
                                                          &copyParams);
    REQUIRE(status == hipErrorInvalidValue);
  }
  SECTION("Pass GetNodeParams as nullptr") {
    const hipError_t status = hipGraphMemcpyNodeGetParams(copyNode, nullptr);
    REQUIRE(status == hipErrorInvalidValue);
  }

  // Release the array, the host buffer and finally the graph.
  HIP_CHECK(hipFreeArray(dstArr));
  free(hostBuf);
  HIP_CHECK(hipGraphDestroy(testGraph));
}
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeGetParams API Functional scenarios.
|
||||
*/
|
||||
|
||||
/* Returns true when the two positions have equal x, y and z components. */
static bool compareHipPos(hipPos hPos1, hipPos hPos2) {
  return (hPos1.x == hPos2.x) && (hPos1.y == hPos2.y) &&
         (hPos1.z == hPos2.z);
}
|
||||
/* Returns true when the two extents have equal width, height and depth. */
static bool compareHipExtent(hipExtent hExt1, hipExtent hExt2) {
  return (hExt1.width == hExt2.width) && (hExt1.height == hExt2.height) &&
         (hExt1.depth == hExt2.depth);
}
|
||||
/* Returns true when the two pitched pointers refer to the same address and
 * carry the same pitch and ysize (and, on AMD builds, the same xsize).
 */
static bool compareHipPitchedPtr(hipPitchedPtr hpPtr1, hipPitchedPtr hpPtr2) {
  if (reinterpret_cast<int *>(hpPtr1.ptr) !=
      reinterpret_cast<int *>(hpPtr2.ptr)) {
    return false;
  }
  if (hpPtr1.pitch != hpPtr2.pitch) {
    return false;
  }
#if HT_AMD
  /* The xsize comparison is compiled for AMD only: on nvidia the xsize value
   * is not being updated properly due to an issue with the CUDA api. */
  if (hpPtr1.xsize != hpPtr2.xsize) {
    return false;
  }
#endif
  return hpPtr1.ysize == hpPtr2.ysize;
}
|
||||
|
||||
/* Field-by-field equality check of two hipMemcpy3DParms descriptions.
 * Compares source/destination arrays, positions and pitched pointers, the
 * extent and the copy kind; evaluation stops at the first mismatch.
 * Returns true only when every compared field matches.
 */
static bool memcpyNodeCompare(hipMemcpy3DParms *mNode1,
                              hipMemcpy3DParms *mNode2) {
  return (mNode1->srcArray == mNode2->srcArray) &&
         compareHipPos(mNode1->srcPos, mNode2->srcPos) &&
         compareHipPitchedPtr(mNode1->srcPtr, mNode2->srcPtr) &&
         (mNode1->dstArray == mNode2->dstArray) &&
         compareHipPos(mNode1->dstPos, mNode2->dstPos) &&
         compareHipPitchedPtr(mNode1->dstPtr, mNode2->dstPtr) &&
         compareHipExtent(mNode1->extent, mNode2->extent) &&
         (mNode1->kind == mNode2->kind);
}
|
||||
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Functional") {
  CHECK_IMAGE_SUPPORT

  /* Adds one host-to-array memcpy node to a graph and verifies that
   * hipGraphMemcpyNodeGetParams returns (a) the parameters the node was
   * created with and (b) the parameters applied afterwards through
   * hipGraphMemcpyNodeSetParams.
   */
  constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
  constexpr int elemCount = width * height * depth;
  const uint32_t byteCount = elemCount * sizeof(int);

  // Host source buffer: zeroed, then every element set to its linear index.
  int* hostBuf = static_cast<int*>(malloc(byteCount));
  REQUIRE(hostBuf != nullptr);
  memset(hostBuf, 0, byteCount);
  for (int idx = 0; idx < elemCount; ++idx) {
    hostBuf[idx] = idx;
  }

  // Destination 3D array with a single signed channel of int width.
  hipArray_t dstArr;
  hipChannelFormatDesc fmtDesc = hipCreateChannelDesc(
      sizeof(int) * 8, 0, 0, 0, hipChannelFormatKindSigned);
  HIP_CHECK(hipMalloc3DArray(&dstArr, &fmtDesc,
                             make_hipExtent(width, height, depth),
                             hipArrayDefault));

  // Parameters the node is created with.
  hipMemcpy3DParms initialParams;
  memset(&initialParams, 0x0, sizeof(initialParams));
  initialParams.srcPtr = make_hipPitchedPtr(hostBuf, width * sizeof(int),
                                            width, height);
  initialParams.srcPos = make_hipPos(0, 0, 0);
  initialParams.dstArray = dstArr;
  initialParams.dstPos = make_hipPos(0, 0, 0);
  initialParams.extent = make_hipExtent(width, height, depth);
  initialParams.kind = hipMemcpyHostToDevice;

  hipGraph_t testGraph;
  hipGraphNode_t copyNode;
  HIP_CHECK(hipGraphCreate(&testGraph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode(&copyNode, testGraph, nullptr, 0,
                                  &initialParams));

  SECTION("Get Memcpy Param and verify.") {
    hipMemcpy3DParms fetched;
    REQUIRE(hipGraphMemcpyNodeGetParams(copyNode, &fetched) == hipSuccess);
    // The values read back must equal the ones used at node creation.
    REQUIRE(memcpyNodeCompare(&initialParams, &fetched));
  }

  SECTION("Set memcpy params and Get param and verify.") {
    // Second, smaller data set used to replace the node parameters.
    constexpr int width1{UPDATESIZE}, height1{UPDATESIZE}, depth1{UPDATESIZE};
    constexpr int elemCount1 = width1 * height1 * depth1;
    const uint32_t byteCount1 = elemCount1 * sizeof(int);

    int* hostBuf1 = static_cast<int*>(malloc(byteCount1));
    REQUIRE(hostBuf1 != nullptr);
    memset(hostBuf1, 0, byteCount1);
    for (int idx = 0; idx < elemCount1; ++idx) {
      hostBuf1[idx] = idx;
    }

    hipArray_t dstArr1;
    hipChannelFormatDesc fmtDesc1 = hipCreateChannelDesc(
        sizeof(int) * 8, 0, 0, 0, hipChannelFormatKindSigned);
    HIP_CHECK(hipMalloc3DArray(&dstArr1, &fmtDesc1,
                               make_hipExtent(width1, height1, depth1),
                               hipArrayDefault));

    hipMemcpy3DParms updatedParams;
    memset(&updatedParams, 0x0, sizeof(updatedParams));
    updatedParams.srcPtr = make_hipPitchedPtr(hostBuf1, width1 * sizeof(int),
                                              width1, height1);
    updatedParams.srcPos = make_hipPos(0, 0, 0);
    updatedParams.dstArray = dstArr1;
    updatedParams.dstPos = make_hipPos(0, 0, 0);
    updatedParams.extent = make_hipExtent(width1, height1, depth1);
    updatedParams.kind = hipMemcpyHostToDevice;

    // Apply the new parameters, read them back and compare.
    hipMemcpy3DParms fetched1;
    REQUIRE(hipGraphMemcpyNodeSetParams(copyNode, &updatedParams) ==
            hipSuccess);
    REQUIRE(hipGraphMemcpyNodeGetParams(copyNode, &fetched1) == hipSuccess);
    REQUIRE(memcpyNodeCompare(&updatedParams, &fetched1));

    HIP_CHECK(hipFreeArray(dstArr1));
    free(hostBuf1);
  }

  // Release the array, the host buffer and finally the graph.
  HIP_CHECK(hipFreeArray(dstArr));
  free(hostBuf);
  HIP_CHECK(hipGraphDestroy(testGraph));
}
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,203 +20,264 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Negative -
|
||||
1) Pass node as nullptr and verify api returns error code.
|
||||
2) Pass an uninitialized node and verify api returns error code.
|
||||
3) Pass pNodeParams as nullptr and verify api returns error code.
|
||||
Functional -
|
||||
1) Add Memcpy node to graph, update the Memcpy node params with set and
|
||||
launch the graph and check updated params are taking effect.
|
||||
2) Add Memcpy node to graph, launch graph, then update the Memcpy node params
|
||||
with set and launch the graph and check updated params are taking effect.
|
||||
*/
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <memcpy3d_tests_common.hh>
|
||||
|
||||
#define SIZE 10
|
||||
#include "graph_tests_common.hh"
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams API Negative scenarios.
|
||||
/**
|
||||
* @addtogroup hipGraphMemcpyNodeSetParams hipGraphMemcpyNodeSetParams
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphMemcpyNodeSetParams (hipGraphNode_t node, const hipMemcpy3DParms *pNodeParams)` - Sets a
|
||||
* memcpy node's parameters
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
|
||||
hipArray_t devArray;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparms;
|
||||
int* hData;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
hData = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify that node parameters get updated correctly by creating a node with valid but
|
||||
* incorrect parameters, and the setting them to the correct values after which the graph is
|
||||
* executed and the results of the memcpy verified.
|
||||
* The test is run for all possible memcpy directions, with both the corresponding memcpy
|
||||
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphMemcpyNodeSetParams.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Positive_Basic") {
|
||||
constexpr bool async = false;
|
||||
|
||||
SECTION("Device to host") {
|
||||
Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
|
||||
SECTION("Device to host with default kind") {
|
||||
Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
|
||||
SECTION("Host to device") {
|
||||
Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
|
||||
SECTION("Host to device with default kind") {
|
||||
Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
|
||||
SECTION("Host to host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true, true>); }
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
|
||||
height, depth), hipArrayDefault));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparms.dstArray = devArray;
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
|
||||
hipGraph_t graph;
|
||||
hipError_t ret;
|
||||
hipGraphNode_t memcpyNode;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
|
||||
|
||||
SECTION("Pass node as nullptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams(nullptr, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass un-initialize node") {
|
||||
hipGraphNode_t memcpyNode_uninit{};
|
||||
ret = hipGraphMemcpyNodeSetParams(memcpyNode_uninit, &myparms);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass SetNodeParams as nullptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams(memcpyNode, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
HIP_CHECK(hipFreeArray(devArray));
|
||||
free(hData);
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams API Functional scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Functional") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
|
||||
hipArray_t devArray;
|
||||
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
||||
hipMemcpy3DParms myparms, myparms1;
|
||||
uint32_t size = width * height * depth * sizeof(int);
|
||||
|
||||
int *hData = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, size);
|
||||
int *hDataTemp = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(hDataTemp != nullptr);
|
||||
memset(hDataTemp, 0, size);
|
||||
int *hOutputData = reinterpret_cast<int *>(malloc(size));
|
||||
REQUIRE(hOutputData != nullptr);
|
||||
memset(hOutputData, 0, size);
|
||||
int *hOutputData1 = reinterpret_cast<int *>(malloc(size));
|
||||
REQUIRE(hOutputData1 != nullptr);
|
||||
memset(hOutputData1, 0, size);
|
||||
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
|
||||
}
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
|
||||
height, depth), hipArrayDefault));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
|
||||
// Host to Device
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
|
||||
width, height);
|
||||
myparms.dstArray = devArray;
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
|
||||
hipGraph_t graph;
|
||||
hipGraphNode_t memcpyNode;
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
|
||||
dependencies.push_back(memcpyNode);
|
||||
|
||||
// Device to host
|
||||
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms1.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms1.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms1.dstPtr = make_hipPitchedPtr(hDataTemp, width * sizeof(int),
|
||||
width, height);
|
||||
myparms1.srcArray = devArray;
|
||||
myparms1.extent = make_hipExtent(width, height, depth);
|
||||
myparms1.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparms1));
|
||||
|
||||
SECTION("Update the memcpyNode and check") {
|
||||
// Device to host with updated host ptr hDataTemp -> hOutputData
|
||||
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms1.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms1.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms1.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int),
|
||||
width, height);
|
||||
myparms1.srcArray = devArray;
|
||||
myparms1.extent = make_hipExtent(width, height, depth);
|
||||
myparms1.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Check result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
SECTION("Array from/to Host") {
|
||||
Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
|
||||
SECTION("Update the memcpyNode again and check") {
|
||||
// Device to host with updated host ptr hOutputData -> hOutputData1
|
||||
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms1.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms1.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms1.dstPtr = make_hipPitchedPtr(hOutputData1, width * sizeof(int),
|
||||
width, height);
|
||||
myparms1.srcArray = devArray;
|
||||
myparms1.extent = make_hipExtent(width, height, depth);
|
||||
myparms1.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
|
||||
dependencies.size(), &myparms1));
|
||||
HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Check result
|
||||
HipTest::checkArray(hData, hOutputData1, width, height, depth);
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-220
|
||||
SECTION("Array from/to Device") {
|
||||
Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async, true, true>);
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipFreeArray(devArray));
|
||||
free(hData);
|
||||
free(hDataTemp);
|
||||
free(hOutputData);
|
||||
free(hOutputData1);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify API behaviour with invalid arguments:
|
||||
* -# node is nullptr
|
||||
* -# graph is nullptr
|
||||
* -# pDependencies is nullptr when numDependencies is not zero
|
||||
* -# A node in pDependencies originates from a different graph
|
||||
* -# numDependencies is invalid
|
||||
* -# A node is duplicated in pDependencies
|
||||
* -# dst is nullptr
|
||||
* -# src is nullptr
|
||||
* -# kind is an invalid enum value
|
||||
* -# count is zero
|
||||
* -# count is larger than dst allocation size
|
||||
* -# count is larger than src allocation size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphAddMemcpyNode.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative_Parameters") {
|
||||
using namespace std::placeholders;
|
||||
|
||||
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
|
||||
|
||||
constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
|
||||
SECTION("node == nullptr") {
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(nullptr, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("dst_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.x = dst_ptr.pitch - extent.width + 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("extent.width + src_pos.x > src_ptr.pitch") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.x = src_ptr.pitch - extent.width + 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.y out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.y = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.y out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.y = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.z out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.z = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.z out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.z = 1;
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Invalid MemcpyKind") {
|
||||
auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent,
|
||||
static_cast<hipMemcpyKind>(-1));
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, ¶ms), hipErrorInvalidMemcpyDirection);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
};
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
|
||||
hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
@@ -6,8 +6,10 @@ in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,169 +19,180 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Functional-
|
||||
1) Create a graph, add Memcpy node to graph, update the Memcpy node params with set and make sure they are taking effect.
|
||||
Negative-
|
||||
1) Pass pGraphNode as nullptr and check if api returns error.
|
||||
2) Pass destination ptr is nullptr, api expected to return error code.
|
||||
3) Pass source ptr is nullptr, api expected to return error code.
|
||||
4) Pass count as zero, api expected to return error code.
|
||||
5) Pass same pointer as source ptr and destination ptr, api expected to return error code.
|
||||
6) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code.
|
||||
7) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code.
|
||||
8) If count is more than allocated size for source and destination ptr, api should return error code.
|
||||
9) If count is less than allocated size for source and destination ptr, api is expected to return hipSuccess.
|
||||
*/
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams1D API Negative scenarios.
|
||||
#include "graph_tests_common.hh"
|
||||
|
||||
/**
 * Returns the opposite direction for host<->device copies.
 *
 * @param direction memcpy kind to reverse.
 * @return hipMemcpyDeviceToHost for hipMemcpyHostToDevice, hipMemcpyHostToDevice for
 *         hipMemcpyDeviceToHost, and `direction` unchanged for every other value.
 */
static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction) {
  switch (direction) {
    case hipMemcpyHostToDevice:
      return hipMemcpyDeviceToHost;
    case hipMemcpyDeviceToHost:
      return hipMemcpyHostToDevice;
    default:
      // Any other kind is returned as-is.
      return direction;
  }
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipGraphMemcpyNodeSetParams1D hipGraphMemcpyNodeSetParams1D
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void *dst, const void *src, size_t count,
|
||||
* hipMemcpyKind kind)` - Sets a memcpy node's parameters to perform a 1-dimensional copy
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
int *A_d, *A_h;
|
||||
hipGraphNode_t memcpyNode{};
|
||||
hipError_t ret;
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&A_h, Nbytes));
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify that node parameters get updated correctly by creating a node with valid but
|
||||
* incorrect parameters, and the setting them to the correct values after which the graph is
|
||||
* executed and the results of the memcpy verified.
|
||||
* The test is run for all possible memcpy directions, with both the corresponding memcpy
|
||||
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphMemcpyNodeSetParams1D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Positive_Basic") {
|
||||
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, src, dst, count / 2,
|
||||
ReverseMemcpyDirection(direction)));
|
||||
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(node, dst, src, count, direction));
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
SECTION("Pass pGraphNode as nullptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(nullptr, A_d, A_h, Nbytes,
|
||||
hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
return hipSuccess;
|
||||
};
|
||||
|
||||
#if HT_NVIDIA
|
||||
MemcpyWithDirectionCommonTests<false>(f);
|
||||
#else
|
||||
using namespace std::placeholders;
|
||||
|
||||
SECTION("Device to host") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
|
||||
}
|
||||
SECTION("Pass destination ptr is nullptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, nullptr, A_h, Nbytes,
|
||||
hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("Host to device") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
|
||||
}
|
||||
SECTION("Pass source ptr is nullptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, nullptr, Nbytes,
|
||||
hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
}
|
||||
}
|
||||
SECTION("Pass count as zero") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, 0,
|
||||
hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
}
|
||||
#if HT_AMD
|
||||
SECTION("Pass same pointer as source ptr and destination ptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d, Nbytes,
|
||||
hipMemcpyDeviceToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-209
|
||||
#if 0
|
||||
SECTION("Host to host") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
|
||||
}
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
#endif
|
||||
SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d-5, Nbytes,
|
||||
hipMemcpyDeviceToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d+5, A_d, Nbytes-5,
|
||||
hipMemcpyDeviceToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Copy more than allocated memory") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes+8,
|
||||
hipMemcpyHostToDevice);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
SECTION("Copy less than allocated memory") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes-8,
|
||||
hipMemcpyHostToDevice);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
}
|
||||
SECTION("Change the kind from H2D to D2H") {
|
||||
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes,
|
||||
hipMemcpyDeviceToHost);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
|
||||
// Disabled on AMD due to defect - EXSWHTEC-210
|
||||
#if 0
|
||||
SECTION("Device to host with default kind") {
|
||||
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(A_h));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
SECTION("Host to device with default kind") {
|
||||
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams1D API Functional scenarios.
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Verify API behaviour with invalid arguments:
|
||||
* -# node is nullptr
|
||||
* -# dst is nullptr
|
||||
* -# src is nullptr
|
||||
* -# kind is an invalid enum value
|
||||
* -# count is zero
|
||||
* -# count is larger than dst allocation size
|
||||
* -# count is larger than src allocation size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphMemcpyNodeSetParams1D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Functional") {
|
||||
constexpr size_t N = 1024;
|
||||
constexpr size_t Nbytes = N * sizeof(int);
|
||||
constexpr auto blocksPerCU = 6; // to hide latency
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t NElem{N};
|
||||
|
||||
int *hData = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(hData != nullptr);
|
||||
memset(hData, 0, Nbytes);
|
||||
|
||||
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C;
|
||||
hipGraphNode_t kernel_vecAdd;
|
||||
hipKernelNodeParams kernelNodeParams{};
|
||||
hipGraph_t graph;
|
||||
hipGraphExec_t graphExec;
|
||||
hipStream_t streamForGraph;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative_Parameters") {
|
||||
using namespace std::placeholders;
|
||||
hipGraph_t graph = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
int src[2] = {}, dst[2] = {};
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h,
|
||||
Nbytes, hipMemcpyHostToDevice));
|
||||
hipGraphNode_t node = nullptr;
|
||||
HIP_CHECK(
|
||||
hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(dst), hipMemcpyDefault));
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d,
|
||||
Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyD2H_C, hData, C_d, Nbytes,
|
||||
hipMemcpyDeviceToHost));
|
||||
SECTION("node == nullptr") {
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams1D(nullptr, dst, src, sizeof(dst), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
|
||||
kernelNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
|
||||
kernelNodeParams.gridDim = dim3(blocks);
|
||||
kernelNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kernelNodeParams.sharedMemBytes = 0;
|
||||
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
|
||||
kernelNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
|
||||
&kernelNodeParams));
|
||||
MemcpyWithDirectionCommonNegativeTests(
|
||||
std::bind(hipGraphMemcpyNodeSetParams1D, node, _1, _2, _3, _4), dst, src, sizeof(dst),
|
||||
hipMemcpyDefault);
|
||||
|
||||
// Create dependencies
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1));
|
||||
SECTION("count == 0") {
|
||||
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams1D(node, dst, src, 0, hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// Instantiate and launch the graph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
SECTION("count larger than dst allocation size") {
|
||||
LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
|
||||
HIP_CHECK_ERROR(
|
||||
hipGraphMemcpyNodeSetParams1D(node, dev_dst.ptr(), src, sizeof(src), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// Verify graph execution result
|
||||
HipTest::checkVectorADD(A_h, B_h, hData, N);
|
||||
SECTION("count larger than src allocation size") {
|
||||
LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
|
||||
HIP_CHECK_ERROR(
|
||||
hipGraphMemcpyNodeSetParams1D(node, dst, dev_src.ptr(), sizeof(dst), hipMemcpyDefault),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
free(hData);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
Testcase Scenarios :
Functional-
1) Create a graph, add Memcpy node to graph, update the Memcpy node params with set and make sure
   they are taking effect.
Negative-
1) Pass pGraphNode as nullptr and check if api returns error.
2) Pass destination ptr is nullptr, api expected to return error code.
3) Pass source ptr is nullptr, api expected to return error code.
4) Pass count as zero, api expected to return error code.
5) Pass same pointer as source ptr and destination ptr, api expected to return error code
   (checked on AMD only).
6) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination
   ptr, api expected to return error code.
7) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source
   ptr, api expected to return error code.
8) If count is more than allocated size for source and destination ptr, api should return error
   code.
9) If count is less than allocated size for source and destination ptr, api is expected to return
   hipSuccess.
10) Change the kind from host-to-device to device-to-host for the same pointers, api is expected
    to return hipSuccess.
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams1D API Negative scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative") {
  // Buffer size used by every scenario, in elements and in bytes.
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);

  // Both buffers are obtained with hipMalloc, including the one named A_h.
  int* A_d;
  int* A_h;
  HIP_CHECK(hipMalloc(&A_d, Nbytes));
  HIP_CHECK(hipMalloc(&A_h, Nbytes));

  // Graph with a single valid 1D memcpy node; each section below tries to
  // overwrite that node's parameters.
  hipGraph_t graph;
  hipGraphNode_t memcpyNode{};
  HIP_CHECK(hipGraphCreate(&graph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d, A_h, Nbytes,
                                    hipMemcpyHostToDevice));

  SECTION("Pass pGraphNode as nullptr") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(nullptr, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }

  SECTION("Pass destination ptr is nullptr") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, nullptr, A_h, Nbytes, hipMemcpyHostToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }

  SECTION("Pass source ptr is nullptr") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, nullptr, Nbytes, hipMemcpyHostToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }

  SECTION("Pass count as zero") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, 0, hipMemcpyHostToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }

#if HT_AMD
  // Only compiled for the AMD backend.
  SECTION("Pass same pointer as source ptr and destination ptr") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d, Nbytes, hipMemcpyDeviceToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }
#endif

  SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d - 5, Nbytes, hipMemcpyDeviceToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }

  SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
    const hipError_t status = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d + 5, A_d, Nbytes - 5,
                                                            hipMemcpyDeviceToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }

  SECTION("Copy more than allocated memory") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes + 8, hipMemcpyHostToDevice);
    REQUIRE(hipErrorInvalidValue == status);
  }

  // The two remaining scenarios are expected to be accepted by the API.
  SECTION("Copy less than allocated memory") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes - 8, hipMemcpyHostToDevice);
    REQUIRE(hipSuccess == status);
  }

  SECTION("Change the kind from H2D to D2H") {
    const hipError_t status =
        hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes, hipMemcpyDeviceToHost);
    REQUIRE(hipSuccess == status);
  }

  // Release the buffers first, then the graph.
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipFree(A_h));
  HIP_CHECK(hipGraphDestroy(graph));
}
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams1D API Functional scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Functional") {
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;

  int* A_d;
  int* B_d;
  int* C_d;
  int* A_h;
  int* B_h;
  int* C_h;
  size_t NElem{N};

  // Zero-filled host buffer; the device-to-host node is redirected to it below,
  // so the result lands here instead of in C_h.
  int* redirectedOut = static_cast<int*>(malloc(Nbytes));
  REQUIRE(redirectedOut != nullptr);
  memset(redirectedOut, 0, Nbytes);

  hipGraphNode_t copyInA, copyInB, copyOutC;
  hipGraphNode_t vecAddNode;
  hipKernelNodeParams kernelNodeParams{};
  hipGraph_t graph;
  hipGraphExec_t graphExec;
  hipStream_t streamForGraph;

  HIP_CHECK(hipStreamCreate(&streamForGraph));

  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  HIP_CHECK(hipGraphCreate(&graph, 0));

  // Two host-to-device input copies and one device-to-host output copy.
  HIP_CHECK(hipGraphAddMemcpyNode1D(&copyInA, graph, nullptr, 0, A_d, A_h, Nbytes,
                                    hipMemcpyHostToDevice));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&copyInB, graph, nullptr, 0, B_d, B_h, Nbytes,
                                    hipMemcpyHostToDevice));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&copyOutC, graph, nullptr, 0, C_h, C_d, Nbytes,
                                    hipMemcpyDeviceToHost));

  // API under test: retarget the output copy from C_h to redirectedOut.
  HIP_CHECK(
      hipGraphMemcpyNodeSetParams1D(copyOutC, redirectedOut, C_d, Nbytes, hipMemcpyDeviceToHost));

  // Kernel node computing the vector add on the device buffers.
  void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&NElem)};
  kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
  kernelNodeParams.gridDim = dim3(blocks);
  kernelNodeParams.blockDim = dim3(threadsPerBlock);
  kernelNodeParams.sharedMemBytes = 0;
  kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
  kernelNodeParams.extra = nullptr;
  HIP_CHECK(hipGraphAddKernelNode(&vecAddNode, graph, nullptr, 0, &kernelNodeParams));

  // Ordering: both input copies -> kernel -> output copy.
  HIP_CHECK(hipGraphAddDependencies(graph, &copyInA, &vecAddNode, 1));
  HIP_CHECK(hipGraphAddDependencies(graph, &copyInB, &vecAddNode, 1));
  HIP_CHECK(hipGraphAddDependencies(graph, &vecAddNode, &copyOutC, 1));

  // Instantiate, launch and wait for completion.
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // The sum must be found in the redirected buffer.
  HipTest::checkVectorADD(A_h, B_h, redirectedOut, N);

  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipStreamDestroy(streamForGraph));
  HIP_CHECK(hipGraphDestroy(graph));
  free(redirectedOut);
}
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
Negative -
|
||||
1) Pass node as nullptr and verify api returns error code.
|
||||
2) Pass an uninitialized node and verify api returns error code.
|
||||
3) Pass pNodeParams as nullptr and verify api returns error code.
|
||||
Functional -
|
||||
1) Add Memcpy node to graph, update the Memcpy node params with set and
|
||||
launch the graph and check updated params are taking effect.
|
||||
2) Add Memcpy node to graph, launch graph, then update the Memcpy node params
|
||||
with set and launch the graph and check updated params are taking effect.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#define SIZE 10
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams API Negative scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative") {
  CHECK_IMAGE_SUPPORT

  constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
  constexpr int elemCount = width * height * depth;
  uint32_t size = elemCount * sizeof(int);

  // Host source buffer; each element holds its own linear index.
  int* hData = reinterpret_cast<int*>(malloc(size));
  REQUIRE(hData != nullptr);
  memset(hData, 0, size);
  for (int idx = 0; idx < elemCount; ++idx) {
    hData[idx] = idx;
  }

  // 3D device array of signed 32-bit elements used as the copy destination.
  hipArray_t devArray;
  hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
  hipChannelFormatDesc channelDesc =
      hipCreateChannelDesc(sizeof(int) * 8, 0, 0, 0, formatKind);
  HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, height, depth),
                             hipArrayDefault));

  // Valid host -> device-array copy description.
  hipMemcpy3DParms myparms;
  memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
  myparms.srcPos = make_hipPos(0, 0, 0);
  myparms.dstPos = make_hipPos(0, 0, 0);
  myparms.extent = make_hipExtent(width, height, depth);
  myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), width, height);
  myparms.dstArray = devArray;
  myparms.kind = hipMemcpyHostToDevice;

  // Graph with one valid memcpy node that the sections try to update.
  hipGraph_t graph;
  hipGraphNode_t memcpyNode;
  HIP_CHECK(hipGraphCreate(&graph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));

  SECTION("Pass node as nullptr") {
    const hipError_t status = hipGraphMemcpyNodeSetParams(nullptr, &myparms);
    REQUIRE(hipErrorInvalidValue == status);
  }

  SECTION("Pass un-initialize node") {
    // Value-initialized handle that was never added to any graph.
    hipGraphNode_t memcpyNode_uninit{};
    const hipError_t status = hipGraphMemcpyNodeSetParams(memcpyNode_uninit, &myparms);
    REQUIRE(hipErrorInvalidValue == status);
  }

  SECTION("Pass SetNodeParams as nullptr") {
    const hipError_t status = hipGraphMemcpyNodeSetParams(memcpyNode, nullptr);
    REQUIRE(hipErrorInvalidValue == status);
  }

  HIP_CHECK(hipFreeArray(devArray));
  free(hData);
  HIP_CHECK(hipGraphDestroy(graph));
}
|
||||
|
||||
/* Test verifies hipGraphMemcpyNodeSetParams API Functional scenarios.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Functional") {
  CHECK_IMAGE_SUPPORT

  constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
  constexpr int elemCount = width * height * depth;
  uint32_t size = elemCount * sizeof(int);

  // Four zero-filled host buffers: the input, the initial D2H target, and one
  // output buffer per section.
  int* hData = reinterpret_cast<int*>(malloc(size));
  REQUIRE(hData != nullptr);
  memset(hData, 0, size);
  int* hDataTemp = reinterpret_cast<int*>(malloc(size));
  REQUIRE(hDataTemp != nullptr);
  memset(hDataTemp, 0, size);
  int* hOutputData = reinterpret_cast<int*>(malloc(size));
  REQUIRE(hOutputData != nullptr);
  memset(hOutputData, 0, size);
  int* hOutputData1 = reinterpret_cast<int*>(malloc(size));
  REQUIRE(hOutputData1 != nullptr);
  memset(hOutputData1, 0, size);

  // Each input element holds its own linear index.
  for (int idx = 0; idx < elemCount; ++idx) {
    hData[idx] = idx;
  }

  // 3D device array of signed 32-bit elements that both copies go through.
  hipArray_t devArray;
  hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
  hipChannelFormatDesc channelDesc =
      hipCreateChannelDesc(sizeof(int) * 8, 0, 0, 0, formatKind);
  HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, height, depth),
                             hipArrayDefault));

  // Host to Device
  hipMemcpy3DParms myparms;
  memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
  myparms.srcPos = make_hipPos(0, 0, 0);
  myparms.dstPos = make_hipPos(0, 0, 0);
  myparms.extent = make_hipExtent(width, height, depth);
  myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), width, height);
  myparms.dstArray = devArray;
  myparms.kind = hipMemcpyHostToDevice;

  // Builds a zeroed device-array -> host copy description writing to hostDst.
  auto makeD2HParams = [&](int* hostDst) {
    hipMemcpy3DParms parms;
    memset(&parms, 0x0, sizeof(hipMemcpy3DParms));
    parms.srcPos = make_hipPos(0, 0, 0);
    parms.dstPos = make_hipPos(0, 0, 0);
    parms.dstPtr = make_hipPitchedPtr(hostDst, width * sizeof(int), width, height);
    parms.srcArray = devArray;
    parms.extent = make_hipExtent(width, height, depth);
    parms.kind = hipMemcpyDeviceToHost;
    return parms;
  };

  hipGraph_t graph;
  hipGraphNode_t memcpyNode;
  std::vector<hipGraphNode_t> dependencies;
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;

  HIP_CHECK(hipStreamCreate(&streamForGraph));
  HIP_CHECK(hipGraphCreate(&graph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
  dependencies.push_back(memcpyNode);

  // Device to host, initially targeting hDataTemp and ordered after the H2D node.
  hipMemcpy3DParms myparms1 = makeD2HParams(hDataTemp);
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), dependencies.size(),
                                  &myparms1));

  SECTION("Update the memcpyNode and check") {
    // Device to host with updated host ptr hDataTemp -> hOutputData
    myparms1 = makeD2HParams(hOutputData);
    HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));

    // Instantiate and launch the graph
    HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
    HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
    HIP_CHECK(hipStreamSynchronize(streamForGraph));

    // Check result
    HipTest::checkArray(hData, hOutputData, width, height, depth);
  }

  SECTION("Update the memcpyNode again and check") {
    // Device to host with updated host ptr hOutputData -> hOutputData1
    myparms1 = makeD2HParams(hOutputData1);

    // A further D2H node is added with these params and then set to the same params.
    HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), dependencies.size(),
                                    &myparms1));
    HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));

    // Instantiate and launch the graph
    HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
    HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
    HIP_CHECK(hipStreamSynchronize(streamForGraph));

    // Check result
    HipTest::checkArray(hData, hOutputData1, width, height, depth);
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));
  HIP_CHECK(hipFreeArray(devArray));
  free(hData);
  free(hDataTemp);
  free(hOutputData);
  free(hOutputData1);
}
|
||||
@@ -38,9 +38,13 @@ set(TEST_SRC
|
||||
hipMemcpy3DAsync.cc
|
||||
hipMemcpy3DAsync_old.cc
|
||||
hipMemcpyParam2D.cc
|
||||
hipMemcpyParam2D_old.cc
|
||||
hipMemcpyParam2DAsync.cc
|
||||
hipMemcpyParam2DAsync_old.cc
|
||||
hipMemcpy2D.cc
|
||||
hipMemcpy2D_old.cc
|
||||
hipMemcpy2DAsync.cc
|
||||
hipMemcpy2DAsync_old.cc
|
||||
hipMemcpy2DFromArray.cc
|
||||
hipMemcpy2DFromArray_old.cc
|
||||
hipMemcpy2DFromArrayAsync.cc
|
||||
|
||||
@@ -1,496 +1,151 @@
|
||||
/*
|
||||
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup hipMemcpy2D hipMemcpy2D
|
||||
* @{
|
||||
* @ingroup MemcpyTest
|
||||
* `hipMemcpy2D(void* dst, size_t dpitch, const void* src,
|
||||
* size_t spitch, size_t width, size_t height,
|
||||
* hipMemcpyKind kind)` -
|
||||
* Copies data between host and device.
|
||||
*/
|
||||
|
||||
// Testcase Description:
|
||||
// 1) Verifies the working of Memcpy2D API negative scenarios by
|
||||
// Pass NULL to destination pointer
|
||||
// Pass NULL to Source pointer
|
||||
// Pass width greater than spitch/dpitch
|
||||
// 2) Verifies hipMemcpy2D API by
|
||||
// pass 0 to destination pitch
|
||||
// pass 0 to source pitch
|
||||
// pass 0 to width
|
||||
// pass 0 to height
|
||||
// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by
|
||||
// performing D2H, D2D and H2D memory kind copies on same GPU
|
||||
// 4) Verifies working of Memcpy2D API for the following scenarios
|
||||
// H2D-D2D-D2H on host and device memory
|
||||
// H2D-D2D-D2H on pinned host and device memory
|
||||
// H2D-D2D-D2H functionalities where memory is allocated in GPU-0
|
||||
// and API is triggered from GPU-1
|
||||
#include "memcpy2d_tests_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static constexpr auto NUM_W{16};
|
||||
static constexpr auto NUM_H{16};
|
||||
static constexpr auto COLUMNS{8};
|
||||
static constexpr auto ROWS{8};
|
||||
TEST_CASE("Unit_hipMemcpy2D_Positive_Basic") {
|
||||
constexpr bool async = false;
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2D API on same GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
SECTION("Device to Host") { Memcpy2DDeviceToHostShell<async>(hipMemcpy2D); }
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "B_d" using D2D copy
|
||||
"B_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", ""
|
||||
, int, float, double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
// 1 refers to pinned host memory
|
||||
auto mem_type = GENERATE(0, 1);
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr},
|
||||
*B_d{nullptr};
|
||||
size_t pitch_A, pitch_B;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
|
||||
// Allocating memory
|
||||
if (mem_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
|
||||
SECTION("Device to Device") {
|
||||
SECTION("Peer access disabled") { Memcpy2DDeviceToDeviceShell<async, false>(hipMemcpy2D); }
|
||||
SECTION("Peer access enabled") { Memcpy2DDeviceToDeviceShell<async, true>(hipMemcpy2D); }
|
||||
}
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
|
||||
&pitch_B, width, NUM_H));
|
||||
|
||||
// Initialize the data
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
|
||||
SECTION("Host to Device") { Memcpy2DHostToDeviceShell<async>(hipMemcpy2D); }
|
||||
|
||||
// Host to Device
|
||||
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
// Performs D2D on same GPU device
|
||||
HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d,
|
||||
pitch_A, COLUMNS*sizeof(TestType),
|
||||
ROWS, hipMemcpyDeviceToDevice));
|
||||
|
||||
// hipMemcpy2D Device to Host
|
||||
HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B,
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
if (mem_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
SECTION("Host to Host") { Memcpy2DHostToHostShell<async>(hipMemcpy2D); }
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2D API on same GPU.
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
The src and dst input pointers to hipMemCpy2D add an offset to the pointers
|
||||
returned by the allocation functions.
|
||||
TEST_CASE("Unit_hipMemcpy2D_Positive_Synchronization_Behavior") {
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "B_d" using D2D copy
|
||||
"B_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(hipMemcpy2D, true); }
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", ""
|
||||
, int, float, double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
// 1 refers to pinned host memory
|
||||
auto mem_type = GENERATE(0, 1);
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr},
|
||||
*B_d{nullptr};
|
||||
size_t pitch_A, pitch_B;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
|
||||
// Allocating memory
|
||||
if (mem_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
|
||||
SECTION("Device to Host") {
|
||||
Memcpy2DDtoHPageableSyncBehavior(hipMemcpy2D, true);
|
||||
Memcpy2DDtoHPinnedSyncBehavior(hipMemcpy2D, true);
|
||||
}
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
|
||||
&pitch_B, width, NUM_H));
|
||||
|
||||
// Initialize the data
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
|
||||
|
||||
// Host to Device
|
||||
HIP_CHECK(hipMemcpy2D(A_d+COLUMNS*sizeof(TestType), pitch_A, A_h,
|
||||
COLUMNS*sizeof(TestType), COLUMNS*sizeof(TestType),
|
||||
ROWS, hipMemcpyHostToDevice));
|
||||
|
||||
// Performs D2D on same GPU device
|
||||
HIP_CHECK(hipMemcpy2D(B_d+COLUMNS*sizeof(TestType), pitch_B,
|
||||
A_d+COLUMNS*sizeof(TestType),
|
||||
pitch_A, COLUMNS*sizeof(TestType),
|
||||
ROWS, hipMemcpyDeviceToDevice));
|
||||
|
||||
// hipMemcpy2D Device to Host
|
||||
HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType),
|
||||
B_d+COLUMNS*sizeof(TestType), pitch_B,
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
|
||||
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
if (mem_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
SECTION("Device to Device") {
|
||||
#if HT_NVIDIA
|
||||
Memcpy2DDtoDSyncBehavior(hipMemcpy2D, false);
|
||||
#else
|
||||
Memcpy2DDtoDSyncBehavior(hipMemcpy2D, true);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232
|
||||
SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(hipMemcpy2D, true); }
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2D API on Peer GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
3. Device context change where memory is allocated in GPU-0
|
||||
and API is triggered from GPU-1
|
||||
TEST_CASE("Unit_hipMemcpy2D_Positive_Parameters") {
|
||||
constexpr bool async = false;
|
||||
Memcpy2DZeroWidthHeight<async>(hipMemcpy2D);
|
||||
}
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "X_d" using D2D copy
|
||||
"X_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy2D_Negative_Parameters") {
|
||||
constexpr size_t cols = 128;
|
||||
constexpr size_t rows = 128;
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", ""
|
||||
, int, float, double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
auto mem_type = GENERATE(0, 1);
|
||||
int numDevices = 0;
|
||||
int canAccessPeer = 0;
|
||||
TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
|
||||
if (canAccessPeer) {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
|
||||
// Allocating memory
|
||||
if (mem_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
|
||||
}
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
|
||||
// Initialize the data
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
|
||||
|
||||
char *X_d{nullptr};
|
||||
size_t pitch_X;
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d),
|
||||
&pitch_X, width, NUM_H));
|
||||
|
||||
// Change device
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
|
||||
// Host to Device
|
||||
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
|
||||
COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice));
|
||||
|
||||
// Device to Device
|
||||
HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d,
|
||||
pitch_A, COLUMNS*sizeof(TestType),
|
||||
ROWS, hipMemcpyDeviceToDevice));
|
||||
|
||||
// Device to Host
|
||||
HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), X_d,
|
||||
pitch_X, COLUMNS*sizeof(TestType), ROWS, hipMemcpyDeviceToHost));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
if (mem_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
HIP_CHECK(hipFree(X_d));
|
||||
} else {
|
||||
SUCCEED("Machine does not seem to have P2P");
|
||||
constexpr auto NegativeTests = [](void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
SECTION("dst == nullptr") {
|
||||
HIP_CHECK_ERROR(hipMemcpy2D(nullptr, dpitch, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipped the testcase as no of devices is less than 2");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This Testcase verifies the null size checks of hipMemcpy2D API
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2D_SizeCheck") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));
  int* A_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(int)};

  // Allocating memory: one host array and one pitched device array.
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, nullptr, NUM_W*NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));

  // Initialize the data
  HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);

  // Argument order is hipMemcpy2D(dst, dpitch, src, spitch, width, height, kind).
  // Each section name below states which of those arguments is zeroed.

  // dpitch == 0 with a non-zero width must be rejected.
  SECTION("hipMemcpy2D API where Destination Pitch is zero") {
    REQUIRE(hipMemcpy2D(A_h, 0, A_d,
                        pitch_A, NUM_W, NUM_H,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }

  // spitch == 0 with a non-zero width must be rejected.
  SECTION("hipMemcpy2D API where Source Pitch is zero") {
    REQUIRE(hipMemcpy2D(A_h, width, A_d,
                        0, NUM_W, NUM_H,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }

  // A zero-height copy is expected to succeed.
  SECTION("hipMemcpy2D API where height is zero") {
    REQUIRE(hipMemcpy2D(A_h, width, A_d,
                        pitch_A, NUM_W, 0,
                        hipMemcpyDeviceToHost) == hipSuccess);
  }

  // A zero-width copy is expected to succeed.
  SECTION("hipMemcpy2D API where width is zero") {
    REQUIRE(hipMemcpy2D(A_h, width, A_d,
                        pitch_A, 0, NUM_H,
                        hipMemcpyDeviceToHost) == hipSuccess);
  }

  // DeAllocating the memory
  HIP_CHECK(hipFree(A_d));
  free(A_h);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This Testcase verifies all the negative scenarios of hipMemcpy2D API
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2D_Negative") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));
  int* A_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(int)};

  // Allocating memory: one host array and one pitched device array.
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, nullptr, NUM_W*NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));

  // Initialize the data
  HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);

  // Argument order is hipMemcpy2D(dst, dpitch, src, spitch, width, height, kind).

  SECTION("hipMemcpy2D API by Passing nullptr to destination") {
    REQUIRE(hipMemcpy2D(nullptr, width, A_d,
                        pitch_A, COLUMNS*sizeof(int), ROWS,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }

  // Uses a valid destination so that only the null source can trigger the
  // failure. Previously this section duplicated the name above and passed
  // nullptr for both pointers.
  SECTION("hipMemcpy2D API by Passing nullptr to source") {
    REQUIRE(hipMemcpy2D(A_h, width, nullptr,
                        pitch_A, COLUMNS*sizeof(int), ROWS,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }

  // dpitch of 10 bytes is smaller than the COLUMNS*sizeof(int) row width.
  SECTION("hipMemcpy2D API where width is greater than destination pitch") {
    REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A,
                        COLUMNS*sizeof(int), ROWS,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }

  // DeAllocating the memory
  HIP_CHECK(hipFree(A_d));
  free(A_h);
}
|
||||
|
||||
/**
 * Copies N single-int rows host -> device -> host with hipMemcpy2D, using a
 * pitch that spreads the rows across the whole allocation, and checks that the
 * values survive the round trip.
 *
 * @param inc  Requested number of ints per row; the buffers are
 *             sizeof(int) * N * inc bytes unless the device reports too little
 *             free memory, in which case a smaller size is used.
 */
static void hipMemcpy2D_Basic_Size_Test(size_t inc) {
  // Amount of device memory left untouched when the request has to shrink.
  constexpr size_t defaultProgramSize = static_cast<size_t>(256) * 1024 * 1024;
  constexpr int N = 2;
  constexpr int value = 42;
  int *in = nullptr, *out = nullptr, *dev = nullptr;
  size_t newSize = 0, inp = 0;
  size_t size = sizeof(int) * N * inc;

  // Named freeMem/totalMem so the C library free() is not shadowed.
  size_t freeMem = 0, totalMem = 0;
  HIP_CHECK(hipMemGetInfo(&freeMem, &totalMem));

  if (freeMem < 2 * size) {
    // freeMem - defaultProgramSize is unsigned arithmetic: without this guard
    // it wraps to a huge value when less than the reserve is available.
    if (freeMem <= defaultProgramSize) {
      SUCCEED("Not enough free device memory to run the size test");
      return;
    }
    newSize = (freeMem - defaultProgramSize) / 2;
  } else {
    newSize = size;
  }

  // Each of the N rows needs room for at least one int.
  if (newSize < sizeof(int) * N) {
    SUCCEED("Not enough free device memory to run the size test");
    return;
  }

  INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
  INFO("Free memory: " << freeMem/1024.0/1024.0 << " MB or " << freeMem << " Bytes");
  INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");

  HIP_CHECK(hipHostMalloc(&in, newSize));
  HIP_CHECK(hipHostMalloc(&out, newSize));
  HIP_CHECK(hipMalloc(&dev, newSize));

  // inp is the row stride in ints; only the first int of each row is written.
  inp = newSize / (sizeof(int) * N);
  for (size_t i = 0; i < N; i++) {
    in[i * inp] = value;
  }

  size_t pitch = sizeof(int) * inp;

  HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int),
                        N, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int),
                        N, hipMemcpyDeviceToHost));

  for (size_t i = 0; i < N; i++) {
    REQUIRE(out[i * inp] == value);
  }

  HIP_CHECK(hipFree(dev));
  HIP_CHECK(hipHostFree(in));
  HIP_CHECK(hipHostFree(out));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs multidevice size check on hipMemcpy2D API
|
||||
1. Verify hipMemcpy2D with 1 << 20 size
|
||||
2. Verify hipMemcpy2D with 1 << 21 size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
size_t input = 1 << 20;
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
|
||||
for (int i=0; i < numDevices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
|
||||
SECTION("Verify hipMemcpy2D with 1 << 20 size") {
|
||||
hipMemcpy2D_Basic_Size_Test(input);
|
||||
SECTION("src == nullptr") {
|
||||
HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, nullptr, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Verify hipMemcpy2D with 1 << 21 size") {
|
||||
input <<= 1;
|
||||
hipMemcpy2D_Basic_Size_Test(input);
|
||||
|
||||
SECTION("dpitch < width") {
|
||||
HIP_CHECK_ERROR(hipMemcpy2D(dst, width - 1, src, spitch, width, height, kind),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("spitch < width") {
|
||||
HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, src, width - 1, width, height, kind),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("dpitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
HIP_CHECK_ERROR(
|
||||
hipMemcpy2D(dst, static_cast<size_t>(attr) + 1, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("spitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
HIP_CHECK_ERROR(
|
||||
hipMemcpy2D(dst, dpitch, src, static_cast<size_t>(attr) + 1, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234
|
||||
SECTION("Invalid MemcpyKind") {
|
||||
HIP_CHECK_ERROR(
|
||||
hipMemcpy2D(dst, dpitch, src, spitch, width, height, static_cast<hipMemcpyKind>(-1)),
|
||||
hipErrorInvalidMemcpyDirection);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
|
||||
NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
|
||||
NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
|
||||
NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int),
|
||||
cols * sizeof(int), rows, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard2D<int> src_alloc(cols, rows);
|
||||
LinearAllocGuard2D<int> dst_alloc(cols, rows);
|
||||
NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,555 +1,188 @@
|
||||
/*
|
||||
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync
|
||||
* @{
|
||||
* @ingroup MemcpyTest
|
||||
* `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src,
|
||||
* size_t spitch, size_t width, size_t height,
|
||||
* hipMemcpyKind kind, hipStream_t stream = 0 )` -
|
||||
* Copies data between host and device.
|
||||
*/
|
||||
|
||||
// Testcase Description:
|
||||
// 1) Verifies the working of Memcpy2DAsync API negative scenarios by
|
||||
// Pass NULL to destination pointer
|
||||
// Pass NULL to Source pointer
|
||||
// Pass width greater than spitch/dpitch
|
||||
// 2) Verifies hipMemcpy2DAsync API by
|
||||
// pass 0 to destination pitch
|
||||
// pass 0 to source pitch
|
||||
// pass 0 to width
|
||||
// pass 0 to height
|
||||
// 3) Verifies working of Memcpy2DAsync API on host memory
|
||||
// and pinned host memory by
|
||||
// performing D2H, D2D and H2D memory kind copies on same GPU
|
||||
// 4) Verifies working of Memcpy2DAsync API on host memory
|
||||
// and pinned host memory by
|
||||
// performing D2H, D2D and H2D memory kind copies on peer GPU
|
||||
// 5) Verifies working of Memcpy2DAsync API where memory is allocated
|
||||
// in GPU-0 and stream is created on GPU-1
|
||||
#include "memcpy2d_tests_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static constexpr auto NUM_W{16};
|
||||
static constexpr auto NUM_H{16};
|
||||
static constexpr auto COLUMNS{6};
|
||||
static constexpr auto ROWS{6};
|
||||
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic") {
|
||||
using namespace std::placeholders;
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This performs the following scenarios of hipMemcpy2DAsync API on same GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
constexpr bool async = true;
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "B_d" using D2D copy
|
||||
"B_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
|
||||
const StreamGuard stream_guard(stream_type);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", ""
|
||||
, int, float, double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
// 1 refers to pinned host memory
|
||||
auto mem_type = GENERATE(0, 1);
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr},
|
||||
*B_d{nullptr};
|
||||
size_t pitch_A, pitch_B;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
// Allocating memory
|
||||
if (mem_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
|
||||
}
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
|
||||
&pitch_B, width, NUM_H));
|
||||
|
||||
// Initialize the data
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
|
||||
SECTION("Calling Async apis with stream object created by user") {
|
||||
// Host to Device
|
||||
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
|
||||
// Performs D2D on same GPU device
|
||||
HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d,
|
||||
pitch_A, COLUMNS*sizeof(TestType),
|
||||
ROWS, hipMemcpyDeviceToDevice, stream));
|
||||
|
||||
// hipMemcpy2DAsync Device to Host
|
||||
HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B,
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
SECTION("Calling Async apis with hipStreamPerThread") {
|
||||
// Host to Device
|
||||
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyHostToDevice, hipStreamPerThread));
|
||||
|
||||
// Performs D2D on same GPU device
|
||||
HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, pitch_A,
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyDeviceToDevice, hipStreamPerThread));
|
||||
|
||||
// hipMemcpy2DAsync Device to Host
|
||||
HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B,
|
||||
COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyDeviceToHost, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
SECTION("Device to Host") {
|
||||
Memcpy2DDeviceToHostShell<async>(
|
||||
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
|
||||
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
if (mem_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "X_d" using D2D copy
|
||||
"X_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", ""
|
||||
, int, float, double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
auto mem_type = GENERATE(0, 1);
|
||||
int numDevices = 0;
|
||||
int canAccessPeer = 0;
|
||||
TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
hipStream_t stream;
|
||||
|
||||
if (numDevices > 1) {
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
|
||||
if (canAccessPeer) {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
// Allocating memory
|
||||
if (mem_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
|
||||
}
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
|
||||
// Initialize the data
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
|
||||
|
||||
// Host to Device
|
||||
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
|
||||
COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream));
|
||||
|
||||
// Change device
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
|
||||
char *X_d{nullptr};
|
||||
size_t pitch_X;
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d),
|
||||
&pitch_X, width, NUM_H));
|
||||
|
||||
// Device to Device
|
||||
HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d,
|
||||
pitch_A, COLUMNS*sizeof(TestType),
|
||||
ROWS, hipMemcpyDeviceToDevice, stream));
|
||||
|
||||
// Device to Host
|
||||
HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d,
|
||||
pitch_X, COLUMNS*sizeof(TestType), ROWS,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
if (mem_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
HIP_CHECK(hipFree(X_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
} else {
|
||||
SUCCEED("Machine does not seem to have P2P");
|
||||
SECTION("Device to Device") {
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy2DDeviceToDeviceShell<async, false>(
|
||||
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipped the testcase as no of devices is less than 2");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
Memory is allocated in GPU-0 and Stream is created in GPU-1
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "X_d" using D2D copy
|
||||
"X_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", ""
                   , int, float, double) {
  CHECK_IMAGE_SUPPORT
  // 0 selects the non-pinned host allocation path, 1 the pinned one.
  auto mem_type = GENERATE(0, 1);
  const bool usePinnedHost = (mem_type != 0);

  TestType *hostSrc{nullptr}, *hostDst{nullptr}, *hostSpare{nullptr};
  TestType *devA{nullptr};
  size_t pitchA;
  size_t rowBytes{NUM_W * sizeof(TestType)};
  hipStream_t stream;

  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));

  // The scenario needs two devices ...
  if (deviceCount < 2) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  // ... and device 0 must be able to reach device 1.
  int peerAccessible = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&peerAccessible, 0, 1));
  if (!peerAccessible) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  // All buffers are allocated while device 0 is current.
  HIP_CHECK(hipSetDevice(0));
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &hostSrc, &hostDst, &hostSpare,
                                NUM_W*NUM_H, usePinnedHost);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&devA),
                           &pitchA, rowBytes, NUM_H));
  char *devX{nullptr};
  size_t pitchX;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&devX),
                           &pitchX, rowBytes, NUM_H));

  HipTest::setDefaultData<TestType>(NUM_W*NUM_H, hostSrc, hostDst, hostSpare);

  // The stream is created while device 1 is current.
  HIP_CHECK(hipSetDevice(1));
  HIP_CHECK(hipStreamCreate(&stream));

  // H2D -> D2D -> D2H, all queued on that stream.
  HIP_CHECK(hipMemcpy2DAsync(devA, pitchA, hostSrc, COLUMNS*sizeof(TestType),
                             COLUMNS*sizeof(TestType), ROWS,
                             hipMemcpyHostToDevice, stream));
  HIP_CHECK(hipMemcpy2DAsync(devX, pitchX, devA, pitchA,
                             COLUMNS*sizeof(TestType), ROWS,
                             hipMemcpyDeviceToDevice, stream));
  HIP_CHECK(hipMemcpy2DAsync(hostDst, COLUMNS*sizeof(TestType), devX, pitchX,
                             COLUMNS*sizeof(TestType), ROWS,
                             hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  // The round-tripped COLUMNS x ROWS region must match the input.
  REQUIRE(HipTest::checkArray<TestType>(hostSrc, hostDst, COLUMNS, ROWS) == true);

  // Release everything that was acquired above.
  HIP_CHECK(hipFree(devA));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                hostSrc, hostDst, hostSpare, usePinnedHost);
  HIP_CHECK(hipFree(devX));
  HIP_CHECK(hipStreamDestroy(stream));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase verifies the null checks of hipMemcpy2DAsync API
|
||||
1. hipMemcpy2DAsync API where Source Pitch is zero
|
||||
2. hipMemcpy2DAsync API where Destination Pitch is zero
|
||||
3. hipMemcpy2DAsync API where height is zero
|
||||
4. hipMemcpy2DAsync API where width is zero
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));
  int* A_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(int)};
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Allocating memory
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, nullptr, NUM_W*NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));

  // Initialize the data
  HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);

  // Argument order is (dst, dpitch, src, spitch, width, height, kind, stream):
  // the source pitch is the 4th argument, the destination pitch the 2nd.
  SECTION("hipMemcpy2DAsync API where Source Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             0, NUM_W, NUM_H,
                             hipMemcpyDeviceToHost, stream) != hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d,
                             pitch_A, NUM_W, NUM_H,
                             hipMemcpyDeviceToHost, stream) != hipSuccess);
  }

  // A zero-sized copy is expected to be accepted.
  SECTION("hipMemcpy2DAsync API where height is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             pitch_A, NUM_W, 0,
                             hipMemcpyDeviceToHost, stream) == hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API where width is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             pitch_A, 0, NUM_H,
                             hipMemcpyDeviceToHost, stream) == hipSuccess);
  }

  // DeAllocating the memory; the stream created above is released as well so
  // it is not leaked on every section run.
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipStreamDestroy(stream));
  free(A_h);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the negative scenarios of hipMemcpy2DAsync API
|
||||
1. hipMemcpy2DAsync API by Passing nullptr to destination
|
||||
2. hipMemcpy2DAsync API by Passing nullptr to source
|
||||
3. hipMemcpy2DAsync API where width is > destination pitch
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DAsync_Negative") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  int *hostBuf{nullptr};
  int *devBuf{nullptr};
  size_t devPitch;
  size_t rowBytes{NUM_W * sizeof(int)};

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Host array of NUM_W*NUM_H ints and a pitched device array of NUM_H rows.
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &hostBuf, nullptr, nullptr, NUM_W*NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&devBuf),
                           &devPitch, rowBytes, NUM_H));

  // Fill the host array with the default pattern.
  HipTest::setDefaultData<int>(NUM_W*NUM_H, hostBuf, nullptr, nullptr);

  // Each section passes one invalid argument and only requires that the call
  // does not report success.
  SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") {
    const hipError_t status =
        hipMemcpy2DAsync(nullptr, rowBytes, devBuf, devPitch,
                         COLUMNS*sizeof(int), ROWS,
                         hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API by Passing nullptr to source") {
    const hipError_t status =
        hipMemcpy2DAsync(hostBuf, rowBytes, nullptr, devPitch,
                         COLUMNS*sizeof(int), ROWS,
                         hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API where width is > destination pitch") {
    const hipError_t status =
        hipMemcpy2DAsync(hostBuf, 10, devBuf, devPitch,
                         COLUMNS*sizeof(int), ROWS,
                         hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }

  // Cleanup: device buffer, stream, then the host array.
  HIP_CHECK(hipFree(devBuf));
  HIP_CHECK(hipStreamDestroy(stream));
  free(hostBuf);
}
|
||||
|
||||
/**
 * Copies N single-int rows host -> device -> host with hipMemcpy2DAsync on a
 * freshly created stream, using a pitch that spreads the rows across the whole
 * allocation, and checks that the values survive the round trip.
 *
 * @param inc  Requested number of ints per row; the buffers are
 *             sizeof(int) * N * inc bytes unless the device reports too little
 *             free memory, in which case a smaller size is used.
 */
static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) {
  // Amount of device memory left untouched when the request has to shrink.
  constexpr size_t defaultProgramSize = static_cast<size_t>(256) * 1024 * 1024;
  constexpr int N = 2;
  constexpr int value = 42;
  int *in = nullptr, *out = nullptr, *dev = nullptr;
  size_t newSize = 0, inp = 0;
  size_t size = sizeof(int) * N * inc;

  // Named freeMem/totalMem so the C library free() is not shadowed.
  size_t freeMem = 0, totalMem = 0;
  HIP_CHECK(hipMemGetInfo(&freeMem, &totalMem));

  if (freeMem < 2 * size) {
    // freeMem - defaultProgramSize is unsigned arithmetic: without this guard
    // it wraps to a huge value when less than the reserve is available.
    if (freeMem <= defaultProgramSize) {
      SUCCEED("Not enough free device memory to run the size test");
      return;
    }
    newSize = (freeMem - defaultProgramSize) / 2;
  } else {
    newSize = size;
  }

  // Each of the N rows needs room for at least one int.
  if (newSize < sizeof(int) * N) {
    SUCCEED("Not enough free device memory to run the size test");
    return;
  }

  INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
  INFO("Free memory: " << freeMem/1024.0/1024.0 << " MB or " << freeMem << " Bytes");
  INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");

  HIP_CHECK(hipHostMalloc(&in, newSize));
  HIP_CHECK(hipHostMalloc(&out, newSize));
  HIP_CHECK(hipMalloc(&dev, newSize));

  // inp is the row stride in ints; only the first int of each row is written.
  inp = newSize / (sizeof(int) * N);
  for (size_t i = 0; i < N; i++) {
    in[i * inp] = value;
  }

  size_t pitch = sizeof(int) * inp;

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int),
                             N, hipMemcpyHostToDevice, stream));
  HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int),
                             N, hipMemcpyDeviceToHost, stream));
  // Both copies must have finished before `out` is inspected on the host.
  HIP_CHECK(hipStreamSynchronize(stream));

  for (size_t i = 0; i < N; i++) {
    REQUIRE(out[i * inp] == value);
  }

  HIP_CHECK(hipFree(dev));
  HIP_CHECK(hipHostFree(in));
  HIP_CHECK(hipHostFree(out));
  HIP_CHECK(hipStreamDestroy(stream));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs multidevice size check on hipMemcpy2DAsync API
|
||||
1. Verify hipMemcpy2DAsync with 1 << 20 size
|
||||
2. Verify hipMemcpy2DAsync with 1 << 21 size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
size_t input = 1 << 20;
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
|
||||
for (int i=0; i < numDevices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
|
||||
SECTION("Verify hipMemcpy2DAsync with 1 << 20 size") {
|
||||
hipMemcpy2DAsync_Basic_Size_Test(input);
|
||||
}
|
||||
SECTION("Verify hipMemcpy2DAsync with 1 << 21 size") {
|
||||
input <<= 1;
|
||||
hipMemcpy2DAsync_Basic_Size_Test(input);
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy2DDeviceToDeviceShell<async, true>(
|
||||
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Host to Device") {
|
||||
Memcpy2DHostToDeviceShell<async>(
|
||||
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
Memcpy2DHostToHostShell<async>(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream),
|
||||
stream);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the shared Memcpy2D*SyncBehavior helpers against hipMemcpy2DAsync on
// the null stream, once per transfer direction. The boolean handed to each
// helper is forwarded unchanged from the original call sites.
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Synchronization_Behavior") {
  using namespace std::placeholders;

  // hipMemcpy2DAsync with its stream argument fixed to nullptr; every section
  // uses the same bound callable.
  auto copy_on_null_stream =
      std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr);

  HIP_CHECK(hipDeviceSynchronize());

  SECTION("Host to Device") {
    Memcpy2DHtoDSyncBehavior(copy_on_null_stream, false);
  }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Device to Pageable Host") {
    Memcpy2DDtoHPageableSyncBehavior(copy_on_null_stream, true);
  }
#endif

  SECTION("Device to Pinned Host") {
    Memcpy2DDtoHPinnedSyncBehavior(copy_on_null_stream, false);
  }

  SECTION("Device to Device") {
    Memcpy2DDtoDSyncBehavior(copy_on_null_stream, false);
  }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Host to Host") {
    Memcpy2DHtoHSyncBehavior(copy_on_null_stream, true);
  }
#endif
}
|
||||
|
||||
// Hands hipMemcpy2DAsync (bound to the null stream) to the shared
// Memcpy2DZeroWidthHeight helper in its async configuration.
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Parameters") {
  using namespace std::placeholders;
  constexpr bool async = true;

  auto copy_on_null_stream =
      std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr);
  Memcpy2DZeroWidthHeight<async>(copy_on_null_stream);
}
|
||||
|
||||
// Checks the error code hipMemcpy2DAsync returns for each invalid argument,
// repeated for every copy direction. All calls are issued on the null stream
// except the "Invalid stream" case.
TEST_CASE("Unit_hipMemcpy2DAsync_Negative_Parameters") {
  constexpr size_t cols = 128;
  constexpr size_t rows = 128;

  // Takes one valid argument set and, section by section, replaces a single
  // argument with an invalid value and checks the reported error.
  constexpr auto NegativeTests = [](void* dst, size_t dpitch, const void* src, size_t spitch,
                                    size_t width, size_t height, hipMemcpyKind kind) {
    SECTION("dst == nullptr") {
      HIP_CHECK_ERROR(hipMemcpy2DAsync(nullptr, dpitch, src, spitch, width, height, kind, nullptr),
                      hipErrorInvalidValue);
    }

    SECTION("src == nullptr") {
      HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, nullptr, spitch, width, height, kind, nullptr),
                      hipErrorInvalidValue);
    }

    SECTION("dpitch < width") {
      HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, width - 1, src, spitch, width, height, kind, nullptr),
                      hipErrorInvalidPitchValue);
    }

    SECTION("spitch < width") {
      HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, src, width - 1, width, height, kind, nullptr),
                      hipErrorInvalidPitchValue);
    }

    SECTION("dpitch > max pitch") {
      // One more than the maximum pitch reported for device 0.
      int max_pitch = 0;
      HIP_CHECK(hipDeviceGetAttribute(&max_pitch, hipDeviceAttributeMaxPitch, 0));
      const size_t oversized_pitch = static_cast<size_t>(max_pitch) + 1;
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, oversized_pitch, src, spitch, width, height, kind, nullptr),
          hipErrorInvalidValue);
    }

    SECTION("spitch > max pitch") {
      // One more than the maximum pitch reported for device 0.
      int max_pitch = 0;
      HIP_CHECK(hipDeviceGetAttribute(&max_pitch, hipDeviceAttributeMaxPitch, 0));
      const size_t oversized_pitch = static_cast<size_t>(max_pitch) + 1;
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, src, oversized_pitch, width, height, kind, nullptr),
          hipErrorInvalidValue);
    }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-234
    SECTION("Invalid MemcpyKind") {
      const auto bad_kind = static_cast<hipMemcpyKind>(-1);
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, bad_kind, nullptr),
          hipErrorInvalidMemcpyDirection);
    }
#endif

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-235
    SECTION("Invalid stream") {
      // The stream is destroyed first so the copy is handed a stale handle.
      StreamGuard stream_guard(Streams::created);
      HIP_CHECK(hipStreamDestroy(stream_guard.stream()));
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream_guard.stream()),
          hipErrorContextIsDestroyed);
    }
#endif
  };

  SECTION("Host to device") {
    LinearAllocGuard2D<int> dev_buf(cols, rows);
    LinearAllocGuard<int> host_buf(LinearAllocs::hipHostMalloc, dev_buf.pitch() * rows);
    NegativeTests(dev_buf.ptr(), dev_buf.pitch(), host_buf.ptr(), dev_buf.pitch(),
                  dev_buf.width(), dev_buf.height(), hipMemcpyHostToDevice);
  }

  SECTION("Device to host") {
    LinearAllocGuard2D<int> dev_buf(cols, rows);
    LinearAllocGuard<int> host_buf(LinearAllocs::hipHostMalloc, dev_buf.pitch() * rows);
    NegativeTests(host_buf.ptr(), dev_buf.pitch(), dev_buf.ptr(), dev_buf.pitch(),
                  dev_buf.width(), dev_buf.height(), hipMemcpyDeviceToHost);
  }

  SECTION("Host to host") {
    LinearAllocGuard<int> src_buf(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
    LinearAllocGuard<int> dst_buf(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
    NegativeTests(dst_buf.ptr(), cols * sizeof(int), src_buf.ptr(), cols * sizeof(int),
                  cols * sizeof(int), rows, hipMemcpyHostToHost);
  }

  SECTION("Device to device") {
    LinearAllocGuard2D<int> src_buf(cols, rows);
    LinearAllocGuard2D<int> dst_buf(cols, rows);
    NegativeTests(dst_buf.ptr(), dst_buf.pitch(), src_buf.ptr(), src_buf.pitch(),
                  dst_buf.width(), dst_buf.height(), hipMemcpyDeviceToDevice);
  }
}
|
||||
|
||||
@@ -0,0 +1,555 @@
|
||||
/*
|
||||
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync
|
||||
* @{
|
||||
* @ingroup MemcpyTest
|
||||
* `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src,
|
||||
* size_t spitch, size_t width, size_t height,
|
||||
* hipMemcpyKind kind, hipStream_t stream = 0 )` -
|
||||
* Copies data between host and device.
|
||||
*/
|
||||
|
||||
// Testcase Description:
|
||||
// 1) Verifies the working of Memcpy2DAsync API negative scenarios by
|
||||
// Pass NULL to destination pointer
|
||||
// Pass NULL to Source pointer
|
||||
// Pass width greater than spitch/dpitch
|
||||
// 2) Verifies hipMemcpy2DAsync API by
|
||||
// pass 0 to destination pitch
|
||||
// pass 0 to source pitch
|
||||
// pass 0 to width
|
||||
// pass 0 to height
|
||||
// 3) Verifies working of Memcpy2DAsync API on host memory
|
||||
// and pinned host memory by
|
||||
// performing D2H, D2D and H2D memory kind copies on same GPU
|
||||
// 4) Verifies working of Memcpy2DAsync API on host memory
|
||||
// and pinned host memory by
|
||||
// performing D2H, D2D and H2D memory kind copies on peer GPU
|
||||
// 5) Verifies working of Memcpy2DAsync API where memory is allocated
|
||||
// in GPU-0 and stream is created on GPU-1
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
// Element dimensions used for the pitched device allocations
// (NUM_W * sizeof(T) bytes wide, NUM_H rows) and the NUM_W * NUM_H host arrays.
static constexpr int NUM_W{16};
static constexpr int NUM_H{16};

// Element dimensions of the sub-region the 2D copies actually transfer
// (COLUMNS * sizeof(T) bytes wide, ROWS rows).
static constexpr int COLUMNS{6};
static constexpr int ROWS{6};
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This performs the following scenarios of hipMemcpy2DAsync API on same GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "B_d" using D2D copy
|
||||
"B_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // mem_type == 1 refers to pinned host memory; the value is forwarded as the
  // trailing flag of initArrays/freeArrays.
  auto mem_type = GENERATE(0, 1);
  const bool use_pinned = (mem_type != 0);

  HIP_CHECK(hipSetDevice(0));

  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  TestType* A_d{nullptr};
  TestType* B_d{nullptr};
  size_t pitch_A, pitch_B;
  const size_t width{NUM_W * sizeof(TestType)};
  // Width in bytes of the sub-region moved by every copy below.
  const size_t copy_width{COLUMNS * sizeof(TestType)};

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Host buffers (pinned or not, per use_pinned) and two pitched device buffers.
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W * NUM_H, use_pinned);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d), &pitch_B, width, NUM_H));

  // Fill the host arrays with the default test pattern.
  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, B_h, C_h);

  // Runs A_h -> A_d -> B_d -> B_h on the given stream and waits for completion.
  auto round_trip = [&](hipStream_t copy_stream) {
    // Host to Device
    HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, copy_width, copy_width, ROWS,
                               hipMemcpyHostToDevice, copy_stream));
    // Device to Device on the same GPU
    HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, pitch_A, copy_width, ROWS,
                               hipMemcpyDeviceToDevice, copy_stream));
    // Device to Host
    HIP_CHECK(hipMemcpy2DAsync(B_h, copy_width, B_d, pitch_B, copy_width, ROWS,
                               hipMemcpyDeviceToHost, copy_stream));
    HIP_CHECK(hipStreamSynchronize(copy_stream));
  };

  SECTION("Calling Async apis with stream object created by user") {
    round_trip(stream);
  }
  SECTION("Calling Async apis with hipStreamPerThread") {
    round_trip(hipStreamPerThread);
  }

  // The copied COLUMNS x ROWS region must match between A_h and B_h.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Release device buffers, host buffers and the stream.
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipFree(B_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, B_h, C_h, use_pinned);
  HIP_CHECK(hipStreamDestroy(stream));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "X_d" using D2D copy
|
||||
"X_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // mem_type is forwarded as the trailing flag of initArrays/freeArrays
  // (non-zero requests pinned host memory).
  auto mem_type = GENERATE(0, 1);
  const bool use_pinned = (mem_type != 0);

  // The scenario needs at least two devices...
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices <= 1) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  // ...and device 0 must be able to access device 1.
  int canAccessPeer = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
  if (!canAccessPeer) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  TestType* A_d{nullptr};
  size_t pitch_A;
  const size_t width{NUM_W * sizeof(TestType)};
  // Width in bytes of the sub-region moved by every copy below.
  const size_t copy_width{COLUMNS * sizeof(TestType)};

  // Stream and source buffer are created while device 0 is current.
  HIP_CHECK(hipSetDevice(0));
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W * NUM_H, use_pinned);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));

  // Fill the host arrays with the default test pattern.
  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, B_h, C_h);

  // Host to Device
  HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, copy_width, copy_width, ROWS,
                             hipMemcpyHostToDevice, stream));

  // Switch to device 1 and allocate the peer buffer there.
  HIP_CHECK(hipSetDevice(1));
  char* X_d{nullptr};
  size_t pitch_X;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d), &pitch_X, width, NUM_H));

  // Device to Device
  HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, pitch_A, copy_width, ROWS,
                             hipMemcpyDeviceToDevice, stream));

  // Device to Host
  HIP_CHECK(hipMemcpy2DAsync(B_h, copy_width, X_d, pitch_X, copy_width, ROWS,
                             hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  // The copied COLUMNS x ROWS region must match between A_h and B_h.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Release everything that was allocated above.
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, B_h, C_h, use_pinned);
  HIP_CHECK(hipFree(X_d));
  HIP_CHECK(hipStreamDestroy(stream));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
Memory is allocated in GPU-0 and Stream is created in GPU-1
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "X_d" using D2D copy
|
||||
"X_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // mem_type is forwarded as the trailing flag of initArrays/freeArrays
  // (non-zero requests pinned host memory).
  auto mem_type = GENERATE(0, 1);
  const bool use_pinned = (mem_type != 0);

  // The scenario needs at least two devices...
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices <= 1) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  // ...and device 0 must be able to access device 1.
  int canAccessPeer = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
  if (!canAccessPeer) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  TestType* A_d{nullptr};
  size_t pitch_A;
  const size_t width{NUM_W * sizeof(TestType)};
  // Width in bytes of the sub-region moved by every copy below.
  const size_t copy_width{COLUMNS * sizeof(TestType)};

  // Both device buffers are allocated while device 0 is current.
  HIP_CHECK(hipSetDevice(0));
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W * NUM_H, use_pinned);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  char* X_d{nullptr};
  size_t pitch_X;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d), &pitch_X, width, NUM_H));

  // Fill the host arrays with the default test pattern.
  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, B_h, C_h);

  // The stream, by contrast, is created while device 1 is current.
  HIP_CHECK(hipSetDevice(1));
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Host to Device
  HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, copy_width, copy_width, ROWS,
                             hipMemcpyHostToDevice, stream));

  // Device to Device
  HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, pitch_A, copy_width, ROWS,
                             hipMemcpyDeviceToDevice, stream));

  // Device to Host
  HIP_CHECK(hipMemcpy2DAsync(B_h, copy_width, X_d, pitch_X, copy_width, ROWS,
                             hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  // The copied COLUMNS x ROWS region must match between A_h and B_h.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Release everything that was allocated above.
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, B_h, C_h, use_pinned);
  HIP_CHECK(hipFree(X_d));
  HIP_CHECK(hipStreamDestroy(stream));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase verifies the null checks of hipMemcpy2DAsync API
|
||||
1. hipMemcpy2DAsync API where Source Pitch is zero
|
||||
2. hipMemcpy2DAsync API where Destination Pitch is zero
|
||||
3. hipMemcpy2DAsync API where height is zero
|
||||
4. hipMemcpy2DAsync API where width is zero
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  int* A_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(int)};
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Allocating memory: one host array and one pitched device buffer.
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, nullptr, NUM_W*NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));

  // Initialize the data
  HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);

  // hipMemcpy2DAsync argument order is (dst, dpitch, src, spitch, ...), so the
  // source pitch is the 4th argument and the destination pitch the 2nd.
  SECTION("hipMemcpy2DAsync API where Source Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             0, NUM_W, NUM_H,
                             hipMemcpyDeviceToHost, stream) != hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d,
                             pitch_A, NUM_W, NUM_H,
                             hipMemcpyDeviceToHost, stream) != hipSuccess);
  }

  // A zero height or zero width is expected to be accepted.
  SECTION("hipMemcpy2DAsync API where height is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             pitch_A, NUM_W, 0,
                             hipMemcpyDeviceToHost, stream) == hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API where width is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             pitch_A, 0, NUM_H,
                             hipMemcpyDeviceToHost, stream) == hipSuccess);
  }

  // DeAllocating the memory; the stream is destroyed as well so it does not
  // leak across test cases.
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipStreamDestroy(stream));
  free(A_h);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the negative scenarios of hipMemcpy2DAsync API
|
||||
1. hipMemcpy2DAsync API by Passing nullptr to destination
|
||||
2. hipMemcpy2DAsync API by Passing nullptr to source
|
||||
3. hipMemcpy2DAsync API where width is > destination pitch
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DAsync_Negative") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  int* A_h{nullptr};
  int* A_d{nullptr};
  size_t pitch_A;
  const size_t width{NUM_W * sizeof(int)};
  // Width in bytes requested by every failing copy below.
  const size_t copy_width{COLUMNS * sizeof(int)};

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // One host array and one pitched device buffer.
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, nullptr, NUM_W * NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));

  // Fill the host array with the default test pattern.
  HipTest::setDefaultData<int>(NUM_W * NUM_H, A_h, nullptr, nullptr);

  SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") {
    const hipError_t status = hipMemcpy2DAsync(nullptr, width, A_d, pitch_A, copy_width,
                                               ROWS, hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API by Passing nullptr to source") {
    const hipError_t status = hipMemcpy2DAsync(A_h, width, nullptr, pitch_A, copy_width,
                                               ROWS, hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }

  SECTION("hipMemcpy2DAsync API where width is > destination pitch") {
    // A destination pitch of 10 bytes is smaller than the requested copy width.
    const hipError_t status = hipMemcpy2DAsync(A_h, 10, A_d, pitch_A, copy_width,
                                               ROWS, hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }

  // Release the device buffer, the stream and the host array.
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipStreamDestroy(stream));
  free(A_h);
}
|
||||
|
||||
/**
 * Round-trips N single-int rows through device memory with hipMemcpy2DAsync
 * (host -> device -> host) using a pitch derived from the buffer size, and
 * checks that the first element of every row comes back unchanged.
 *
 * @param inc  Scale factor; the requested buffer size is sizeof(int) * N * inc
 *             bytes. If the device reports less than twice that amount free,
 *             the size is reduced to half of the free memory left after
 *             reserving defaultProgramSize.
 *
 * The test is skipped (SUCCEED + return) when the device does not have enough
 * free memory to derive a usable buffer size.
 */
static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) {
  constexpr size_t defaultProgramSize = 256 * 1024 * 1024;
  constexpr size_t N = 2;
  constexpr int value = 42;
  int *in = nullptr, *out = nullptr, *dev = nullptr;
  const size_t size = sizeof(int) * N * inc;

  // Named free_mem/total_mem so the locals do not shadow ::free.
  size_t free_mem = 0, total_mem = 0;
  HIP_CHECK(hipMemGetInfo(&free_mem, &total_mem));

  size_t newSize = size;
  if (free_mem < 2 * size) {
    // free_mem - defaultProgramSize is unsigned: guard against wrap-around,
    // which would otherwise produce an enormous allocation request.
    if (free_mem <= defaultProgramSize) {
      SUCCEED("skipped the testcase as free device memory is insufficient");
      return;
    }
    newSize = (free_mem - defaultProgramSize) / 2;
  }

  INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
  INFO("Free memory: " << free_mem/1024.0/1024.0 << " MB or " << free_mem << " Bytes");
  INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");

  // Number of ints per row; must be non-zero or the pitch below would be 0.
  const size_t inp = newSize / (sizeof(int) * N);
  if (inp == 0) {
    SUCCEED("skipped the testcase as the buffer size is too small");
    return;
  }

  HIP_CHECK(hipHostMalloc(&in, newSize));
  HIP_CHECK(hipHostMalloc(&out, newSize));
  HIP_CHECK(hipMalloc(&dev, newSize));

  // Only the first element of each row is written and later verified.
  for (size_t i = 0; i < N; i++) {
    in[i * inp] = value;
  }

  const size_t pitch = sizeof(int) * inp;

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int),
                             N, hipMemcpyHostToDevice, stream));
  HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int),
                             N, hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  for (size_t i = 0; i < N; i++) {
    REQUIRE(out[i * inp] == value);
  }

  HIP_CHECK(hipFree(dev));
  HIP_CHECK(hipHostFree(in));
  HIP_CHECK(hipHostFree(out));
  HIP_CHECK(hipStreamDestroy(stream));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs multidevice size check on hipMemcpy2DAsync API
|
||||
1. Verify hipMemcpy2DAsync with 1 << 20 size
|
||||
2. Verify hipMemcpy2DAsync with 1 << 21 size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") {
  CHECK_IMAGE_SUPPORT
  constexpr size_t base_size = 1 << 20;
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

  for (int i = 0; i < numDevices; i++) {
    HIP_CHECK(hipSetDevice(i));

    // Sections inside a loop need a distinct name per iteration: a plain
    // SECTION with a fixed name is entered only once, so only the first
    // device would ever be exercised. DYNAMIC_SECTION embeds the device index.
    DYNAMIC_SECTION("Verify hipMemcpy2DAsync with 1 << 20 size on device " << i) {
      hipMemcpy2DAsync_Basic_Size_Test(base_size);
    }
    DYNAMIC_SECTION("Verify hipMemcpy2DAsync with 1 << 21 size on device " << i) {
      hipMemcpy2DAsync_Basic_Size_Test(base_size << 1);
    }
  }
}
|
||||
@@ -0,0 +1,496 @@
|
||||
/*
|
||||
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup hipMemcpy2D hipMemcpy2D
|
||||
* @{
|
||||
* @ingroup MemcpyTest
|
||||
* `hipMemcpy2D(void* dst, size_t dpitch, const void* src,
|
||||
* size_t spitch, size_t width, size_t height,
|
||||
* hipMemcpyKind kind)` -
|
||||
* Copies data between host and device.
|
||||
*/
|
||||
|
||||
// Testcase Description:
|
||||
// 1) Verifies the working of Memcpy2D API negative scenarios by
|
||||
// Pass NULL to destination pointer
|
||||
// Pass NULL to Source pointer
|
||||
// Pass width greater than spitch/dpitch
|
||||
// 2) Verifies hipMemcpy2D API by
|
||||
// pass 0 to destination pitch
|
||||
// pass 0 to source pitch
|
||||
// pass 0 to width
|
||||
// pass 0 to height
|
||||
// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by
|
||||
// performing D2H, D2D and H2D memory kind copies on same GPU
|
||||
// 4) Verifies working of Memcpy2D API for the following scenarios
|
||||
// H2D-D2D-D2H on host and device memory
|
||||
// H2D-D2D-D2H on pinned host and device memory
|
||||
// H2D-D2D-D2H functionalities where memory is allocated in GPU-0
|
||||
// and API is triggered from GPU-1
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
// Element dimensions used for the pitched device allocations
// (NUM_W * sizeof(T) bytes wide, NUM_H rows) and the NUM_W * NUM_H host arrays.
static constexpr int NUM_W{16};
static constexpr int NUM_H{16};

// Element dimensions of the sub-region the 2D copies actually transfer
// (COLUMNS * sizeof(T) bytes wide, ROWS rows).
static constexpr int COLUMNS{8};
static constexpr int ROWS{8};
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2D API on same GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "B_d" using D2D copy
|
||||
"B_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // mem_type == 1 refers to pinned host memory; the value is forwarded as the
  // trailing flag of initArrays/freeArrays.
  auto mem_type = GENERATE(0, 1);
  const bool use_pinned = (mem_type != 0);

  HIP_CHECK(hipSetDevice(0));

  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  TestType* A_d{nullptr};
  TestType* B_d{nullptr};
  size_t pitch_A, pitch_B;
  const size_t width{NUM_W * sizeof(TestType)};
  // Width in bytes of the sub-region moved by every copy below.
  const size_t copy_width{COLUMNS * sizeof(TestType)};

  // Host buffers (pinned or not, per use_pinned) and two pitched device buffers.
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W * NUM_H, use_pinned);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d), &pitch_B, width, NUM_H));

  // Fill the host arrays with the default test pattern.
  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, B_h, C_h);

  // Host to Device
  HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, copy_width, copy_width, ROWS,
                        hipMemcpyHostToDevice));

  // Device to Device on the same GPU
  HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d, pitch_A, copy_width, ROWS,
                        hipMemcpyDeviceToDevice));

  // Device to Host
  HIP_CHECK(hipMemcpy2D(B_h, copy_width, B_d, pitch_B, copy_width, ROWS,
                        hipMemcpyDeviceToHost));

  // The copied COLUMNS x ROWS region must match between A_h and B_h.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Release device and host buffers.
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipFree(B_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, B_h, C_h, use_pinned);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2D API on same GPU.
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
The src and dst input pointers to hipMemCpy2D add an offset to the pointers
|
||||
returned by the allocation functions.
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "B_d" using D2D copy
|
||||
"B_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // mem_type == 1 refers to pinned host memory; the value is forwarded as the
  // trailing flag of initArrays/freeArrays.
  auto mem_type = GENERATE(0, 1);
  const bool use_pinned = (mem_type != 0);

  HIP_CHECK(hipSetDevice(0));

  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  TestType* A_d{nullptr};
  TestType* B_d{nullptr};
  size_t pitch_A, pitch_B;
  const size_t width{NUM_W * sizeof(TestType)};
  // Width in bytes of the sub-region moved by every copy below.
  const size_t copy_width{COLUMNS * sizeof(TestType)};

  // Host buffers (pinned or not, per use_pinned) and two pitched device buffers.
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W * NUM_H, use_pinned);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d), &pitch_B, width, NUM_H));

  // Fill the host arrays with the default test pattern.
  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, B_h, C_h);

  // Device pointers shifted away from the allocation base. The offset is added
  // to TestType* pointers, so it advances by COLUMNS * sizeof(TestType)
  // elements rather than bytes.
  TestType* const A_d_shifted = A_d + COLUMNS * sizeof(TestType);
  TestType* const B_d_shifted = B_d + COLUMNS * sizeof(TestType);

  // Host to Device
  HIP_CHECK(hipMemcpy2D(A_d_shifted, pitch_A, A_h, copy_width, copy_width, ROWS,
                        hipMemcpyHostToDevice));

  // Device to Device on the same GPU
  HIP_CHECK(hipMemcpy2D(B_d_shifted, pitch_B, A_d_shifted, pitch_A, copy_width, ROWS,
                        hipMemcpyDeviceToDevice));

  // Device to Host
  HIP_CHECK(hipMemcpy2D(B_h, copy_width, B_d_shifted, pitch_B, copy_width, ROWS,
                        hipMemcpyDeviceToHost));

  // The copied COLUMNS x ROWS region must match between A_h and B_h.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Release device and host buffers (the unshifted base pointers are freed).
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipFree(B_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, B_h, C_h, use_pinned);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs the following scenarios of hipMemcpy2D API on Peer GPU
|
||||
1. H2D-D2D-D2H for Host Memory<-->Device Memory
|
||||
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
|
||||
3. Device context change where memory is allocated in GPU-0
|
||||
and API is triggered from GPU-1
|
||||
|
||||
Input : "A_h" initialized based on data type
|
||||
"A_h" --> "A_d" using H2D copy
|
||||
"A_d" --> "X_d" using D2D copy
|
||||
"X_d" --> "B_h" using D2H copy
|
||||
Output: Validating A_h with B_h both should be equal for
|
||||
the number of COLUMNS and ROWS copied
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // mem_type is forwarded as the trailing flag of initArrays/freeArrays
  // (non-zero requests pinned host memory).
  auto mem_type = GENERATE(0, 1);
  const bool use_pinned = (mem_type != 0);

  // The scenario needs at least two devices...
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices <= 1) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  // ...and device 0 must be able to access device 1.
  int canAccessPeer = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
  if (!canAccessPeer) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  TestType* A_d{nullptr};
  size_t pitch_A;
  const size_t width{NUM_W * sizeof(TestType)};
  // Width in bytes of the sub-region moved by every copy below.
  const size_t copy_width{COLUMNS * sizeof(TestType)};

  // All buffers are allocated while device 0 is current.
  HIP_CHECK(hipSetDevice(0));
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W * NUM_H, use_pinned);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));

  // Fill the host arrays with the default test pattern.
  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, B_h, C_h);

  char* X_d{nullptr};
  size_t pitch_X;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d), &pitch_X, width, NUM_H));

  // The copies themselves are issued while device 1 is current.
  HIP_CHECK(hipSetDevice(1));

  // Host to Device
  HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, copy_width, copy_width, ROWS,
                        hipMemcpyHostToDevice));

  // Device to Device
  HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d, pitch_A, copy_width, ROWS,
                        hipMemcpyDeviceToDevice));

  // Device to Host
  HIP_CHECK(hipMemcpy2D(B_h, copy_width, X_d, pitch_X, copy_width, ROWS,
                        hipMemcpyDeviceToHost));

  // The copied COLUMNS x ROWS region must match between A_h and B_h.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Release everything that was allocated above.
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, B_h, C_h, use_pinned);
  HIP_CHECK(hipFree(X_d));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This Testcase verifies the null size checks of hipMemcpy2D API
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2D_SizeCheck") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int* A_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(int)};
|
||||
|
||||
// Allocating memory
|
||||
HipTest::initArrays<int>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, nullptr, NUM_W*NUM_H);
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
|
||||
// Initialize the data
|
||||
HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);
|
||||
|
||||
SECTION("hipMemcpy2D API where Source Pitch is zero") {
|
||||
REQUIRE(hipMemcpy2D(A_h, 0, A_d,
|
||||
pitch_A, NUM_W, NUM_H,
|
||||
hipMemcpyDeviceToHost) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("hipMemcpy2D API where Destination Pitch is zero") {
|
||||
REQUIRE(hipMemcpy2D(A_h, width, A_d,
|
||||
0, NUM_W, NUM_H,
|
||||
hipMemcpyDeviceToHost) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("hipMemcpy2D API where height is zero") {
|
||||
REQUIRE(hipMemcpy2D(A_h, width, A_d,
|
||||
pitch_A, NUM_W, 0,
|
||||
hipMemcpyDeviceToHost) == hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("hipMemcpy2D API where width is zero") {
|
||||
REQUIRE(hipMemcpy2D(A_h, width, A_d,
|
||||
pitch_A, 0, NUM_H,
|
||||
hipMemcpyDeviceToHost) == hipSuccess);
|
||||
}
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This Testcase verifies all the negative scenarios of hipMemcpy2D API
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2D_Negative") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int* A_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(int)};
|
||||
|
||||
// Allocating memory
|
||||
HipTest::initArrays<int>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, nullptr, NUM_W*NUM_H);
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
|
||||
// Initialize the data
|
||||
HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);
|
||||
|
||||
SECTION("hipMemcpy2D API by Passing nullptr to destination") {
|
||||
REQUIRE(hipMemcpy2D(nullptr, width, A_d,
|
||||
pitch_A, COLUMNS*sizeof(int), ROWS,
|
||||
hipMemcpyDeviceToHost) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("hipMemcpy2D API by Passing nullptr to destination") {
|
||||
REQUIRE(hipMemcpy2D(nullptr, width, nullptr,
|
||||
pitch_A, COLUMNS*sizeof(int), ROWS,
|
||||
hipMemcpyDeviceToHost) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("hipMemcpy2D API where width is greater than destination pitch") {
|
||||
REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A,
|
||||
COLUMNS*sizeof(int), ROWS,
|
||||
hipMemcpyDeviceToHost) != hipSuccess);
|
||||
}
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
}
|
||||
|
||||
static void hipMemcpy2D_Basic_Size_Test(size_t inc) {
|
||||
constexpr int defaultProgramSize = 256 * 1024 * 1024;
|
||||
constexpr int N = 2;
|
||||
constexpr int value = 42;
|
||||
int *in, *out, *dev;
|
||||
size_t newSize = 0, inp = 0;
|
||||
size_t size = sizeof(int) * N * inc;
|
||||
|
||||
size_t free, total;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
|
||||
if ( free < 2 * size )
|
||||
newSize = ( free - defaultProgramSize ) / 2;
|
||||
else
|
||||
newSize = size;
|
||||
|
||||
INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
|
||||
INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes");
|
||||
INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");
|
||||
|
||||
HIP_CHECK(hipHostMalloc(&in, newSize));
|
||||
HIP_CHECK(hipHostMalloc(&out, newSize));
|
||||
HIP_CHECK(hipMalloc(&dev, newSize));
|
||||
|
||||
inp = newSize / (sizeof(int) * N);
|
||||
for (size_t i=0; i < N; i++) {
|
||||
in[i * inp] = value;
|
||||
}
|
||||
|
||||
size_t pitch = sizeof(int) * inp;
|
||||
|
||||
HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int),
|
||||
N, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int),
|
||||
N, hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i=0; i < N; i++) {
|
||||
REQUIRE(out[i * inp] == value);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipFree(dev));
|
||||
HIP_CHECK(hipHostFree(in));
|
||||
HIP_CHECK(hipHostFree(out));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase performs multidevice size check on hipMemcpy2D API
|
||||
1. Verify hipMemcpy2D with 1 << 20 size
|
||||
2. Verify hipMemcpy2D with 1 << 21 size
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
size_t input = 1 << 20;
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
|
||||
for (int i=0; i < numDevices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
|
||||
SECTION("Verify hipMemcpy2D with 1 << 20 size") {
|
||||
hipMemcpy2D_Basic_Size_Test(input);
|
||||
}
|
||||
SECTION("Verify hipMemcpy2D with 1 << 21 size") {
|
||||
input <<= 1;
|
||||
hipMemcpy2D_Basic_Size_Test(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,337 +1,195 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testfile verifies the following scenarios of hipMemcpyParam2D API
|
||||
1. Negative Scenarios
|
||||
2. Extent Validation Scenarios
|
||||
3. D2D copy for different datatypes
|
||||
4. H2D and D2H copy for different datatypes
|
||||
*/
|
||||
#include "memcpy2d_tests_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static constexpr size_t NUM_W{10};
|
||||
static constexpr size_t NUM_H{10};
|
||||
/*
|
||||
* This testcase verifies D2D functionality of hipMemcpyParam2D API
|
||||
* Input: Intializing "A_d" device variable with "C_h" host variable
|
||||
* Output: "A_d" device variable to "E_d" device variable
|
||||
*
|
||||
* Validating the result by copying "E_d" to "A_h" and checking
|
||||
* it with the initalized data "C_h".
|
||||
*
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-D2D", "[hipMemcpyParam2D]", char, float, int,
|
||||
double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Basic") {
|
||||
constexpr bool async = false;
|
||||
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
// Initialize and Allocating Memory
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
|
||||
SECTION("Device to Host") { Memcpy2DDeviceToHostShell<async>(MemcpyParam2DAdapter<async>()); }
|
||||
#endif
|
||||
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
char *E_d;
|
||||
size_t pitch_E;
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
|
||||
&pitch_E, width, NUM_H));
|
||||
|
||||
// Initalizing A_d with C_h
|
||||
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
|
||||
NUM_W * sizeof(TestType), NUM_H, hipMemcpyHostToDevice));
|
||||
|
||||
// Device to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = E_d;
|
||||
desc.dstDevice = hipDeviceptr_t(E_d);
|
||||
desc.dstPitch = pitch_E;
|
||||
desc.WidthInBytes = NUM_W * sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
|
||||
|
||||
// Copying E_d to A_h
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
|
||||
NUM_W * sizeof(TestType), NUM_H,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
SECTION("Device to Device") {
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy2DDeviceToDeviceShell<async, false>(MemcpyParam2DAdapter<async>());
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2D API
|
||||
* H2D case:
|
||||
* Input: "C_h" host variable initialized with default data
|
||||
* Output: "A_d" device variable
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "A_d" device variable from the previous output
|
||||
* OutPut: "A_h" variable
|
||||
*
|
||||
* Validating the result by comparing "A_h" to "C_h"
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-H2D-D2H", "[hipMemcpyParam2D]", char, float,
|
||||
int, double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
// 1 refers to pinned host memory and 0 refers
|
||||
// to unpinned memory
|
||||
auto memory_type = GENERATE(0, 1);
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
|
||||
// Initialize and Allocating Memory
|
||||
TestType* A_h{nullptr}, *C_h{nullptr},
|
||||
*A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
|
||||
// Based on memory type (pinned/unpinned) allocating memory
|
||||
if (memory_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy2DDeviceToDeviceShell<async, true>(MemcpyParam2DAdapter<async>());
|
||||
}
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
// Host to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeHost;
|
||||
desc.srcHost = C_h;
|
||||
desc.srcDevice = hipDeviceptr_t(C_h);
|
||||
desc.srcPitch = width;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = A_d;
|
||||
desc.dstDevice = hipDeviceptr_t(A_d);
|
||||
desc.dstPitch = pitch_A;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
|
||||
}
|
||||
|
||||
// Device to Host
|
||||
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeHost;
|
||||
desc.dstHost = A_h;
|
||||
desc.dstDevice = hipDeviceptr_t(A_h);
|
||||
desc.dstPitch = width;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
|
||||
SECTION("Host to Device") { Memcpy2DHostToDeviceShell<async>(MemcpyParam2DAdapter<async>()); }
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
|
||||
SECTION("Host to Host") { Memcpy2DHostToHostShell<async>(MemcpyParam2DAdapter<async>()); }
|
||||
#endif
|
||||
}
|
||||
|
||||
// DeAllocating the Memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
if (memory_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
}
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior") {
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(MemcpyParam2DAdapter<>(), true); }
|
||||
|
||||
SECTION("Device to Pageable Host") {
|
||||
Memcpy2DDtoHPageableSyncBehavior(MemcpyParam2DAdapter<>(), true);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
|
||||
SECTION("Device to Pinned Host") {
|
||||
Memcpy2DDtoHPinnedSyncBehavior(MemcpyParam2DAdapter<>(), true);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("Device to Device") {
|
||||
#if HT_NVIDIA
|
||||
Memcpy2DDtoDSyncBehavior(MemcpyParam2DAdapter<>(), false);
|
||||
#else
|
||||
Memcpy2DDtoDSyncBehavior(MemcpyParam2DAdapter<>(), true);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232
|
||||
SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(MemcpyParam2DAdapter<>(), true); }
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Parameters") {
|
||||
constexpr bool async = false;
|
||||
Memcpy2DZeroWidthHeight<async>(MemcpyParam2DAdapter<async>());
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Array") {
|
||||
constexpr bool async = false;
|
||||
SECTION("Array from/to Host") {
|
||||
MemcpyParam2DArrayHostShell<async>(MemcpyParam2DAdapter<async>());
|
||||
}
|
||||
SECTION("Array from/to Device") {
|
||||
MemcpyParam2DArrayDeviceShell<async>(MemcpyParam2DAdapter<async>());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_Negative_Parameters") {
|
||||
constexpr size_t cols = 128;
|
||||
constexpr size_t rows = 128;
|
||||
|
||||
constexpr auto NegativeTests = [](void* dst, size_t dpitch, void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
SECTION("dst == nullptr") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(static_cast<void*>(nullptr), dpitch, src, spitch,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This testcase verifies the extent validation scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_ExtentValidation") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
// Allocating memory and Initializing the data
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
|
||||
* A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(char)};
|
||||
constexpr auto memsetval{100};
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
|
||||
&B_h, nullptr, nullptr,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<char>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
HipTest::setDefaultData<char>(NUM_W*NUM_H, B_h, nullptr, nullptr);
|
||||
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
|
||||
SECTION("src == nullptr") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, static_cast<void*>(nullptr), spitch,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
// Device to Host
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeHost;
|
||||
desc.dstHost = A_h;
|
||||
desc.dstDevice = hipDeviceptr_t(A_h);
|
||||
desc.dstPitch = width;
|
||||
desc.WidthInBytes = NUM_W;
|
||||
desc.Height = NUM_H;
|
||||
SECTION("dstPitch < WithInBytes") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, width - 1, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Destination Pitch is 0") {
|
||||
desc.dstPitch = 0;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
|
||||
SECTION("srcPitch < WidthInBytes") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, src, width - 1, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dstPitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, static_cast<size_t>(attr) + 1, src, spitch,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("srcPitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, src, static_cast<size_t>(attr) + 1,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
|
||||
SECTION("WidthInBytes + srcXInBytes > srcPitch") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(spitch - width + 1, 0, 0))(
|
||||
dst, dpitch, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("WidthInBytes + dstXInBytes > dstPitch") {
|
||||
HIP_CHECK_ERROR(
|
||||
MemcpyParam2DAdapter<>(make_hipExtent(0, 0, 0), make_hipExtent(dpitch - width + 1, 0, 0))(
|
||||
dst, dpitch, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("srcY out of bounds") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(0, 1, 0))(dst, dpitch, src, spitch,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dstY out of bounds") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(0, 0, 0), make_hipExtent(0, 1, 0))(
|
||||
dst, dpitch, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
|
||||
NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
SECTION("Source Pitch is 0") {
|
||||
desc.srcPitch = 0;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
|
||||
NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
SECTION("Height is 0") {
|
||||
desc.Height = 0;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
|
||||
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
|
||||
SECTION("Host to Host") {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
|
||||
NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int),
|
||||
cols * sizeof(int), rows, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
SECTION("Width is 0") {
|
||||
desc.WidthInBytes = 0;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
|
||||
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard2D<int> src_alloc(cols, rows);
|
||||
LinearAllocGuard2D<int> dst_alloc(cols, rows);
|
||||
NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
// DeAllocating the Memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HipTest::freeArrays<char>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* This testcase verifies the negative scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_Negative") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
|
||||
// Allocating and Initializing the data
|
||||
float* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
|
||||
* A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(float)};
|
||||
constexpr auto memsetval{100};
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<float>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<float>(NUM_W*NUM_H, A_h, B_h, C_h);
|
||||
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
|
||||
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeHost;
|
||||
desc.dstHost = A_h;
|
||||
desc.dstDevice = hipDeviceptr_t(A_h);
|
||||
desc.dstPitch = width;
|
||||
desc.WidthInBytes = NUM_W;
|
||||
desc.Height = NUM_H;
|
||||
|
||||
SECTION("Null Pointer to Source Device Pointer") {
|
||||
desc.srcDevice = hipDeviceptr_t(nullptr);
|
||||
REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Null Pointer to Destination Device Pointer") {
|
||||
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
|
||||
desc.srcMemoryType = hipMemoryTypeHost;
|
||||
desc.srcHost = A_h;
|
||||
desc.srcDevice = hipDeviceptr_t(A_h);
|
||||
desc.srcPitch = width;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = A_d;
|
||||
desc.dstDevice = hipDeviceptr_t(nullptr);
|
||||
desc.dstPitch = pitch_A;
|
||||
desc.WidthInBytes = NUM_W;
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Null Pointer to both Src & Dst Device Pointer") {
|
||||
desc.srcDevice = hipDeviceptr_t(nullptr);
|
||||
desc.dstDevice = hipDeviceptr_t(nullptr);
|
||||
REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Width > src/dest pitches") {
|
||||
desc.WidthInBytes = pitch_A+1;
|
||||
REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
|
||||
}
|
||||
|
||||
// DeAllocating the Memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HipTest::freeArrays<float>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
}
|
||||
@@ -1,441 +1,220 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testfile verifies the following scenarios of hipMemcpyParam2DAsync API
|
||||
1. Negative Scenarios
|
||||
2. Extent Validation Scenarios
|
||||
3. D2D copy for different datatypes
|
||||
4. H2D and D2H copy for different datatypes
|
||||
5. Device context change scenario where memory allocated in one GPU
|
||||
stream created in another GPU
|
||||
*/
|
||||
#include "memcpy2d_tests_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static constexpr size_t NUM_W{10};
|
||||
static constexpr size_t NUM_H{10};
|
||||
/*
|
||||
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
|
||||
* Where Memory is allocated in GPU-0 and stream is created in GPU-1
|
||||
*
|
||||
* Input: Intializing "A_d" device variable with "C_h" host variable
|
||||
* Output: "A_d" device variable to "E_d" device variable
|
||||
*
|
||||
* Validating the result by copying "E_d" to "A_h" and checking
|
||||
* it with the initalized data "C_h".
|
||||
*
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
|
||||
"[hipMemcpyParam2DAsync]", char, float, int, double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Basic") {
|
||||
using namespace std::placeholders;
|
||||
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
TestType *E_d{nullptr};
|
||||
size_t pitch_E;
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
|
||||
&pitch_E, width, NUM_H));
|
||||
constexpr bool async = true;
|
||||
|
||||
// Initalizing A_d with C_h
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
|
||||
const StreamGuard stream_guard(stream_type);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width,
|
||||
NUM_W*sizeof(TestType), NUM_H,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
// Device to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = E_d;
|
||||
desc.dstDevice = hipDeviceptr_t(E_d);
|
||||
desc.dstPitch = pitch_E;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Copying the result E_d to A_h host variable
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
|
||||
NUM_W*sizeof(TestType), NUM_H,
|
||||
hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(E_d));
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
|
||||
SECTION("Device to Host") {
|
||||
Memcpy2DDeviceToHostShell<async>(
|
||||
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
#endif
|
||||
SECTION("Device to Device") {
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy2DDeviceToDeviceShell<async, false>(
|
||||
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
|
||||
* Input: Initializing "A_d" device variable with "C_h" host variable
|
||||
* Output: "A_d" device variable to "E_d" device variable
|
||||
*
|
||||
* Validating the result by copying "E_d" to "A_h" and checking
|
||||
* it with the initialized data "C_h".
|
||||
*
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-D2D", "[hipMemcpyParam2DAsync]", char,
|
||||
int, float, double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
TestType *E_d;
|
||||
size_t pitch_E;
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
|
||||
&pitch_E, width, NUM_H));
|
||||
|
||||
// Initializing A_d with C_h
|
||||
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
|
||||
NUM_W*sizeof(TestType), NUM_H, hipMemcpyHostToDevice));
|
||||
|
||||
// Device to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = E_d;
|
||||
desc.dstDevice = hipDeviceptr_t(E_d);
|
||||
desc.dstPitch = pitch_E;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Copying the result E_d to A_h host variable
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
|
||||
NUM_W*sizeof(TestType), NUM_H, hipMemcpyDeviceToHost));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy2DDeviceToDeviceShell<async, true>(
|
||||
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
SECTION("Host to Device") {
|
||||
Memcpy2DHostToDeviceShell<async>(
|
||||
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
|
||||
SECTION("Host to Host") {
|
||||
Memcpy2DHostToHostShell<async>(
|
||||
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Runs the shared 2D-memcpy synchronization-behavior helpers against
// MemcpyParam2DAdapter<true> with nullptr bound as the trailing (stream)
// argument. The boolean handed to each helper is taken verbatim from the
// original test; presumably it states whether the call is expected to block
// the host - confirm against the helper definitions.
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Synchronization_Behavior") {
  using namespace std::placeholders;

  constexpr bool async = true;

  // Produces a fresh bind expression per use so every helper receives the
  // same kind of temporary the inline std::bind(...) call would give it.
  const auto null_stream_copy = [] {
    return std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, nullptr);
  };

  HIP_CHECK(hipDeviceSynchronize());

  SECTION("Host to Device") {
    Memcpy2DHtoDSyncBehavior(null_stream_copy(), false);
  }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Device to Pageable Host") {
    Memcpy2DDtoHPageableSyncBehavior(null_stream_copy(), true);
  }
#endif

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-236
  SECTION("Device to Pinned Host") {
    Memcpy2DDtoHPinnedSyncBehavior(null_stream_copy(), false);
  }
#endif

  SECTION("Device to Device") {
    Memcpy2DDtoDSyncBehavior(null_stream_copy(), false);
  }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Host to Host") {
    Memcpy2DHtoHSyncBehavior(null_stream_copy(), true);
  }
#endif
}
|
||||
|
||||
// Delegates to the shared Memcpy2DZeroWidthHeight helper using the
// asynchronous hipMemcpyParam2D adapter (presumably exercising zero
// width/height copies - confirm against the helper).
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Parameters") {
  constexpr bool kAsync = true;
  Memcpy2DZeroWidthHeight<kAsync>(MemcpyParam2DAdapter<kAsync>());
}
|
||||
|
||||
// Runs the shared array-copy shells (host and device variants) with the
// asynchronous hipMemcpyParam2D adapter, one Catch2 section per variant.
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Array") {
  constexpr bool kAsync = true;

  SECTION("Array from/to Host") {
    MemcpyParam2DArrayHostShell<kAsync>(MemcpyParam2DAdapter<kAsync>());
  }

  SECTION("Array from/to Device") {
    MemcpyParam2DArrayDeviceShell<kAsync>(MemcpyParam2DAdapter<kAsync>());
  }
}
|
||||
|
||||
/*
|
||||
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2DAsync API
|
||||
* H2D case:
|
||||
* Input: "C_h" host variable initialized with default data
|
||||
* Output: "A_d" device variable
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "A_d" device variable from the previous output
|
||||
* OutPut: "A_h" variable
|
||||
*
|
||||
* Validating the result by comparing "A_h" to "C_h"
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-H2D-D2H", "[hipMemcpyParam2DAsync]",
|
||||
char, int, float, double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative_Parameters") {
|
||||
constexpr bool async = true;
|
||||
|
||||
// 1 refers to pinned host memory and 0 refers
|
||||
// to unpinned memory
|
||||
auto memory_type = GENERATE(0, 1);
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType* A_h{nullptr}, *C_h{nullptr},
|
||||
*A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
hipStream_t stream;
|
||||
constexpr size_t cols = 128;
|
||||
constexpr size_t rows = 128;
|
||||
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
|
||||
// Based on memory type (pinned/unpinned) allocating memory
|
||||
if (memory_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
constexpr auto NegativeTests = [](void* dst, size_t dpitch, void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
SECTION("dst == nullptr") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(static_cast<void*>(nullptr), dpitch, src,
|
||||
spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
// Host to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
desc.srcMemoryType = hipMemoryTypeHost;
|
||||
desc.srcHost = C_h;
|
||||
desc.srcDevice = hipDeviceptr_t(C_h);
|
||||
desc.srcPitch = width;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = A_d;
|
||||
desc.dstDevice = hipDeviceptr_t(A_d);
|
||||
desc.dstPitch = pitch_A;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Device to Host
|
||||
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeHost;
|
||||
desc.dstHost = A_h;
|
||||
desc.dstDevice = hipDeviceptr_t(A_h);
|
||||
desc.dstPitch = width;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
if (memory_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
}
|
||||
SECTION("src == nullptr") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, dpitch, static_cast<void*>(nullptr),
|
||||
spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This testcase verifies the extent validation scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2DAsync_ExtentValidation") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
SECTION("dstPitch < WidthInBytes") {
|
||||
HIP_CHECK_ERROR(
|
||||
MemcpyParam2DAdapter<async>()(dst, width - 1, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("srcPitch < WidthInBytes") {
|
||||
HIP_CHECK_ERROR(
|
||||
MemcpyParam2DAdapter<async>()(dst, dpitch, src, width - 1, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("dpitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, static_cast<size_t>(attr) + 1, src, spitch,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("spitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, dpitch, src, static_cast<size_t>(attr) + 1,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
|
||||
SECTION("WidthInBytes + srcXInBytes > srcPitch") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(spitch - width + 1, 0, 0))(
|
||||
dst, dpitch, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("WidthInBytes + dstXInBytes > dstPitch") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(0, 0, 0),
|
||||
make_hipExtent(dpitch - width + 1, 0, 0))(
|
||||
dst, dpitch, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("srcY out of bounds") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(0, 1, 0))(dst, dpitch, src, spitch,
|
||||
width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("dstY out of bounds") {
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(0, 0, 0), make_hipExtent(0, 1, 0))(
|
||||
dst, dpitch, src, spitch, width, height, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-235
|
||||
SECTION("Invalid stream") {
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
HIP_CHECK(hipStreamDestroy(stream_guard.stream()));
|
||||
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, dpitch, src, spitch, width, height, kind,
|
||||
stream_guard.stream()),
|
||||
hipErrorContextIsDestroyed);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
|
||||
* A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(char)};
|
||||
constexpr auto memsetval{100};
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
|
||||
&B_h, nullptr, nullptr,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<char>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
HipTest::setDefaultData<char>(NUM_W*NUM_H, B_h, nullptr, nullptr);
|
||||
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
|
||||
|
||||
// Device to Host
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeHost;
|
||||
desc.dstHost = A_h;
|
||||
desc.dstDevice = hipDeviceptr_t(A_h);
|
||||
desc.dstPitch = width;
|
||||
desc.WidthInBytes = NUM_W;
|
||||
desc.Height = NUM_H;
|
||||
|
||||
SECTION("Destination Pitch is 0") {
|
||||
desc.dstPitch = 0;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
SECTION("Host to device") {
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
|
||||
NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
SECTION("Source Pitch is 0") {
|
||||
desc.srcPitch = 0;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
SECTION("Device to host") {
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
|
||||
NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
SECTION("Height is 0") {
|
||||
desc.Height = 0;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
|
||||
SECTION("Host to host") {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
|
||||
NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int),
|
||||
cols * sizeof(int), rows, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
SECTION("Width is 0") {
|
||||
desc.Height = 0;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
|
||||
SECTION("Device to device") {
|
||||
LinearAllocGuard2D<int> src_alloc(cols, rows);
|
||||
LinearAllocGuard2D<int> dst_alloc(cols, rows);
|
||||
NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
// DeAllocating the Memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HipTest::freeArrays<char>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* This testcase verifies the negative scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
float* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
|
||||
* A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(float)};
|
||||
constexpr auto memsetval{100};
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<float>(nullptr, nullptr, nullptr,
|
||||
&A_h, &B_h, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<float>(NUM_W*NUM_H, A_h, B_h, C_h);
|
||||
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
|
||||
|
||||
// Device to Host
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeHost;
|
||||
desc.dstHost = A_h;
|
||||
desc.dstDevice = hipDeviceptr_t(A_h);
|
||||
desc.dstPitch = width;
|
||||
desc.WidthInBytes = NUM_W;
|
||||
desc.Height = NUM_H;
|
||||
|
||||
SECTION("Null Pointer to Source Device Pointer") {
|
||||
desc.srcDevice = hipDeviceptr_t(nullptr);
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Null Pointer to Destination Device Pointer") {
|
||||
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
|
||||
desc.srcMemoryType = hipMemoryTypeHost;
|
||||
desc.srcHost = A_h;
|
||||
desc.srcDevice = hipDeviceptr_t(A_h);
|
||||
desc.srcPitch = width;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = A_d;
|
||||
desc.dstDevice = hipDeviceptr_t(nullptr);
|
||||
desc.dstPitch = pitch_A;
|
||||
desc.WidthInBytes = NUM_W;
|
||||
desc.Height = NUM_H;
|
||||
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Null Pointer to both Src & Dst Device Pointer") {
|
||||
desc.srcDevice = hipDeviceptr_t(nullptr);
|
||||
desc.dstDevice = hipDeviceptr_t(nullptr);
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Width > src/dest pitches") {
|
||||
desc.WidthInBytes = pitch_A+1;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HipTest::freeArrays<float>(nullptr, nullptr, nullptr,
|
||||
A_h, B_h, C_h, false);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,441 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testfile verifies the following scenarios of hipMemcpyParam2DAsync API
|
||||
1. Negative Scenarios
|
||||
2. Extent Validation Scenarios
|
||||
3. D2D copy for different datatypes
|
||||
4. H2D and D2H copy for different datatypes
|
||||
5. Device context change scenario where memory allocated in one GPU
|
||||
stream created in another GPU
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
// Dimensions of the 2D test buffers: NUM_W elements per row, NUM_H rows.
static constexpr size_t NUM_W = 10;
static constexpr size_t NUM_H = 10;
|
||||
/*
|
||||
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
|
||||
* Where Memory is allocated in GPU-0 and stream is created in GPU-1
|
||||
*
|
||||
* Input: Intializing "A_d" device variable with "C_h" host variable
|
||||
* Output: "A_d" device variable to "E_d" device variable
|
||||
*
|
||||
* Validating the result by copying "E_d" to "A_h" and checking
|
||||
* it with the initalized data "C_h".
|
||||
*
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
|
||||
"[hipMemcpyParam2DAsync]", char, float, int, double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
TestType *E_d{nullptr};
|
||||
size_t pitch_E;
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
|
||||
&pitch_E, width, NUM_H));
|
||||
|
||||
// Initalizing A_d with C_h
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width,
|
||||
NUM_W*sizeof(TestType), NUM_H,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
// Device to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = E_d;
|
||||
desc.dstDevice = hipDeviceptr_t(E_d);
|
||||
desc.dstPitch = pitch_E;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Copying the result E_d to A_h host variable
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
|
||||
NUM_W*sizeof(TestType), NUM_H,
|
||||
hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(E_d));
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
|
||||
* Input: Intializing "A_d" device variable with "C_h" host variable
|
||||
* Output: "A_d" device variable to "E_d" device variable
|
||||
*
|
||||
* Validating the result by copying "E_d" to "A_h" and checking
|
||||
* it with the initalized data "C_h".
|
||||
*
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-D2D", "[hipMemcpyParam2DAsync]", char,
|
||||
int, float, double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
TestType *E_d;
|
||||
size_t pitch_E;
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
|
||||
&pitch_E, width, NUM_H));
|
||||
|
||||
// Initializing A_d with C_h
|
||||
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
|
||||
NUM_W*sizeof(TestType), NUM_H, hipMemcpyHostToDevice));
|
||||
|
||||
// Device to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = E_d;
|
||||
desc.dstDevice = hipDeviceptr_t(E_d);
|
||||
desc.dstPitch = pitch_E;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Copying the result E_d to A_h host variable
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
|
||||
NUM_W*sizeof(TestType), NUM_H, hipMemcpyDeviceToHost));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2DAsync API
|
||||
* H2D case:
|
||||
* Input: "C_h" host variable initialized with default data
|
||||
* Output: "A_d" device variable
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "A_d" device variable from the previous output
|
||||
* OutPut: "A_h" variable
|
||||
*
|
||||
* Validating the result by comparing "A_h" to "C_h"
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-H2D-D2H", "[hipMemcpyParam2DAsync]",
|
||||
char, int, float, double, long double) {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
// 1 refers to pinned host memory and 0 refers
|
||||
// to unpinned memory
|
||||
auto memory_type = GENERATE(0, 1);
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
// Allocating and Initializing the data
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
TestType* A_h{nullptr}, *C_h{nullptr},
|
||||
*A_d{nullptr};
|
||||
size_t pitch_A;
|
||||
size_t width{NUM_W * sizeof(TestType)};
|
||||
hipStream_t stream;
|
||||
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
|
||||
&pitch_A, width, NUM_H));
|
||||
|
||||
// Based on memory type (pinned/unpinned) allocating memory
|
||||
if (memory_type) {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, true);
|
||||
} else {
|
||||
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h,
|
||||
width*NUM_H, false);
|
||||
}
|
||||
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (!peerAccess) {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
} else {
|
||||
// Host to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
desc.srcMemoryType = hipMemoryTypeHost;
|
||||
desc.srcHost = C_h;
|
||||
desc.srcDevice = hipDeviceptr_t(C_h);
|
||||
desc.srcPitch = width;
|
||||
desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
desc.dstHost = A_d;
|
||||
desc.dstDevice = hipDeviceptr_t(A_d);
|
||||
desc.dstPitch = pitch_A;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Device to Host
|
||||
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
|
||||
desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
desc.srcHost = A_d;
|
||||
desc.srcDevice = hipDeviceptr_t(A_d);
|
||||
desc.srcPitch = pitch_A;
|
||||
desc.dstMemoryType = hipMemoryTypeHost;
|
||||
desc.dstHost = A_h;
|
||||
desc.dstDevice = hipDeviceptr_t(A_h);
|
||||
desc.dstPitch = width;
|
||||
desc.WidthInBytes = NUM_W*sizeof(TestType);
|
||||
desc.Height = NUM_H;
|
||||
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
|
||||
|
||||
// DeAllocating the memory
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
if (memory_type) {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, true);
|
||||
} else {
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
|
||||
A_h, nullptr, C_h, false);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This testcase verifies the extent validation scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2DAsync_ExtentValidation") {
  // Exercises hipMemcpyParam2DAsync with degenerate extents on a
  // device-to-host descriptor: zero destination pitch, zero source pitch,
  // zero height and zero width. Every case is expected to return hipSuccess.
  CHECK_IMAGE_SUPPORT

  HIP_CHECK(hipSetDevice(0));
  char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
      * A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(char)};
  constexpr auto memsetval{100};
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Allocating and Initializing the data.
  // B_h is allocated by a second initArrays call and given the same default
  // data as A_h, so it serves as the "untouched" reference for A_h below.
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));
  HipTest::initArrays<char>(nullptr, nullptr, nullptr,
                            &A_h, nullptr, &C_h,
                            width*NUM_H, false);
  HipTest::initArrays<char>(nullptr, nullptr, nullptr,
                            &B_h, nullptr, nullptr,
                            width*NUM_H, false);
  HipTest::setDefaultData<char>(NUM_W*NUM_H, A_h, nullptr, C_h);
  HipTest::setDefaultData<char>(NUM_W*NUM_H, B_h, nullptr, nullptr);
  HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));

  // Device to Host descriptor shared by all sections; each section overrides
  // exactly one extent field before issuing the copy.
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = A_d;
  desc.srcDevice = hipDeviceptr_t(A_d);
  desc.srcPitch = pitch_A;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = A_h;
  desc.dstDevice = hipDeviceptr_t(A_h);
  desc.dstPitch = width;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Destination Pitch is 0") {
    desc.dstPitch = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
  }

  SECTION("Source Pitch is 0") {
    desc.srcPitch = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
  }

  SECTION("Height is 0") {
    desc.Height = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
    HIP_CHECK(hipStreamSynchronize(stream));
    // A zero-height copy must not modify the destination: A_h should still
    // match B_h, which received the same default data.
    REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
  }

  SECTION("Width is 0") {
    // Fixed: this section previously zeroed desc.Height, duplicating the
    // "Height is 0" case and never exercising a zero-width copy.
    desc.WidthInBytes = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
    HIP_CHECK(hipStreamSynchronize(stream));
    // A zero-width copy must leave the destination untouched as well.
    REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
  }

  // DeAllocating the Memory
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipStreamDestroy(stream));
  HipTest::freeArrays<char>(nullptr, nullptr, nullptr,
                            A_h, B_h, C_h, false);
}
|
||||
|
||||
/*
|
||||
* This testcase verifies the negative scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative") {
  // Feeds hipMemcpyParam2DAsync descriptors with null device pointers or a
  // copy width larger than the pitch and expects an error code each time.
  CHECK_IMAGE_SUPPORT

  HIP_CHECK(hipSetDevice(0));

  float* host_a = nullptr;
  float* host_b = nullptr;
  float* host_c = nullptr;
  float* dev_a = nullptr;
  size_t dev_pitch;
  const size_t row_bytes = NUM_W * sizeof(float);
  constexpr int memsetval = 100;
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Pitched device buffer plus three pageable host buffers.
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_a), &dev_pitch, row_bytes, NUM_H));
  HipTest::initArrays<float>(nullptr, nullptr, nullptr, &host_a, &host_b, &host_c,
                             row_bytes * NUM_H, false);
  HipTest::setDefaultData<float>(NUM_W * NUM_H, host_a, host_b, host_c);
  HIP_CHECK(hipMemset2D(dev_a, dev_pitch, memsetval, NUM_W, NUM_H));

  // Baseline descriptor: device -> host. Sections corrupt one or more fields.
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = dev_a;
  desc.srcDevice = hipDeviceptr_t(dev_a);
  desc.srcPitch = dev_pitch;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = host_a;
  desc.dstDevice = hipDeviceptr_t(host_a);
  desc.dstPitch = row_bytes;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Null Pointer to Source Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  SECTION("Null Pointer to Destination Device Pointer") {
    // Rebuild the descriptor as host -> device so that the destination is
    // the device-side pointer, then null it out.
    memset(&desc, 0x0, sizeof(hip_Memcpy2D));
    desc.srcMemoryType = hipMemoryTypeHost;
    desc.srcHost = host_a;
    desc.srcDevice = hipDeviceptr_t(host_a);
    desc.srcPitch = row_bytes;
    desc.dstMemoryType = hipMemoryTypeDevice;
    desc.dstHost = dev_a;
    desc.dstDevice = hipDeviceptr_t(nullptr);
    desc.dstPitch = dev_pitch;
    desc.WidthInBytes = NUM_W;
    desc.Height = NUM_H;

    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  SECTION("Null Pointer to both Src & Dst Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    desc.dstDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  SECTION("Width > src/dest pitches") {
    // One byte wider than the device pitch.
    desc.WidthInBytes = dev_pitch + 1;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  // Release resources (same order as before: device buffer, stream, host).
  HIP_CHECK(hipFree(dev_a));
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  HipTest::freeArrays<float>(nullptr, nullptr, nullptr, host_a, host_b, host_c, false);
}
|
||||
@@ -0,0 +1,337 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testfile verifies the following scenarios of hipMemcpyParam2D API
|
||||
1. Negative Scenarios
|
||||
2. Extent Validation Scenarios
|
||||
3. D2D copy for different datatypes
|
||||
4. H2D and D2H copy for different datatypes
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
// Number of elements per row of the 2D test buffers (row width in bytes is
// NUM_W * sizeof(element type)).
static constexpr size_t NUM_W = 10;
// Number of rows of the 2D test buffers.
static constexpr size_t NUM_H = 10;
|
||||
/*
|
||||
* This testcase verifies D2D functionality of hipMemcpyParam2D API
|
||||
* Input: Initializing "A_d" device variable with "C_h" host variable
|
||||
* Output: "A_d" device variable to "E_d" device variable
|
||||
*
|
||||
* Validating the result by copying "E_d" to "A_h" and checking
|
||||
* it with the initialized data "C_h".
|
||||
*
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-D2D", "[hipMemcpyParam2D]", char, float, int,
                   double, long double) {
  // Copies a pitched buffer from device 0 (A_d) to device 1 (E_d) with
  // hipMemcpyParam2D, reads E_d back into A_h and compares it with the
  // host data (C_h) that seeded A_d. Skipped when fewer than two devices
  // are present or when peer access is not available.
  CHECK_IMAGE_SUPPORT

  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices < 2) {
    SUCCEED("skipping the testcases as numDevices < 2");
    return;
  }

  // Initialize and Allocating Memory
  HIP_CHECK(hipSetDevice(0));
  TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(TestType)};
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, nullptr, &C_h,
                                width*NUM_H, false);
  HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);

  int peerAccess = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
  if (!peerAccess) {
    SUCCEED("Skipped the test as there is no peer access");
  } else {
    HIP_CHECK(hipSetDevice(1));
    // Destination buffer lives on device 1. It was previously declared as
    // char*; it now uses the element type like every other buffer here.
    TestType* E_d{nullptr};
    size_t pitch_E;
    HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
                             &pitch_E, width, NUM_H));

    // Initializing A_d with C_h
    HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
                          NUM_W * sizeof(TestType), NUM_H, hipMemcpyHostToDevice));

    // Device to Device
    hip_Memcpy2D desc = {};
    desc.srcMemoryType = hipMemoryTypeDevice;
    desc.srcHost = A_d;
    desc.srcDevice = hipDeviceptr_t(A_d);
    desc.srcPitch = pitch_A;
    desc.dstMemoryType = hipMemoryTypeDevice;
    desc.dstHost = E_d;
    desc.dstDevice = hipDeviceptr_t(E_d);
    desc.dstPitch = pitch_E;
    desc.WidthInBytes = NUM_W * sizeof(TestType);
    desc.Height = NUM_H;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);

    // Copying E_d to A_h
    HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
                          NUM_W * sizeof(TestType), NUM_H,
                          hipMemcpyDeviceToHost));

    // Validating the result
    REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);

    // Fixed: E_d was never released.
    HIP_CHECK(hipFree(E_d));
  }

  // DeAllocating the memory. Done on both paths so that the buffers are no
  // longer leaked when the test is skipped for lack of peer access.
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                A_h, nullptr, C_h, false);
}
|
||||
|
||||
/*
|
||||
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2D API
|
||||
* H2D case:
|
||||
* Input: "C_h" host variable initialized with default data
|
||||
* Output: "A_d" device variable
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "A_d" device variable from the previous output
|
||||
* OutPut: "A_h" variable
|
||||
*
|
||||
* Validating the result by comparing "A_h" to "C_h"
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-H2D-D2H", "[hipMemcpyParam2D]", char, float,
                   int, double, long double) {
  // Round-trips host data through a pitched device buffer using
  // hipMemcpyParam2D: C_h -> A_d (host to device), then A_d -> A_h (device
  // to host), and finally compares A_h with C_h. Runs once with pageable and
  // once with pinned host memory.
  CHECK_IMAGE_SUPPORT

  // 1 refers to pinned host memory and 0 refers
  // to unpinned memory
  auto memory_type = GENERATE(0, 1);
  const bool use_pinned = (memory_type != 0);

  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices < 2) {
    SUCCEED("skipping the testcases as numDevices < 2");
    return;
  }

  HIP_CHECK(hipSetDevice(0));

  // Initialize and Allocating Memory
  TestType* A_h{nullptr}, *C_h{nullptr},
      *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(TestType)};

  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));

  // Based on memory type (pinned/unpinned) allocating memory
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, nullptr, &C_h,
                                width*NUM_H, use_pinned);
  HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);

  int peerAccess = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
  if (!peerAccess) {
    SUCCEED("Skipped the test as there is no peer access");
  } else {
    // Host to Device
    hip_Memcpy2D desc = {};
    desc.srcMemoryType = hipMemoryTypeHost;
    desc.srcHost = C_h;
    desc.srcDevice = hipDeviceptr_t(C_h);
    desc.srcPitch = width;
    desc.dstMemoryType = hipMemoryTypeDevice;
    desc.dstHost = A_d;
    desc.dstDevice = hipDeviceptr_t(A_d);
    desc.dstPitch = pitch_A;
    desc.WidthInBytes = NUM_W*sizeof(TestType);
    desc.Height = NUM_H;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);

    // Device to Host
    memset(&desc, 0x0, sizeof(hip_Memcpy2D));
    desc.srcMemoryType = hipMemoryTypeDevice;
    desc.srcHost = A_d;
    desc.srcDevice = hipDeviceptr_t(A_d);
    desc.srcPitch = pitch_A;
    desc.dstMemoryType = hipMemoryTypeHost;
    desc.dstHost = A_h;
    desc.dstDevice = hipDeviceptr_t(A_h);
    desc.dstPitch = width;
    desc.WidthInBytes = NUM_W*sizeof(TestType);
    desc.Height = NUM_H;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);

    // Validating the result
    REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
  }

  // DeAllocating the Memory. Done on both paths so that the buffers are no
  // longer leaked when the test is skipped for lack of peer access. The
  // pinned flag must match the one used for allocation.
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                A_h, nullptr, C_h, use_pinned);
}
|
||||
/*
|
||||
* This testcase verifies the extent validation scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_ExtentValidation") {
  // Calls hipMemcpyParam2D with zero pitch / height / width on a
  // device-to-host descriptor; every variant is expected to succeed.
  CHECK_IMAGE_SUPPORT

  HIP_CHECK(hipSetDevice(0));

  char* host_dst = nullptr;
  char* host_ref = nullptr;
  char* host_aux = nullptr;
  char* dev_src = nullptr;
  size_t dev_pitch;
  const size_t row_bytes = NUM_W * sizeof(char);
  constexpr int memsetval = 100;

  // Pitched device buffer; host_dst/host_aux come from one initArrays call,
  // host_ref from a second one. host_dst and host_ref get the same default
  // data so host_ref can act as the "unchanged" reference.
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_src), &dev_pitch, row_bytes, NUM_H));
  HipTest::initArrays<char>(nullptr, nullptr, nullptr, &host_dst, nullptr, &host_aux,
                            row_bytes * NUM_H, false);
  HipTest::initArrays<char>(nullptr, nullptr, nullptr, &host_ref, nullptr, nullptr,
                            row_bytes * NUM_H, false);
  HipTest::setDefaultData<char>(NUM_W * NUM_H, host_dst, nullptr, host_aux);
  HipTest::setDefaultData<char>(NUM_W * NUM_H, host_ref, nullptr, nullptr);
  HIP_CHECK(hipMemset2D(dev_src, dev_pitch, memsetval, NUM_W, NUM_H));

  // Baseline descriptor: device -> host.
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = dev_src;
  desc.srcDevice = hipDeviceptr_t(dev_src);
  desc.srcPitch = dev_pitch;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = host_dst;
  desc.dstDevice = hipDeviceptr_t(host_dst);
  desc.dstPitch = row_bytes;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Destination Pitch is 0") {
    desc.dstPitch = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
  }

  SECTION("Source Pitch is 0") {
    desc.srcPitch = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
  }

  SECTION("Height is 0") {
    desc.Height = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
    // Nothing should have been copied: destination still equals reference.
    REQUIRE(HipTest::checkArray<char>(host_dst, host_ref, NUM_W, NUM_H) == true);
  }

  SECTION("Width is 0") {
    desc.WidthInBytes = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
    // Nothing should have been copied: destination still equals reference.
    REQUIRE(HipTest::checkArray<char>(host_dst, host_ref, NUM_W, NUM_H) == true);
  }

  // Release device and host buffers.
  HIP_CHECK(hipFree(dev_src));
  HipTest::freeArrays<char>(nullptr, nullptr, nullptr, host_dst, host_ref, host_aux, false);
}
|
||||
|
||||
/*
|
||||
* This testcase verifies the negative scenarios
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyParam2D_Negative") {
  // Feeds hipMemcpyParam2D descriptors with null device pointers or a copy
  // width larger than the pitch and expects an error code each time.
  CHECK_IMAGE_SUPPORT

  HIP_CHECK(hipSetDevice(0));

  float* host_a = nullptr;
  float* host_b = nullptr;
  float* host_c = nullptr;
  float* dev_a = nullptr;
  size_t dev_pitch;
  const size_t row_bytes = NUM_W * sizeof(float);
  constexpr int memsetval = 100;

  // Pitched device buffer plus three pageable host buffers.
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_a), &dev_pitch, row_bytes, NUM_H));
  HipTest::initArrays<float>(nullptr, nullptr, nullptr, &host_a, &host_b, &host_c,
                             row_bytes * NUM_H, false);
  HipTest::setDefaultData<float>(NUM_W * NUM_H, host_a, host_b, host_c);
  HIP_CHECK(hipMemset2D(dev_a, dev_pitch, memsetval, NUM_W, NUM_H));

  // Baseline descriptor: device -> host. Sections corrupt one or more fields.
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = dev_a;
  desc.srcDevice = hipDeviceptr_t(dev_a);
  desc.srcPitch = dev_pitch;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = host_a;
  desc.dstDevice = hipDeviceptr_t(host_a);
  desc.dstPitch = row_bytes;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Null Pointer to Source Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Null Pointer to Destination Device Pointer") {
    // Rebuild the descriptor as host -> device so that the destination is
    // the device-side pointer, then null it out.
    memset(&desc, 0x0, sizeof(hip_Memcpy2D));
    desc.srcMemoryType = hipMemoryTypeHost;
    desc.srcHost = host_a;
    desc.srcDevice = hipDeviceptr_t(host_a);
    desc.srcPitch = row_bytes;
    desc.dstMemoryType = hipMemoryTypeDevice;
    desc.dstHost = dev_a;
    desc.dstDevice = hipDeviceptr_t(nullptr);
    desc.dstPitch = dev_pitch;
    desc.WidthInBytes = NUM_W;
    desc.Height = NUM_H;
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Null Pointer to both Src & Dst Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    desc.dstDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Width > src/dest pitches") {
    // One byte wider than the device pitch.
    desc.WidthInBytes = dev_pitch + 1;
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  // Release device and host buffers.
  HIP_CHECK(hipFree(dev_a));
  HipTest::freeArrays<float>(nullptr, nullptr, nullptr, host_a, host_b, host_c, false);
}
|
||||
@@ -0,0 +1,521 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <variant>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <utils.hh>
|
||||
#include <resource_guards.hh>
|
||||
#include <hip/driver_types.h>
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault);
|
||||
|
||||
constexpr size_t cols = 127;
|
||||
constexpr size_t rows = 128;
|
||||
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
|
||||
const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows);
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1);
|
||||
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width_logical(), device_alloc.height(), 1);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(memcpy_func(host_alloc.ptr(), host_pitch, device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), kind));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; };
|
||||
PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
|
||||
device_alloc.height(), 1, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, bool enable_peer_access, typename F>
|
||||
void Memcpy2DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault);
|
||||
|
||||
constexpr size_t cols = 127;
|
||||
constexpr size_t rows = 128;
|
||||
|
||||
const auto device_count = HipTest::getDeviceCount();
|
||||
const auto src_device = GENERATE_COPY(range(0, device_count));
|
||||
const auto dst_device = GENERATE_COPY(range(0, device_count));
|
||||
const size_t src_cols_mult = GENERATE(1, 2);
|
||||
|
||||
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
if constexpr (enable_peer_access) {
|
||||
if (src_device == dst_device) {
|
||||
return;
|
||||
}
|
||||
int can_access_peer = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
|
||||
if (!can_access_peer) {
|
||||
INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device);
|
||||
REQUIRE(can_access_peer);
|
||||
}
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
|
||||
}
|
||||
|
||||
LinearAllocGuard2D<int> src_alloc(cols * src_cols_mult, rows);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
LinearAllocGuard2D<int> dst_alloc(cols, rows);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows);
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1);
|
||||
// Using dst_alloc width and height to set only the elements that will be copied over to
|
||||
// dst_alloc
|
||||
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(),
|
||||
dst_alloc.height(), 1);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), kind));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost));
|
||||
constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; };
|
||||
PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(),
|
||||
dst_alloc.height(), 1, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy2DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault);
|
||||
|
||||
constexpr size_t cols = 127;
|
||||
constexpr size_t rows = 128;
|
||||
|
||||
LinearAllocGuard2D<int> device_alloc(cols, rows);
|
||||
|
||||
const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch());
|
||||
|
||||
LinearAllocGuard<int> src_host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows);
|
||||
LinearAllocGuard<int> dst_host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows);
|
||||
|
||||
constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; };
|
||||
PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
|
||||
device_alloc.height(), 1, f);
|
||||
|
||||
std::fill_n(dst_host_alloc.ptr(), device_alloc.width_logical() * rows, 0);
|
||||
|
||||
HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(), host_pitch,
|
||||
device_alloc.width(), device_alloc.height(), kind));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(),
|
||||
device_alloc.pitch(), device_alloc.width(), device_alloc.height(),
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(),
|
||||
device_alloc.height(), 1, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy2DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault);
|
||||
|
||||
constexpr size_t cols = 127;
|
||||
constexpr size_t rows = 128;
|
||||
|
||||
const size_t src_pitch = GENERATE_REF(cols * sizeof(int), cols * sizeof(int) + 64);
|
||||
|
||||
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc, src_pitch * rows);
|
||||
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc, cols * sizeof(int) * rows);
|
||||
|
||||
constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; };
|
||||
PitchedMemorySet(src_host.ptr(), src_pitch, cols, rows, 1, f);
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_host.ptr(), cols * sizeof(int), src_host.ptr(), src_pitch,
|
||||
cols * sizeof(int), rows, kind));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
PitchedMemoryVerify(dst_host.ptr(), cols * sizeof(int), cols, rows, 1, f);
|
||||
}
|
||||
|
||||
// Synchronization behavior checks
|
||||
template <typename F>
|
||||
void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream) {
|
||||
LaunchDelayKernel(std::chrono::milliseconds{300}, kernel_stream);
|
||||
HIP_CHECK(memcpy_func());
|
||||
if (should_sync) {
|
||||
HIP_CHECK(hipStreamQuery(kernel_stream));
|
||||
} else {
|
||||
HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy2DHtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto host_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
LinearAllocGuard<int> host_alloc(host_alloc_type, 32 * sizeof(int) * 32);
|
||||
LinearAllocGuard2D<int> device_alloc(32, 32);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, device_alloc.ptr(), device_alloc.pitch(),
|
||||
host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height(), hipMemcpyHostToDevice),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy2DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::malloc, 32 * sizeof(int) * 32);
|
||||
LinearAllocGuard2D<int> device_alloc(32, 32);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(),
|
||||
device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height(), hipMemcpyDeviceToHost),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy2DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, 32 * sizeof(int) * 32);
|
||||
LinearAllocGuard2D<int> device_alloc(32, 32);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(),
|
||||
device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height(), hipMemcpyDeviceToHost),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy2DDtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard2D<int> src_alloc(32, 32);
|
||||
LinearAllocGuard2D<int> dst_alloc(32, 32);
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func, dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy2DHtoHSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
|
||||
LinearAllocGuard<int> src_alloc(src_alloc_type, 32 * sizeof(int) * 32);
|
||||
LinearAllocGuard<int> dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32);
|
||||
MemcpySyncBehaviorCheck(std::bind(memcpy_func, dst_alloc.ptr(), 32 * sizeof(int), src_alloc.ptr(),
|
||||
32 * sizeof(int), 32 * sizeof(int), 32, hipMemcpyHostToHost),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy2DZeroWidthHeight(F memcpy_func, const hipStream_t stream = nullptr) {
|
||||
constexpr size_t cols = 63;
|
||||
constexpr size_t rows = 64;
|
||||
|
||||
const auto [width_mult, height_mult] =
|
||||
GENERATE(std::make_pair(0, 1), std::make_pair(1, 0), std::make_pair(0, 0));
|
||||
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard2D<uint8_t> device_alloc(cols, rows);
|
||||
LinearAllocGuard<uint8_t> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows);
|
||||
std::fill_n(host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 42);
|
||||
HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 1, device_alloc.width(),
|
||||
device_alloc.height()));
|
||||
|
||||
HIP_CHECK(memcpy_func(host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(),
|
||||
device_alloc.pitch(), device_alloc.width() * width_mult,
|
||||
device_alloc.height() * height_mult, hipMemcpyDeviceToHost));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
device_alloc.width_logical() * device_alloc.height());
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard2D<uint8_t> src_alloc(cols, rows);
|
||||
LinearAllocGuard2D<uint8_t> dst_alloc(cols, rows);
|
||||
LinearAllocGuard<uint8_t> host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows);
|
||||
HIP_CHECK(
|
||||
hipMemset2D(src_alloc.ptr(), src_alloc.pitch(), 1, src_alloc.width(), src_alloc.height()));
|
||||
HIP_CHECK(
|
||||
hipMemset2D(dst_alloc.ptr(), dst_alloc.pitch(), 42, dst_alloc.width(), dst_alloc.height()));
|
||||
HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width() * width_mult, dst_alloc.height() * height_mult,
|
||||
hipMemcpyDeviceToDevice));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost));
|
||||
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
dst_alloc.width_logical() * dst_alloc.height());
|
||||
}
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard2D<uint8_t> device_alloc(cols, rows);
|
||||
LinearAllocGuard<uint8_t> src_host_alloc(LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * rows);
|
||||
LinearAllocGuard<uint8_t> dst_host_alloc(LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * rows);
|
||||
std::fill_n(src_host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 1);
|
||||
HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 42, device_alloc.width(),
|
||||
device_alloc.height()));
|
||||
HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(),
|
||||
device_alloc.width(), device_alloc.width() * width_mult,
|
||||
device_alloc.height() * height_mult, hipMemcpyHostToDevice));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(),
|
||||
device_alloc.pitch(), device_alloc.width(), device_alloc.height(),
|
||||
hipMemcpyDeviceToHost));
|
||||
ArrayFindIfNot(dst_host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
device_alloc.width_logical() * device_alloc.height());
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
const auto alloc_size = cols * rows;
|
||||
LinearAllocGuard<uint8_t> src_alloc(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
LinearAllocGuard<uint8_t> dst_alloc(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
std::fill_n(src_alloc.ptr(), alloc_size, 1);
|
||||
std::fill_n(dst_alloc.ptr(), alloc_size, 42);
|
||||
HIP_CHECK(memcpy_func(dst_alloc.ptr(), cols, src_alloc.ptr(), cols, cols * width_mult,
|
||||
rows * height_mult, hipMemcpyHostToHost));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
ArrayFindIfNot(dst_alloc.ptr(), static_cast<uint8_t>(42), alloc_size);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr auto MemTypeHost() {
|
||||
return hipMemoryTypeHost;
|
||||
}
|
||||
|
||||
constexpr auto MemTypeDevice() {
|
||||
return hipMemoryTypeDevice;
|
||||
}
|
||||
|
||||
constexpr auto MemTypeArray() {
|
||||
return hipMemoryTypeArray;
|
||||
}
|
||||
|
||||
constexpr auto MemTypeUnified() {
|
||||
return hipMemoryTypeUnified;
|
||||
}
|
||||
|
||||
// Copy operand accepted by the MemcpyParam2DAdapter callable: either a raw pointer (void*) or
// a hipArray_t.
using PtrVariant = std::variant<void*, hipArray_t>;
|
||||
|
||||
template <bool async = false>
|
||||
constexpr auto MemcpyParam2DAdapter(const hipExtent src_offset = {0, 0, 0},
|
||||
const hipExtent dst_offset = {0, 0, 0}) {
|
||||
return [=](PtrVariant dst, size_t dpitch, PtrVariant src, size_t spitch, size_t width,
|
||||
size_t height, hipMemcpyKind kind, hipStream_t stream = nullptr) {
|
||||
hip_Memcpy2D parms = {};
|
||||
memset(&parms, 0x0, sizeof(hip_Memcpy2D));
|
||||
|
||||
if (std::holds_alternative<hipArray_t>(dst)) {
|
||||
parms.dstMemoryType = MemTypeArray();
|
||||
parms.dstArray = std::get<hipArray_t>(dst);
|
||||
} else {
|
||||
parms.dstPitch = dpitch;
|
||||
auto ptr = std::get<void*>(dst);
|
||||
switch (kind) {
|
||||
case hipMemcpyDeviceToHost:
|
||||
case hipMemcpyHostToHost:
|
||||
parms.dstMemoryType = MemTypeHost();
|
||||
parms.dstHost = ptr;
|
||||
break;
|
||||
case hipMemcpyDeviceToDevice:
|
||||
case hipMemcpyHostToDevice:
|
||||
parms.dstMemoryType = MemTypeDevice();
|
||||
parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
|
||||
break;
|
||||
case hipMemcpyDefault:
|
||||
parms.dstMemoryType = MemTypeUnified();
|
||||
parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
if (std::holds_alternative<hipArray_t>(src)) {
|
||||
parms.srcMemoryType = MemTypeArray();
|
||||
parms.srcArray = std::get<hipArray_t>(src);
|
||||
} else {
|
||||
parms.srcPitch = spitch;
|
||||
auto ptr = std::get<void*>(src);
|
||||
switch (kind) {
|
||||
case hipMemcpyDeviceToHost:
|
||||
case hipMemcpyDeviceToDevice:
|
||||
parms.srcMemoryType = MemTypeDevice();
|
||||
parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
|
||||
break;
|
||||
case hipMemcpyHostToDevice:
|
||||
case hipMemcpyHostToHost:
|
||||
parms.srcMemoryType = MemTypeHost();
|
||||
parms.srcHost = ptr;
|
||||
break;
|
||||
case hipMemcpyDefault:
|
||||
parms.srcMemoryType = MemTypeUnified();
|
||||
parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
parms.WidthInBytes = width;
|
||||
parms.Height = height;
|
||||
parms.srcXInBytes = src_offset.width;
|
||||
parms.srcY = src_offset.height;
|
||||
parms.dstXInBytes = dst_offset.width;
|
||||
parms.dstY = dst_offset.height;
|
||||
|
||||
if constexpr (async) {
|
||||
return hipMemcpyParam2DAsync(&parms, stream);
|
||||
} else {
|
||||
return hipMemcpyParam2D(&parms);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void MemcpyParam2DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
hipExtent extent{127 * sizeof(int), 128, 1};
|
||||
|
||||
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
|
||||
DrvArrayAllocGuard<int> src_array(extent);
|
||||
DrvArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemorySet(src_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
|
||||
// Host -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_host.ptr(), extent.width, extent.width,
|
||||
extent.height, hipMemcpyHostToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), 0, src_array.ptr(), 0, extent.width, extent.height,
|
||||
hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Host
|
||||
HIP_CHECK(memcpy_func(dst_host.ptr(), extent.width, dst_array.ptr(), 0, extent.width,
|
||||
extent.height, hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void MemcpyParam2DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
hipExtent extent{127 * sizeof(int), 128, 1};
|
||||
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
|
||||
DrvArrayAllocGuard<int> src_array(extent);
|
||||
DrvArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
LinearAllocGuard3D<int> src_device(extent);
|
||||
LinearAllocGuard3D<int> dst_device(extent);
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
|
||||
src_device.height() / threads_per_block.y + 1, src_device.depth());
|
||||
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
|
||||
src_device.width_logical(), src_device.height(),
|
||||
src_device.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
// Device -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_device.ptr(), src_device.pitch(), extent.width,
|
||||
extent.height, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), 0, src_array.ptr(), 0, extent.width, extent.height,
|
||||
hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Device
|
||||
HIP_CHECK(memcpy_func(dst_device.ptr(), dst_device.pitch(), dst_array.ptr(), 0, extent.width,
|
||||
extent.height, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(memcpy_func(host_alloc.ptr(), extent.width, dst_device.ptr(), dst_device.pitch(),
|
||||
extent.width, extent.height, hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
@@ -19,14 +19,61 @@
|
||||
# SOFTWARE.
|
||||
|
||||
# Common Tests - Test independent of all platforms
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC
|
||||
hipExtModuleLaunchKernel.cc
|
||||
hip_module_common.cc
|
||||
hipModuleLoad.cc
|
||||
hipModuleLoadData.cc
|
||||
hipModuleLoadDataEx.cc
|
||||
hipModuleUnload.cc
|
||||
hipModuleGetFunction.cc
|
||||
hipModuleLaunchKernel.cc
|
||||
hipModuleGetGlobal.cc
|
||||
hipModuleGetTexRef.cc
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code
|
||||
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc
|
||||
-o get_function_module.code
|
||||
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc)
|
||||
add_custom_target(get_function_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code)
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code
|
||||
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc
|
||||
-o launch_kernel_module.code
|
||||
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc)
|
||||
add_custom_target(launch_kernel_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code)
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code
|
||||
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc
|
||||
-o get_global_test_module.code
|
||||
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc)
|
||||
add_custom_target(get_global_test_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code)
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_tex_ref_module.code
|
||||
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc
|
||||
-o get_tex_ref_module.code
|
||||
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc)
|
||||
add_custom_target(get_tex_ref_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_tex_ref_module.code)
|
||||
|
||||
# Note: to pass target architectures, use a format like -DOFFLOAD_ARCH_STR="--offload-arch=gfx900 --offload-arch=gfx906";
|
||||
# having space at the start/end of OFFLOAD_ARCH_STR can cause build failures
|
||||
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC
|
||||
${TEST_SRC}
|
||||
hipExtModuleLaunchKernel.cc)
|
||||
|
||||
add_custom_target(empty_module.code
|
||||
COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/empty_module.cc
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/module/empty_module.code
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
|
||||
|
||||
add_custom_target(copyKernel.code
|
||||
COMMAND ${CMAKE_CXX_COMPILER} -mcode-object-version=5 --genco ${OFFLOAD_ARCH_STR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/copyKernel.cc
|
||||
@@ -100,14 +147,30 @@ add_custom_target(copiousArgKernel17.code
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(RTCLIB "hiprtc")
|
||||
else()
|
||||
set(RTCLIB "nvrtc")
|
||||
endif()
|
||||
hip_add_exe_to_target(NAME ModuleTest
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC})
|
||||
TEST_TARGET_NAME build_tests
|
||||
LINKER_LIBS ${RTCLIB}
|
||||
COMMON_SHARED_SRC ${COMMON_SHARED_SRC}
|
||||
COMPILE_OPTIONS -std=c++17)
|
||||
|
||||
add_dependencies(ModuleTest get_function_module)
|
||||
add_dependencies(ModuleTest launch_kernel_module)
|
||||
add_dependencies(ModuleTest get_global_test_module)
|
||||
add_dependencies(ModuleTest get_tex_ref_module)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
add_dependencies(build_tests empty_module.code)
|
||||
add_dependencies(build_tests copyKernel.code copyKernel.s)
|
||||
if(UNIX)
|
||||
add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code
|
||||
copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
extern "C" {
|
||||
__global__ void GlobalKernel() {}
|
||||
|
||||
__device__ void DeviceKernel() {}
|
||||
}
|
||||
Bu fark içinde çok fazla dosya değişikliği olduğu için bazı dosyalar gösterilmiyor Daha Fazla Göster
Yeni konuda referans
Bir kullanıcı engelle