diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index d60c272dd6..27f6568041 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -128,9 +128,32 @@ "Unit_hipEventIpc", "=== SWDEV-427101:Below test fails randomly in PSDB ===", "Unit_deviceAllocation_InOneThread_AccessInAllThreads", + "=== Below test is disabled due to defect EXSWHTEC-347 ===", + "Unit_hipPointerSetAttribute_Positive_SyncMemops", + "=== Below 10 tests are disabled due to defect EXSWHTEC-356 ===", + "Unit_Device___hisinf2_Accuracy_Positive", + "Unit_Device___hisnan2_Accuracy_Positive", + "Unit_Device___hbequ2_Accuracy_Positive", + "Unit_Device___hne_Accuracy_Positive", + "Unit_Device___hne2_Accuracy_Positive", + "Unit_Device___hbne2_Accuracy_Positive", + "Unit_Device___hbgeu2_Accuracy_Positive", + "Unit_Device___hbgtu2_Accuracy_Positive", + "Unit_Device___hbleu2_Accuracy_Positive", + "Unit_Device___hbltu2_Accuracy_Positive", + "=== Below 4 tests are disabled due to defect EXSWHTEC-355 ===", + "Unit_Device___hadd_Sanity_Positive", + "Unit_Device___uhadd_Sanity_Positive", + "Unit_Device___rhadd_Sanity_Positive", + "Unit_Device___urhadd_Sanity_Positive", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", + "=== Below 2 tests are disabled due to defect EXSWHTEC-369 ===", + "Unit_Device_ilogbf_Accuracy_Positive", + "Unit_Device_ilogb_Accuracy_Positive", + "NOTE: The following test is disabled due to defect - EXSWHTEC-245", + "Unit_hipFuncGetAttribute_Negative_Parameters", "Unit_hipMemAddressFree_negative", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipMemAddressReserve_Negative", @@ -309,39 +332,1072 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters", + 
"Unit_hipDeviceGetGraphMemAttribute_Positive_ReuseMemory", + "Unit_hipGraphAddNodeTypeEventWait_Positive_Basic", + "Unit_hipDrvGraphAddMemsetNode_Negative_Parameters", + "Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D", + "Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D", + "Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D", + "Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D", + "Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D", + "Unit_hipDrvGraphAddMemsetNode_hipMallocManaged", + "Unit_hipDrvGraphAddMemcpyNode_Negative_Parameters", + "Unit_hipPointerSetAttribute_Negative_Parameters", + "Unit_hipDrvGetErrorName_Positive_Basic", + "Unit_hipDrvGetErrorString_Positive_Basic", + "Unit_hipModuleLaunchKernel_Negative_Parameters", + "Unit_hipModuleGetTexRef_Positive_Basic", + "Unit_hipModuleLaunchCooperativeKernel_Negative_Parameters", + "Unit_hipExtModuleLaunchKernel_Negative_Parameters", + "Unit_hipLaunchKernel_Negative_Parameters", + "Unit_Device_modf_modff_Negative_RTC", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - double", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address - float", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - double", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address - float", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - double", "SWDEV-446588 - Disable graph multi gpu testcases until graph has support for it", "Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed", "Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice", "Unit_hipGraphUpload_Functional_multidevice_test", - #endif - #if defined VEGA20 - "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===", - "Unit_deviceAllocation_Malloc_ComplexDataType", - #endif - #if defined MI100 - "=== Below test soft hang in stress test on 29/08/23 ===", - "Unit_hipMultiThreadStreams2", - "=== SWDEV-425248:Below tests failed in stress test on 
11/10/23 ===", - "Unit_hipHostRegister_Memcpy - double", - "Unit_hipP2pLinkTypeAndHopFunc", - "=== SWDEV-426219:This test fails in integrity test & PSDB ===", - "Unit_hipLaunchParm", - "=== SWDEV-432554:Below test failed in stress test on 10/11/23 ===", - "Unit_hipMemcpy3DAsync_Positive_Basic", - "Unit_hipDrvMemcpy3DAsync_Positive_Basic", - "Print_Out_Attributes", - "Unit_hipExtGetLinkTypeAndHopCount_Positive_Basic", - "Unit_hipClock64_Positive_Basic", - "Unit_hipClock_Positive_Basic", - "=== Below tests failed in integrity test on 08/12/23 ===", - "Unit_hipMallocMipmappedArray_Negative_Parameters", - "Unit_hipFreeMipmappedArray_Negative_Parameters", - "Unit_hipGetMipmappedArrayLevel_Negative_Parameters", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_Assert_Positive_Basic_KernelFail", + "=== SWDEV-442805 : Below tests failed in stress test on 19/01/24 ===", + "Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - float", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - double", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - 
long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - float", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - double", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - float", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - double", + "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t", + "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t", + "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t", + "=== SWDEV-444987 - Below tests fail in stress testing on 25/01/2023 ===", + "Unit_floatTM", + "Unit_TestMathFuncComplex", + "Unit_AtomicsWithRandomActiveLanesInWavefront_UniformInteger", + "Unit_AtomicsWithRandomActiveLanesInWavefront_DivergentInteger", + "Unit_hipGraphAddMemcpyNodeToSymbol_Positive_Basic", + "Unit_hipStreamBeginCapture_Positive_Functional", + "Unit_atomicAnd_Positive_SameAddress - int", + "Unit_atomicAnd_Positive_SameAddress - unsigned int", + "Unit_atomicAnd_Positive_SameAddress - unsigned long", + "Unit_atomicAnd_Positive_SameAddress - unsigned long long", + "Unit_atomicAnd_Positive_Adjacent_Addresses - int", + "Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicAnd_Positive_Scattered_Addresses - int", + "Unit_atomicAnd_Positive_Scattered_Addresses - 
unsigned int", + "Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicAnd_Negative_Parameters_RTC", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address - int", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address - unsigned int", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address - unsigned long", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address - unsigned long long", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses - int", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned int", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long long", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses - int", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned int", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long", + "Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses - 
unsigned long long", + "Unit_atomicOr_Positive_SameAddress - int", + "Unit_atomicOr_Positive_SameAddress - unsigned int", + "Unit_atomicOr_Positive_SameAddress - unsigned long", + "Unit_atomicOr_Positive_SameAddress - unsigned long long", + "Unit_atomicOr_Positive_Adjacent_Addresses - int", + "Unit_atomicOr_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicOr_Positive_Scattered_Addresses - int", + "Unit_atomicOr_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicOr_Positive_Scattered_Addresses - unsigned long", + "Unit_atomicOr_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicOr_Negative_Parameters_RTC", + "Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address - int", + "Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address - unsigned int", + "Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address - unsigned long", + "Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address - unsigned long long", + 
"Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses - int", + "Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned int", + "Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long", + "Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long long", + "Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses - int", + "Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned int", + "Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long", + "Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long long", + "Unit_atomicXor_Positive_SameAddress - int", + "Unit_atomicXor_Positive_SameAddress - unsigned int", + "Unit_atomicXor_Positive_SameAddress - unsigned long", + "Unit_atomicXor_Positive_SameAddress - unsigned long long", + "Unit_atomicXor_Positive_Adjacent_Addresses - int", + "Unit_atomicXor_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicXor_Positive_Scattered_Addresses - int", + "Unit_atomicXor_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicXor_Positive_Scattered_Addresses - unsigned long", + "Unit_atomicXor_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - 
int", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicXor_Negative_Parameters_RTC", + "Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address - int", + "Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address - unsigned int", + "Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address - unsigned long", + "Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address - unsigned long long", + "Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses - int", + "Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned int", + "Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long", + "Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long long", + "Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses - int", + "Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned int", + "Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long", + "Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long long", + "Unit_atomicMin_Positive_SameAddress - int", + "Unit_atomicMin_Positive_SameAddress - unsigned int", + "Unit_atomicMin_Positive_SameAddress - unsigned long", + "Unit_atomicMin_Positive_SameAddress - unsigned long long", + "Unit_atomicMin_Positive_Adjacent_Addresses - int", + "Unit_atomicMin_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Adjacent_Addresses - float", + "Unit_atomicMin_Positive_Adjacent_Addresses - double", + "Unit_atomicMin_Positive_Scattered_Addresses - int", + "Unit_atomicMin_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicMin_Positive_Scattered_Addresses - unsigned long", + 
"Unit_atomicMin_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Scattered_Addresses - float", + "Unit_atomicMin_Positive_Scattered_Addresses - double", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - float", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_atomicMin_Negative_Parameters_RTC", + "Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - float", + "Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address - double", + "Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses - int", + "Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned int", + "Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long", + "Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long long", + "Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses - float", + 
"Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses - double", + "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses - int", + "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned int", + "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long", + "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long long", + "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses - float", + "Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses - double", + "Unit_atomicMax_Positive_SameAddress - int", + "Unit_atomicMax_Positive_SameAddress - unsigned int", + "Unit_atomicMax_Positive_SameAddress - unsigned long", + "Unit_atomicMax_Positive_SameAddress - unsigned long long", + "Unit_atomicMax_Positive_Adjacent_Addresses - int", + "Unit_atomicMax_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Adjacent_Addresses - float", + "Unit_atomicMax_Positive_Adjacent_Addresses - double", + "Unit_atomicMax_Positive_Scattered_Addresses - int", + "Unit_atomicMax_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicMax_Positive_Scattered_Addresses - unsigned long", + "Unit_atomicMax_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Scattered_Addresses - float", + "Unit_atomicMax_Positive_Scattered_Addresses - double", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - float", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + 
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_atomicMax_Negative_Parameters_RTC", + "Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - float", + "Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address - double", + "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses - int", + "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned int", + "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long", + "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses - unsigned long long", + "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses - float", + "Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses - double", + "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses - int", + "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned int", + "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long", + "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses - unsigned long long", + "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses - float", + "Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses - double", + "Unit_safeAtomicMin_Positive_Adjacent_Addresses - float", + "Unit_safeAtomicMin_Positive_Adjacent_Addresses - double", + 
"Unit_safeAtomicMin_Positive_Scattered_Addresses - float", + "Unit_safeAtomicMin_Positive_Scattered_Addresses - double", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address - float", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address - double", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_unsafeAtomicMin_Positive_SameAddress - double", + "Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - float", + "Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - double", + "Unit_unsafeAtomicMin_Positive_Scattered_Addresses - float", + "Unit_unsafeAtomicMin_Positive_Scattered_Addresses - double", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address - double", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_safeAtomicMax_Positive_Adjacent_Addresses - float", + "Unit_safeAtomicMax_Positive_Adjacent_Addresses - double", + "Unit_safeAtomicMax_Positive_Scattered_Addresses - float", + "Unit_safeAtomicMax_Positive_Scattered_Addresses - double", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address - float", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_unsafeAtomicMax_Positive_SameAddress - double", + "Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - float", + 
"Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - double", + "Unit_unsafeAtomicMax_Positive_Scattered_Addresses - float", + "Unit_unsafeAtomicMax_Positive_Scattered_Addresses - double", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address - double", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long", + 
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned int", + 
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - double", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - double", + "Unit_atomicExch_Positive - int", + "Unit_atomicExch_Positive - unsigned int", + "Unit_atomicExch_Positive - unsigned long", + "Unit_atomicExch_Positive - unsigned long long", + "Unit_atomicExch_Positive - float", + "Unit_atomicExch_Positive - double", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - int", + 
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - 
int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - int", + 
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_exchange_Positive_Wavefront - int", + 
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned int", + "Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long", + "Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long long", + "Unit___hip_atomic_exchange_Positive_Wavefront - float", + "Unit___hip_atomic_exchange_Positive_Wavefront - double", + "Unit___hip_atomic_exchange_Positive_Workgroup - int", + "Unit___hip_atomic_exchange_Positive_Workgroup - unsigned int", + "Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long", + "Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long long", + "Unit___hip_atomic_exchange_Positive_Workgroup - float", + "Unit___hip_atomic_exchange_Positive_Workgroup - double", + "Unit_Kernel_Launch_bounds_Negative_OutOfBounds", + "Unit_Kernel_Launch_bounds_Negative_Parameters_RTC", + "Unit___threadfence_block_Positive_Basic_Peer", + "Unit___threadfence_Positive_Basic_Peer", + "Unit___threadfence_system_Positive_Basic_Peer", + "Unit_Device_sin_Accuracy_Positive - float", + "Unit_Device_sin_Accuracy_Positive - double", + "Unit_Device_cos_Accuracy_Positive - float", + "Unit_Device_cos_Accuracy_Positive - double", + "Unit_Device_tan_Accuracy_Positive - float", + "Unit_Device_tan_Accuracy_Positive - double", + "Unit_Device_asin_Accuracy_Positive - float", + "Unit_Device_asin_Accuracy_Positive - double", + "Unit_Device_acos_Accuracy_Positive - float", + "Unit_Device_acos_Accuracy_Positive - double", + "Unit_Device_atan_Accuracy_Positive - float", + "Unit_Device_atan_Accuracy_Positive - double", + "Unit_Device_sinh_Accuracy_Positive - float", + "Unit_Device_sinh_Accuracy_Positive - double", + "Unit_Device_cosh_Accuracy_Positive - float", + "Unit_Device_cosh_Accuracy_Positive - double", + "Unit_Device_tanh_Accuracy_Positive - float", + "Unit_Device_tanh_Accuracy_Positive - double", + "Unit_Device_asinh_Accuracy_Positive - float", + "Unit_Device_asinh_Accuracy_Positive - double", + "Unit_Device_acosh_Accuracy_Positive - float", + 
"Unit_Device_acosh_Accuracy_Positive - double", + "Unit_Device_atanh_Accuracy_Positive - float", + "Unit_Device_atanh_Accuracy_Positive - double", + "Unit_Device_sinpi_Accuracy_Positive - float", + "Unit_Device_sinpi_Accuracy_Positive - double", + "Unit_Device_cospi_Accuracy_Positive - float", + "Unit_Device_cospi_Accuracy_Positive - double", + "Unit_Device_tanpi_Accuracy_Positive - float", + "Unit_Device_tanpi_Accuracy_Positive - double", + "Unit_Device_atan2_Accuracy_Positive - float", + "Unit_Device_atan2_Accuracy_Positive - double", + "Unit_Device_sincos_Accuracy_Positive - float", + "Unit_Device_sincos_Accuracy_Positive - double", + "Unit_Device_sincospi_Accuracy_Positive - float", + "Unit_Device_sincospi_Accuracy_Positive - double", + "Unit_Device_fabs_Accuracy_Positive - float", + "Unit_Device_fabs_Accuracy_Positive - double", + "Unit_Device_copysign_Accuracy_Positive - float", + "Unit_Device_copysign_Accuracy_Positive - double", + "Unit_Device_fmax_Accuracy_Positive - float", + "Unit_Device_fmax_Accuracy_Positive - double", + "Unit_Device_fmin_Accuracy_Positive - float", + "Unit_Device_fmin_Accuracy_Positive - double", + "Unit_Device_nextafter_Accuracy_Positive - float", + "Unit_Device_nextafter_Accuracy_Positive - double", + "Unit_Device_fma_Accuracy_Positive - float", + "Unit_Device_fma_Accuracy_Positive - double", + "Unit_Device_fdividef_Accuracy_Positive", + "Unit_Device_isfinite_Accuracy_Positive - float", + "Unit_Device_isfinite_Accuracy_Positive - double", + "Unit_Device_isinf_Accuracy_Positive - float", + "Unit_Device_isinf_Accuracy_Positive - double", + "Unit_Device_isnan_Accuracy_Positive - float", + "Unit_Device_isnan_Accuracy_Positive - double", + "Unit_Device_signbit_Accuracy_Positive - float", + "Unit_Device_signbit_Accuracy_Positive - double", + "Unit_Device_fmod_Accuracy_Positive - float", + "Unit_Device_fmod_Accuracy_Positive - double", + "Unit_Device_remainder_Accuracy_Positive - float", + "Unit_Device_remainder_Accuracy_Positive - 
double", + "Unit_Device_fdim_Accuracy_Positive - float", + "Unit_Device_fdim_Accuracy_Positive - double", + "Unit_Device_trunc_Accuracy_Positive - float", + "Unit_Device_trunc_Accuracy_Positive - double", + "Unit_Device_round_Accuracy_Positive - float", + "Unit_Device_round_Accuracy_Positive - double", + "Unit_Device_rint_Accuracy_Positive - float", + "Unit_Device_rint_Accuracy_Positive - double", + "Unit_Device_nearbyint_Accuracy_Positive - float", + "Unit_Device_nearbyint_Accuracy_Positive - double", + "Unit_Device_ceil_Accuracy_Positive - float", + "Unit_Device_ceil_Accuracy_Positive - double", + "Unit_Device_floor_Accuracy_Positive - float", + "Unit_Device_floor_Accuracy_Positive - double", + "Unit_Device_lrint_Accuracy_Positive - float", + "Unit_Device_lrint_Accuracy_Positive - double", + "Unit_Device_lround_Accuracy_Positive - float", + "Unit_Device_lround_Accuracy_Positive - double", + "Unit_Device_llrint_Accuracy_Positive - float", + "Unit_Device_llrint_Accuracy_Positive - double", + "Unit_Device_llround_Accuracy_Positive - float", + "Unit_Device_llround_Accuracy_Positive - double", + "Unit_Device_remquo_Accuracy_Positive - float", + "Unit_Device_remquo_Accuracy_Positive - double", + "Unit_Device_modf_Accuracy_Positive - float", + "Unit_Device_modf_Accuracy_Positive - double", + "=== Below tests cause timeout in stress test of 09/02/24 ===", + "Unit_Device___half2half2_Accuracy_Positive", + "Unit_Device_make_half2_Accuracy_Positive", + "Unit_Device___halves2half2_Accuracy_Positive", + "Unit_Device___low2half_Accuracy_Positive", + "Unit_Device___high2half_Accuracy_Positive", + "Unit_Device___low2half2_Accuracy_Positive", + "Unit_Device___high2half2_Accuracy_Positive", + "Unit_Device___lowhigh2highlow_Accuracy_Positive", + "Unit_Device___lows2half2_Accuracy_Positive", + "Unit_Device___highs2half2_Accuracy_Positive", + "Unit_Device___float2half2_rn_Accuracy_Positive", + "Unit_Device___floats2half2_rn_Accuracy_Positive", + 
"Unit_Device___float22half2_rn_Accuracy_Positive", + "Unit_Device___low2float_Accuracy_Positive", + "Unit_Device___high2float_Accuracy_Positive", + "Unit_Device___half22float2_Accuracy_Positive", + "Unit_Device_hcos_Accuracy_Positive", + "Unit_Device_h2cos_Accuracy_Positive", + "Unit_Device_hsin_Accuracy_Positive", + "Unit_Device_h2sin_Accuracy_Positive", + "Unit_Device_hexp_Accuracy_Positive", + "Unit_Device_h2exp_Accuracy_Positive", + "Unit_Device_hexp10_Accuracy_Positive", + "Unit_Device_h2exp10_Accuracy_Positive", + "Unit_Device_hexp2_Accuracy_Positive", + "Unit_Device_h2exp2_Accuracy_Positive", + "Unit_Device_hlog_Accuracy_Positive", + "Unit_Device_h2log_Accuracy_Positive", + "Unit_Device_hlog10_Accuracy_Positive", + "Unit_Device_h2log10_Accuracy_Positive", + "Unit_Device_hlog2_Accuracy_Positive", + "Unit_Device_h2log2_Accuracy_Positive", + "Unit_Device_hsqrt_Accuracy_Positive", + "Unit_Device_h2sqrt_Accuracy_Positive", + "Unit_Device_hceil_Accuracy_Positive", + "Unit_Device_h2ceil_Accuracy_Positive", + "Unit_Device_hfloor_Accuracy_Positive", + "Unit_Device_h2floor_Accuracy_Positive", + "Unit_Device_htrunc_Accuracy_Positive", + "Unit_Device_h2trunc_Accuracy_Positive", + "Unit_Device_hrcp_Accuracy_Positive", + "Unit_Device_h2rcp_Accuracy_Positive", + "Unit_Device_hrsqrt_Accuracy_Positive", + "Unit_Device_h2rsqrt_Accuracy_Positive", + "Unit_Device_hrint_Accuracy_Positive", + "Unit_Device_h2rint_Accuracy_Positive", + "Unit_Device___habs_Accuracy_Positive", + "Unit_Device___habs2_Accuracy_Positive", + "Unit_Device___hneg_Accuracy_Positive", + "Unit_Device___hneg2_Accuracy_Positive", + "Unit_Device___hadd_wrapper_Accuracy_Positive", + "Unit_Device___hadd2_Accuracy_Positive", + "Unit_Device___hadd_sat_Accuracy_Positive", + "Unit_Device___hadd2_sat_Accuracy_Positive", + "Unit_Device___hsub_Accuracy_Positive", + "Unit_Device___hsub2_Accuracy_Positive", + "Unit_Device___hsub_sat_Accuracy_Positive", + "Unit_Device___hsub2_sat_Accuracy_Positive", + 
"Unit_Device___hmul_Accuracy_Positive", + "Unit_Device___hmul2_Accuracy_Positive", + "Unit_Device___hmul_sat_Accuracy_Positive", + "Unit_Device___hmul2_sat_Accuracy_Positive", + "Unit_Device___hdiv_Accuracy_Positive", + "Unit_Device___h2div_Accuracy_Positive", + "Unit_Device___hfma_Accuracy_Positive", + "Unit_Device___hfma2_Accuracy_Positive", + "Unit_Device___hfma_sat_Accuracy_Positive", + "Unit_Device___hfma2_sat_Accuracy_Positive", + "Unit_Device___hisinf_Accuracy_Positive", + "Unit_Device___hisinf2_Accuracy_Positive", + "Unit_Device___hisnan_Accuracy_Positive", + "Unit_Device___hisnan2_Accuracy_Positive", + "Unit_Device___heq_Accuracy_Positive", + "Unit_Device___hbeq2_Accuracy_Positive", + "Unit_Device___hequ_Accuracy_Positive", + "Unit_Device___hbequ2_Accuracy_Positive", + "Unit_Device___heq2_Accuracy_Positive", + "Unit_Device___hequ2_Accuracy_Positive", + "Unit_Device___hne_Accuracy_Positive", + "Unit_Device___hbne2_Accuracy_Positive", + "Unit_Device___hneu_Accuracy_Positive", + "Unit_Device___hbneu2_Accuracy_Positive", + "Unit_Device___hne2_Accuracy_Positive", + "Unit_Device___hneu2_Accuracy_Positive", + "Unit_Device___hge_Accuracy_Positive", + "Unit_Device___hbge2_Accuracy_Positive", + "Unit_Device___hgeu_Accuracy_Positive", + "Unit_Device___hbgeu2_Accuracy_Positive", + "Unit_Device___hge2_Accuracy_Positive", + "Unit_Device___hgeu2_Accuracy_Positive", + "Unit_Device___hgt_Accuracy_Positive", + "Unit_Device___hbgt2_Accuracy_Positive", + "Unit_Device___hgtu_Accuracy_Positive", + "Unit_Device___hbgtu2_Accuracy_Positive", + "Unit_Device___hgt2_Accuracy_Positive", + "Unit_Device___hgtu2_Accuracy_Positive", + "Unit_Device___hle_Accuracy_Positive", + "Unit_Device___hble2_Accuracy_Positive", + "Unit_Device___hleu_Accuracy_Positive", + "Unit_Device___hbleu2_Accuracy_Positive", + "Unit_Device___hle2_Accuracy_Positive", + "Unit_Device___hleu2_Accuracy_Positive", + "Unit_Device___hlt_Accuracy_Positive", + "Unit_Device___hblt2_Accuracy_Positive", + 
"Unit_Device___hltu_Accuracy_Positive", + "Unit_Device___hbltu2_Accuracy_Positive", + "Unit_Device___hlt2_Accuracy_Positive", + "Unit_Device___hltu2_Accuracy_Positive", + "Unit_Device___hmax_Accuracy_Positive", + "Unit_Device___hmin_Accuracy_Positive", + "Unit_Device___hmax_nan_Accuracy_Positive", + "Unit_Device___hmin_nan_Accuracy_Positive", + "Unit_Device___half2int_rn_Accuracy_Positive", + "Unit_Device___half2int_rz_Accuracy_Positive", + "Unit_Device___half2int_rd_Accuracy_Positive", + "Unit_Device___half2int_ru_Accuracy_Positive", + "Unit_Device___half2uint_rn_Accuracy_Positive", + "Unit_Device___half2uint_rz_Accuracy_Positive", + "Unit_Device___half2uint_rd_Accuracy_Positive", + "Unit_Device___half2uint_ru_Accuracy_Positive", + "Unit_Device___half2short_rn_Accuracy_Positive", + "Unit_Device___half2short_rz_Accuracy_Positive", + "Unit_Device___half2short_rd_Accuracy_Positive", + "Unit_Device___half2short_ru_Accuracy_Positive", + "Unit_Device___half2ushort_rn_Accuracy_Positive", + "Unit_Device___half2ushort_rz_Accuracy_Positive", + "Unit_Device___half2ushort_rd_Accuracy_Positive", + "Unit_Device___half2ushort_ru_Accuracy_Positive", + "Unit_Device___half2ll_rn_Accuracy_Positive", + "Unit_Device___half2ll_rz_Accuracy_Positive", + "Unit_Device___half2ll_rd_Accuracy_Positive", + "Unit_Device___half2ll_ru_Accuracy_Positive", + "Unit_Device___half2ull_rn_Accuracy_Positive", + "Unit_Device___half2ull_rz_Accuracy_Positive", + "Unit_Device___half2ull_rd_Accuracy_Positive", + "Unit_Device___half2ull_ru_Accuracy_Positive", + "Unit_Device___half_as_short_Accuracy_Positive", + "Unit_Device___half_as_ushort_Accuracy_Positive", + "Unit_Device___int2half_rn_Accuracy_Positive", + "Unit_Device___int2half_rz_Accuracy_Positive", + "Unit_Device___int2half_rd_Accuracy_Positive", + "Unit_Device___int2half_ru_Accuracy_Positive", + "Unit_Device___uint2half_rn_Accuracy_Positive", + "Unit_Device___uint2half_rz_Accuracy_Positive", + "Unit_Device___uint2half_rd_Accuracy_Positive", + 
"Unit_Device___uint2half_ru_Accuracy_Positive", + "Unit_Device___short2half_rn_Accuracy_Positive", + "Unit_Device___short2half_rz_Accuracy_Positive", + "Unit_Device___short2half_rd_Accuracy_Positive", + "Unit_Device___short2half_ru_Accuracy_Positive", + "Unit_Device___ushort2half_rn_Accuracy_Positive", + "Unit_Device___ushort2half_rz_Accuracy_Positive", + "Unit_Device___ushort2half_rd_Accuracy_Positive", + "Unit_Device___ushort2half_ru_Accuracy_Positive", + "Unit_Device___ll2half_rn_Accuracy_Positive", + "Unit_Device___ll2half_rz_Accuracy_Positive", + "Unit_Device___ll2half_rd_Accuracy_Positive", + "Unit_Device___ll2half_ru_Accuracy_Positive", + "Unit_Device___ull2half_rn_Accuracy_Positive", + "Unit_Device___ull2half_rz_Accuracy_Positive", + "Unit_Device___ull2half_rd_Accuracy_Positive", + "Unit_Device___ull2half_ru_Accuracy_Positive", + "Unit_Device___short_as_half_Accuracy_Positive", + "Unit_Device___ushort_as_half_Accuracy_Positive", + "Unit_Device___float2half_rn_Accuracy_Positive", + "Unit_Device___float2half_Accuracy_Positive", + "Unit_Device___half2float_Accuracy_Positive", + "Unit_Device___frcp_rn_Accuracy_Positive", + "Unit_Device___fsqrt_rn_Accuracy_Positive", + "Unit_Device___frsqrt_rn_Accuracy_Positive", + "Unit_Device___expf_Accuracy_Positive", + "Unit_Device___exp10f_Accuracy_Positive", + "Unit_Device___logf_Accuracy_Positive", + "Unit_Device___log2f_Accuracy_Positive", + "Unit_Device___log10f_Accuracy_Positive", + "Unit_Device___sinf_Accuracy_Positive", + "Unit_Device___sincosf_sin_Accuracy_Positive", + "Unit_Device___cosf_Accuracy_Positive", + "Unit_Device___sincosf_cos_Accuracy_Positive", + "Unit_Device___fadd_rn_Accuracy_Positive", + "Unit_Device___fsub_rn_Accuracy_Positive", + "Unit_Device___fmul_rn_Accuracy_Positive", + "Unit_Device___fdiv_rn_Accuracy_Positive", + "Unit_Device___fdividef_Accuracy_Positive", + "Unit_Device___fmaf_rn_Accuracy_Positive", + "Unit_Device___drcp_rn_Accuracy_Positive", + "Unit_Device___dsqrt_rn_Accuracy_Positive", + 
"Unit_Device___dadd_rn_Accuracy_Positive", + "Unit_Device___dsub_rn_Accuracy_Positive", + "Unit_Device___dmul_rn_Accuracy_Positive", + "Unit_Device___ddiv_rn_Accuracy_Positive", + "Unit_Device___fma_rn_Accuracy_Positive", + "Unit_Device_sqrtf_Accuracy_Positive", + "Unit_Device_sqrt_Accuracy_Positive", + "Unit_Device_rsqrtf_Accuracy_Positive", + "Unit_Device_rsqrt_Accuracy_Positive", + "Unit_Device_cbrt_Accuracy_Positive - float", + "Unit_Device_cbrt_Accuracy_Positive - double", + "Unit_Device_rcbrtf_Accuracy_Positive", + "Unit_Device_rcbrt_Accuracy_Positive", + "Unit_Device_hypot_Accuracy_Positive - float", + "Unit_Device_hypot_Accuracy_Positive - double", + "Unit_Device_rhypot_Accuracy_Positive - float", + "Unit_Device_rhypot_Accuracy_Positive - double", + "Unit_Device_norm3d_Accuracy_Positive - float", + "Unit_Device_norm3d_Accuracy_Positive - double", + "Unit_Device_rnorm3d_Accuracy_Positive - float", + "Unit_Device_rnorm3d_Accuracy_Positive - double", + "Unit_Device_norm4d_Accuracy_Positive - float", + "Unit_Device_norm4d_Accuracy_Positive - double", + "Unit_Device_rnorm4d_Accuracy_Positive - float", + "Unit_Device_rnorm4d_Accuracy_Positive - double", + "Unit_Device_exp_Accuracy_Positive - float", + "Unit_Device_exp_Accuracy_Positive - double", + "Unit_Device_exp2_Accuracy_Positive - float", + "Unit_Device_exp2_Accuracy_Positive - double", + "Unit_Device_expm1_Accuracy_Positive - float", + "Unit_Device_expm1_Accuracy_Positive - double", + "Unit_Device_exp10f_Accuracy_Positive", + "Unit_Device_exp10_Accuracy_Positive", + "Unit_Device_frexpf_Accuracy_Positive", + "Unit_Device_frexp_Accuracy_Positive", + "Unit_Device_pow_Accuracy_Positive - float", + "Unit_Device_pow_Accuracy_Positive - double", + "Unit_Device_ldexp_Accuracy_Positive - float", + "Unit_Device_ldexp_Accuracy_Positive - double", + "Unit_Device_powi_Accuracy_Positive - float", + "Unit_Device_powi_Accuracy_Positive - double", + "Unit_Device_scalbn_Accuracy_Positive - float", + 
"Unit_Device_scalbn_Accuracy_Positive - double", + "Unit_Device_scalbln_Accuracy_Positive - float", + "Unit_Device_scalbln_Accuracy_Positive - double", + "Unit_Device_log_Accuracy_Positive - float", + "Unit_Device_log_Accuracy_Positive - double", + "Unit_Device_log2_Accuracy_Positive - float", + "Unit_Device_log2_Accuracy_Positive - double", + "Unit_Device_log10_Accuracy_Positive - float", + "Unit_Device_log10_Accuracy_Positive - double", + "Unit_Device_log1p_Accuracy_Positive - float", + "Unit_Device_log1p_Accuracy_Positive - double", + "Unit_Device_logb_Accuracy_Positive - float", + "Unit_Device_logb_Accuracy_Positive - double", + "Unit_Device_ilogbf_Accuracy_Positive", + "Unit_Device_ilogb_Accuracy_Positive", + "Unit_Device_erf_Accuracy_Positive - float", + "Unit_Device_erf_Accuracy_Positive - double", + "Unit_Device_erfc_Accuracy_Positive - float", + "Unit_Device_erfc_Accuracy_Positive - double", + "Unit_Device_erfinvf_Accuracy_Positive", + "Unit_Device_erfinv_Accuracy_Positive", + "Unit_Device_erfcinvf_Accuracy_Positive", + "Unit_Device_erfcinv_Accuracy_Positive", + "Unit_Device_normcdff_Accuracy_Positive", + "Unit_Device_normcdf_Accuracy_Positive", + "Unit_Device_tgammaf_Accuracy_Limited_Positive", + "Unit_Device_tgamma_Accuracy_Limited_Positive", + "Unit_Device_lgammaf_Accuracy_Limited_Positive", + "Unit_Device_lgamma_Accuracy_Limited_Positive", + "Unit_Device_cyl_bessel_i0f_Accuracy_Limited_Positive", + "Unit_Device_cyl_bessel_i0_Accuracy_Limited_Positive", + "Unit_Device_cyl_bessel_i1f_Accuracy_Limited_Positive", + "Unit_Device_cyl_bessel_i1_Accuracy_Limited_Positive", + "Unit_Device_y0f_Accuracy_Limited_Positive", + "Unit_Device_y0_Accuracy_Limited_Positive", + "Unit_Device_y1f_Accuracy_Limited_Positive", + "Unit_Device_y1_Accuracy_Limited_Positive", + "Unit_Device_ynf_Accuracy_Limited_Positive", + "Unit_Device_yn_Accuracy_Limited_Positive", + "Unit_Device_j0f_Accuracy_Limited_Positive", + "Unit_Device_j0_Accuracy_Limited_Positive", + 
"Unit_Device_j1f_Accuracy_Limited_Positive", + "Unit_Device_j1_Accuracy_Limited_Positive", + "Unit_Device_jnf_Accuracy_Limited_Positive", + "Unit_Device_jn_Accuracy_Limited_Positive", + "Unit_Device___double2int_rd_Positive", + "Unit_Device___double2int_rn_Positive", + "Unit_Device___double2int_ru_Positive", + "Unit_Device___double2int_rz_Positive", + "Unit_Device___double2int_Negative_RTC", + "Unit_Device___double2uint_rd_Positive", + "Unit_Device___double2uint_rn_Positive", + "Unit_Device___double2uint_ru_Positive", + "Unit_Device___double2uint_rz_Positive", + "Unit_Device___double2uint_Negative_RTC", + "Unit_Device___double2ll_rd_Positive", + "Unit_Device___double2ll_rn_Positive", + "Unit_Device___double2ll_ru_Positive", + "Unit_Device___double2ll_rz_Positive", + "Unit_Device___double2ll_Negative_RTC", + "Unit_Device___double2ull_rd_Positive", + "Unit_Device___double2ull_rn_Positive", + "Unit_Device___double2ull_ru_Positive", + "Unit_Device___double2ull_rz_Positive", + "Unit_Device___double2ull_Negative_RTC", + "Unit_Device___double2float_rd_Positive", + "Unit_Device___double2float_rn_Positive", + "Unit_Device___double2float_ru_Positive", + "Unit_Device___double2float_rz_Positive", + "Unit_Device___double2float_Negative_RTC", + "Unit_Device___double2hiint_Positive", + "Unit_Device___double2hiint_Negative_RTC", + "Unit_Device___double2loint_Positive", + "Unit_Device___double2loint_Negative_RTC", + "Unit_Device___double_as_longlong_Positive", + "Unit_Device___double_as_longlong_Negative_RTC", + "Unit_Device___float2int_rd_Positive", + "Unit_Device___float2int_rn_Positive", + "Unit_Device___float2int_ru_Positive", + "Unit_Device___float2int_rz_Positive", + "Unit_Device___float2int_Negative_RTC", + "Unit_Device___float2uint_rd_Positive", + "Unit_Device___float2uint_rn_Positive", + "Unit_Device___float2uint_ru_Positive", + "Unit_Device___float2uint_rz_Positive", + "Unit_Device___float2uint_Negative_RTC", + "Unit_Device___float2ll_rd_Positive", + 
"Unit_Device___float2ll_rn_Positive", + "Unit_Device___float2ll_ru_Positive", + "Unit_Device___float2ll_rz_Positive", + "Unit_Device___float2ll_Negative_RTC", + "Unit_Device___float2ull_rd_Positive", + "Unit_Device___float2ull_rn_Positive", + "Unit_Device___float2ull_ru_Positive", + "Unit_Device___float2ull_rz_Positive", + "Unit_Device___float2ull_Negative_RTC", + "Unit_Device___float_as_int_Positive", + "Unit_Device___float_as_int_Negative_RTC", + "Unit_Device___float_as_uint_Positive", + "Unit_Device___float_as_uint_Negative_RTC", + "Unit_Device___int2float_rd_Positive", + "Unit_Device___int2float_rn_Positive", + "Unit_Device___int2float_ru_Positive", + "Unit_Device___int2float_rz_Positive", + "Unit_Device_int2float___Negative_RTC", + "Unit_Device___uint2float_rd_Positive", + "Unit_Device___uint2float_rn_Positive", + "Unit_Device___uint2float_ru_Positive", + "Unit_Device___uint2float_rz_Positive", + "Unit_Device___uint2float_Negative_RTC", + "Unit_Device___int2double_rn_Positive", + "Unit_Device___int2double_Negative_RTC", + "Unit_Device___uint2double_rn_Positive", + "Unit_Device___uint2double_Negative_RTC", + "Unit_Device___ll2float_rd_Positive", + "Unit_Device___ll2float_rn_Positive", + "Unit_Device___ll2float_ru_Positive", + "Unit_Device___ll2float_rz_Positive", + "Unit_Device___ll2float_Negative_RTC", + "Unit_Device___ull2float_rd_Positive", + "Unit_Device___ull2float_rn_Positive", + "Unit_Device___ull2float_ru_Positive", + "Unit_Device___ull2float_rz_Positive", + "Unit_Device___ull2float_Negative_RTC", + "Unit_Device___ll2double_rd_Positive", + "Unit_Device___ll2double_rn_Positive", + "Unit_Device___ll2double_ru_Positive", + "Unit_Device___ll2double_rz_Positive", + "Unit_Device___ll2double_Negative_RTC", + "Unit_Device___ull2double_rd_Positive", + "Unit_Device___ull2double_rn_Positive", + "Unit_Device___ull2double_ru_Positive", + "Unit_Device___ull2double_rz_Positive", + "Unit_Device___ull2double_Negative_RTC", + "Unit_Device___int_as_float_Positive", + 
"Unit_Device___int_as_float_Negative_RTC", + "Unit_Device___uint_as_float_Positive", + "Unit_Device___uint_as_float_Negative_RTC", + "Unit_Device___longlong_as_double_Positive", + "Unit_Device___longlong_as_double_Negative_RTC", + "Unit_Device___hiloint2double_Positive", + "Unit_Device___hiloint2double_Negative_RTC", + "Unit___hip_atomic_load_store_Positive_Acquire_Release", + "Unit___hip_atomic_exchange_Positive_Acquire_Release", + "Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release", + "Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_add_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_and_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_or_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_xor_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_min_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_max_Positive_Acquire_Release", + "Unit___hip_atomic_load_store_Positive_Sequential_Consistency", + "Unit___hip_atomic_exchange_Positive_Sequential_Consistency", + "Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency", + "Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - int", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - float", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - double", + 
"Unit___hip_atomic_fetch_add_Positive_Workgroup - int", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - float", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - double", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double", + "Unit_atomicAdd_Positive - int", + "Unit_atomicAdd_Positive - unsigned int", + "Unit_atomicAdd_Positive - unsigned long", + "Unit_atomicAdd_Positive - unsigned long long", + "Unit_atomicAdd_Positive - float", + "Unit_atomicAdd_Positive - double", + "Unit_atomicAdd_Positive_Multi_Kernel - int", + "Unit_atomicAdd_Positive_Multi_Kernel - unsigned int", + "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long", + "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long", + "Unit_atomicAdd_Positive_Multi_Kernel - float", + "Unit_atomicAdd_Positive_Multi_Kernel - double", + "Unit_atomicAdd_Negative_Parameters_RTC", + "Unit_atomicAdd_system_Positive_Peer_GPUs - int", 
+ "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int", + "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long", + "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long", + "Unit_atomicAdd_system_Positive_Peer_GPUs - float", + "Unit_atomicAdd_system_Positive_Peer_GPUs - double", + "Unit_atomicAdd_system_Positive_Host_And_GPU - int", + "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int", + "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long", + "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long", + "Unit_atomicAdd_system_Positive_Host_And_GPU - float", + "Unit_atomicAdd_system_Positive_Host_And_GPU - double", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double", + "Unit_unsafeAtomicAdd_Positive - float", + "Unit_unsafeAtomicAdd_Positive - double", + "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float", + "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double", + "Unit_safeAtomicAdd_Positive - float", + "Unit_safeAtomicAdd_Positive - double", + "Unit_safeAtomicAdd_Positive_Multi_Kernel - float", + "Unit_safeAtomicAdd_Positive_Multi_Kernel - double", + "Unit_atomicSub_Positive - int", + "Unit_atomicSub_Positive - unsigned int", + "Unit_atomicSub_Positive - unsigned long", + "Unit_atomicSub_Positive - unsigned long long", + "Unit_atomicSub_Positive - float", + "Unit_atomicSub_Positive - double", + "Unit_atomicSub_Positive_Multi_Kernel - int", + "Unit_atomicSub_Positive_Multi_Kernel - unsigned int", + "Unit_atomicSub_Positive_Multi_Kernel - unsigned long", + "Unit_atomicSub_Positive_Multi_Kernel - unsigned long long", + "Unit_atomicSub_Positive_Multi_Kernel - float", + 
"Unit_atomicSub_Positive_Multi_Kernel - double", + "Unit_atomicSub_Negative_Parameters_RTC", + "Unit_atomicSub_system_Positive_Peer_GPUs - int", + "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int", + "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long", + "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long", + "Unit_atomicSub_system_Positive_Peer_GPUs - float", + "Unit_atomicSub_system_Positive_Peer_GPUs - double", + "Unit_atomicSub_system_Positive_Host_And_GPU - int", + "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int", + "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long", + "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long", + "Unit_atomicSub_system_Positive_Host_And_GPU - float", + "Unit_atomicSub_system_Positive_Host_And_GPU - double", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double", + "Unit_atomicInc_Positive - unsigned int", + "Unit_atomicInc_Positive_Multi_Kernel - unsigned int", + "Unit_atomicInc_Negative_Parameters_RTC", + "Unit_atomicDec_Positive - unsigned int", + "Unit_atomicDec_Positive_Multi_Kernel - unsigned int", + "Unit_atomicDec_Negative_Parameters_RTC", + "Unit_atomicCAS_Positive - int", + "Unit_atomicCAS_Positive - unsigned int", + "Unit_atomicCAS_Positive - unsigned long long", + "Unit_atomicCAS_Positive_Multi_Kernel - int", + "Unit_atomicCAS_Positive_Multi_Kernel - unsigned int", + "Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long", + "Unit_atomicCAS_Negative_Parameters_RTC", + "Unit_atomicCAS_system_Positive_Peer_GPUs - int", + "Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int", + 
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long", + "Unit_atomicCAS_system_Positive_Host_And_GPU - int", + "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int", + "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long", + "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int", + "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int", + "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long", + "SWDEV-447384, SWDEV-447932: These tests fail in Navi31, Navi32 & Navi33", "Unit_hipMemPoolSetAccess_Negative_Parameters", - "Unit_hipMipmappedArrayCreate_Negative_Parameters", - "Unit_hipMipmappedArrayDestroy_Negative_Parameters", - "Unit_hipMipmappedArrayGetLevel_Negative_Parameters", - "=== SWDEV-438556:Below tests failed in stress test on 15/12/23 ===", "Unit_hipFreeAsync_Negative_Parameters", "Unit_hipMallocMipmappedArray_DiffSizes", "Unit_hipMallocMipmappedArray_MultiThread", @@ -365,11 +1421,69 @@ "Unit_hipFreeMipmappedArray_Negative_DoubleFree", "Unit_hipFreeMipmappedArrayMultiTArray - char", "Unit_hipFreeMipmappedArrayMultiTArray - int", + "Unit_hipIpcGetMemHandle_Positive_Unique_Handles_Reused_Memory", + "Unit_hipMallocMipmappedArray_Negative_Parameters", + "Unit_hipFreeMipmappedArray_Negative_Parameters", + "Unit_hipGetMipmappedArrayLevel_Negative_Parameters", + "Unit_hipMipmappedArrayCreate_Negative_Parameters", + "Unit_hipMipmappedArrayDestroy_Negative_Parameters", + "Unit_hipMipmappedArrayGetLevel_Negative_Parameters", "Unit_Multi_Grid_Group_Getters_Positive_Basic", "Unit_Multi_Grid_Group_Getters_Positive_Base_Type", "Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions", - "Unit_Coalesced_Group_Getters_Positive_Basic", "Unit_Coalesced_Group_Getters_Via_Base_Type_Positive_Basic", + "Unit_Coalesced_Group_Shfl_Positive_Basic - int", + "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int", + "Unit_Coalesced_Group_Shfl_Positive_Basic - long", + "Unit_Coalesced_Group_Shfl_Positive_Basic - 
unsigned long", + "Unit_Coalesced_Group_Shfl_Positive_Basic - long long", + "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long", + "Unit_Coalesced_Group_Shfl_Positive_Basic - float", + "Unit_Coalesced_Group_Shfl_Positive_Basic - double", + "SWDEV-445928: These tests fail in PSDB stress test on 09/02/2024", + "Unit_hipGraphAddNodeTypeMemset_Positive_Basic - uint8_t", + "Unit_hipGraphAddNodeTypeMemset_Positive_Basic - uint16_t", + "Unit_hipGraphAddNodeTypeMemset_Positive_Basic - uint32_t", + "Unit_hipGraphAddNodeTypeMemcpy_Positive_Basic", + "Unit_hipMemAdvise_TstAlignedAllocMem_XNACK", + "Unit_hipArrayGetDescriptor_Positive_Basic", + "Unit_hipArrayGetDescriptor_Negative_Parameters", + "Unit_hipArrayGetInfo_Positive_Basic", + "Unit_hipArrayGetInfo_Negative_Parameters", + "Unit_hipArray3DGetDescriptor_Positive_Basic", + "Unit_hipArray3DGetDescriptor_Negative_Parameters", + "Unit_hipCreateSurfaceObject_Negative_Parameters", + "Unit_hipDestroySurfaceObject_Negative_Parameters", + "Unit_Device___float2half_rd_Accuracy_Limited_Positive", + "Unit_Device___float2half_ru_Accuracy_Limited_Positive", + "Unit_Device___float2half_rz_Accuracy_Limited_Positive", + "Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg", + "Unit_hipModuleLaunchCooperativeKernelMultiDevice_Positive_Basic", + "Unit_hipModuleLaunchCooperativeKernelMultiDevice_Negative_Parameters", + "Unit_hipModuleLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice", + #endif + #if defined VEGA20 + "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===", + "Unit_deviceAllocation_Malloc_ComplexDataType", + #endif + #if defined MI100 + "=== Below test soft hang in stress test on 29/08/23 ===", + "Unit_hipMultiThreadStreams2", + "=== SWDEV-425248:Below tests failed in stress test on 11/10/23 ===", + "Unit_hipHostRegister_Memcpy - double", + "Unit_hipP2pLinkTypeAndHopFunc", + "=== SWDEV-426219:This test fails in integrity test & PSDB ===", + "Unit_hipLaunchParm", + "=== 
SWDEV-432554:Below test failed in stress test on 10/11/23 ===", + "Unit_hipMemcpy3DAsync_Positive_Basic", + "Unit_hipDrvMemcpy3DAsync_Positive_Basic", + "Print_Out_Attributes", + "Unit_hipExtGetLinkTypeAndHopCount_Positive_Basic", + "Unit_hipClock64_Positive_Basic", + "Unit_hipClock_Positive_Basic", + "=== Below tests failed in integrity test on 08/12/23 ===", + "=== SWDEV-438556:Below tests failed in stress test on 15/12/23 ===", + "Unit_Coalesced_Group_Getters_Positive_Basic", "Unit_Coalesced_Group_Getters_Via_Non_Member_Functions_Positive_Basic", "Unit_Coalesced_Group_Shfl_Up_Positive_Basic - int", "Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned int", @@ -387,20 +1501,14 @@ "Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long long", "Unit_Coalesced_Group_Shfl_Down_Positive_Basic - float", "Unit_Coalesced_Group_Shfl_Down_Positive_Basic - double", - "Unit_Coalesced_Group_Shfl_Positive_Basic - int", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int", - "Unit_Coalesced_Group_Shfl_Positive_Basic - long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - long long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - float", - "Unit_Coalesced_Group_Shfl_Positive_Basic - double", "Unit_Coalesced_Group_Sync_Positive_Basic - uint8_t", "Unit_Coalesced_Group_Sync_Positive_Basic - uint16_t", "Unit_Coalesced_Group_Sync_Positive_Basic - uint32_t", "Unit_Warp_Ballot_Positive_Basic", "Unit_Warp_Vote_Any_Positive_Basic", "Unit_Warp_Vote_All_Positive_Basic", + "=== SWDEV-443630 - Below tests failed in stress test on 19/01/23 ===", + "Unit_hipGetSetDevice_MultiThreaded", #endif #if defined MI2XX "Unit_hipStreamPerThread_DeviceReset_1", @@ -464,34 +1572,7 @@ "Unit_cache_coherency_cpu_gpu", "Unit_cache_coherency_gpu_gpu", "=== SWDEV-438556:Below tests failed in stress test on 15/12/23 ===", - "Unit_hipFreeAsync_Negative_Parameters", - 
"Unit_hipMallocMipmappedArray_DiffSizes", - "Unit_hipMallocMipmappedArray_MultiThread", - "Unit_hipMallocMipmappedArray_happy - char", - "Unit_hipMallocMipmappedArray_happy - uint2", - "Unit_hipMallocMipmappedArray_happy - int4", - "Unit_hipMallocMipmappedArray_happy - short4", - "Unit_hipMallocMipmappedArray_happy - float", - "Unit_hipMallocMipmappedArray_Negative_ZeroWidth", - "Unit_hipMallocMipmappedArray_Negative_ZeroHeight", - "Unit_hipMallocMipmappedArray_Negative_InvalidFlags", - "Unit_hipMallocMipmappedArray_Negative_InvalidFormat", - "Unit_hipMallocMipmappedArray_Negative_BadChannelLayout", - "Unit_hipMallocMipmappedArray_Negative_8BitFloat", - "Unit_hipMallocMipmappedArray_Negative_BadChannelSize", - "Unit_hipMallocMipmappedArray_Negative_NumericLimit", - "Unit_hipMallocMipmappedArray_Negative_NumLevels", - "Unit_hipGetMipmappedArrayLevel_Negative", - "Unit_hipFreeMipmappedArrayImplicitSyncArray - char", - "Unit_hipFreeMipmappedArrayImplicitSyncArray - float", - "Unit_hipFreeMipmappedArray_Negative_DoubleFree", - "Unit_hipFreeMipmappedArrayMultiTArray - char", - "Unit_hipFreeMipmappedArrayMultiTArray - int", - "Unit_Multi_Grid_Group_Getters_Positive_Basic", - "Unit_Multi_Grid_Group_Getters_Positive_Base_Type", - "Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions", "Unit_Coalesced_Group_Getters_Positive_Basic", - "Unit_Coalesced_Group_Getters_Via_Base_Type_Positive_Basic", "Unit_Coalesced_Group_Getters_Via_Non_Member_Functions_Positive_Basic", "Unit_Coalesced_Group_Shfl_Up_Positive_Basic - int", "Unit_Coalesced_Group_Shfl_Up_Positive_Basic - unsigned int", @@ -509,14 +1590,6 @@ "Unit_Coalesced_Group_Shfl_Down_Positive_Basic - unsigned long long", "Unit_Coalesced_Group_Shfl_Down_Positive_Basic - float", "Unit_Coalesced_Group_Shfl_Down_Positive_Basic - double", - "Unit_Coalesced_Group_Shfl_Positive_Basic - int", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int", - "Unit_Coalesced_Group_Shfl_Positive_Basic - long", - 
"Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - long long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - float", - "Unit_Coalesced_Group_Shfl_Positive_Basic - double", "Unit_Coalesced_Group_Sync_Positive_Basic - uint8_t", "Unit_Coalesced_Group_Sync_Positive_Basic - uint16_t", "Unit_Coalesced_Group_Sync_Positive_Basic - uint32_t", @@ -525,52 +1598,12 @@ "Unit_Warp_Vote_All_Positive_Basic", "=== SWDEV-439298: Below test failing in CQE staging ===", "Unit_hipCGMultiGridGroupType_Barrier", + "=== SWDEV-443630 : Below test failed in stress test on 19/01/24 ===", + "Unit_Multi_Grid_Group_Positive_Sync", #endif #if defined NAVI21 - "=== Below tests failed in stress test on 08/12/23 ===", - "Unit_hipMallocMipmappedArray_Negative_Parameters", - "Unit_hipFreeMipmappedArray_Negative_Parameters", - "Unit_hipGetMipmappedArrayLevel_Negative_Parameters", - "Unit_hipMemPoolSetAccess_Negative_Parameters", - "Unit_hipMipmappedArrayCreate_Negative_Parameters", - "Unit_hipMipmappedArrayDestroy_Negative_Parameters", - "Unit_hipMipmappedArrayGetLevel_Negative_Parameters", - "Below tests failed in stress test on 15/12/23 ===", - "Unit_hipFreeAsync_Negative_Parameters", - "Unit_hipMallocMipmappedArray_DiffSizes", - "Unit_hipMallocMipmappedArray_MultiThread", - "Unit_hipMallocMipmappedArray_happy - char", - "Unit_hipMallocMipmappedArray_happy - uint2", - "Unit_hipMallocMipmappedArray_happy - int4", - "Unit_hipMallocMipmappedArray_happy - short4", - "Unit_hipMallocMipmappedArray_happy - float", - "Unit_hipMallocMipmappedArray_Negative_ZeroWidth", - "Unit_hipMallocMipmappedArray_Negative_ZeroHeight", - "Unit_hipMallocMipmappedArray_Negative_InvalidFlags", - "Unit_hipMallocMipmappedArray_Negative_InvalidFormat", - "Unit_hipMallocMipmappedArray_Negative_BadChannelLayout", - "Unit_hipMallocMipmappedArray_Negative_8BitFloat", - 
"Unit_hipMallocMipmappedArray_Negative_BadChannelSize", - "Unit_hipMallocMipmappedArray_Negative_NumericLimit", - "Unit_hipMallocMipmappedArray_Negative_NumLevels", - "Unit_hipGetMipmappedArrayLevel_Negative", - "Unit_hipFreeMipmappedArrayImplicitSyncArray - char", - "Unit_hipFreeMipmappedArrayImplicitSyncArray - float", - "Unit_hipFreeMipmappedArray_Negative_DoubleFree", - "Unit_hipFreeMipmappedArrayMultiTArray - char", - "Unit_hipFreeMipmappedArrayMultiTArray - int", - "Unit_Multi_Grid_Group_Getters_Positive_Basic", - "Unit_Multi_Grid_Group_Getters_Positive_Base_Type", - "Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions", - "Unit_Coalesced_Group_Getters_Via_Base_Type_Positive_Basic", - "Unit_Coalesced_Group_Shfl_Positive_Basic - int", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned int", - "Unit_Coalesced_Group_Shfl_Positive_Basic - long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - long long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - unsigned long long", - "Unit_Coalesced_Group_Shfl_Positive_Basic - float", - "Unit_Coalesced_Group_Shfl_Positive_Basic - double", + "=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===", + "Unit_hipStreamBeginCapture_hipStreamPerThread", #endif #if defined NAVI3X "=== Below tests soft hang in stress test on 13/09/23 ===", @@ -583,6 +1616,21 @@ "Grid_Group_Getters_Via_Non_Member_Functions_Positive_Basic", "Grid_Group_Sync_Positive_Basic", "dynamic_loading_device_kernels_from_library", + "=== SWDEV-443630 - Below tests failed in stress test on 19/01/23 ===", + "Unit_hipIpcOpenMemHandle_Negative_Open_In_Two_Contexts_Same_Device", + "Unit_hipIpcCloseMemHandle_Positive_Reference_Counting", + "Unit_hipStrmPerThrdDefault", + "=== SWDEV-445928: This test failed in PSDB stress test on 09/02/2024 ===", + "Unit_hipExtModuleLaunchKernel_Positive_Basic", + "=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===", + 
"Unit_hipGraphGetRootNodes_CapturedStream", + "Unit_hipStreamAddCaptureDependencies_Positive_Functional", + "Unit_hipMalloc3DArray_MaxTexture - float", + "Unit_hipStreamAttachMemAsync_Positive_AttachGlobal", + "Unit_hipStreamQuery_SubmitWorkOnStreamAndQueryNullStream", + "Unit_hipDeviceSynchronize_Functional", + "Unit_hipStreamPerThread_StreamQuery", + "Unit_hipMultiThreadStreams1_AsyncAsync", #endif "End of json" ] diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index 3d762a5846..1b690c3ec7 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -211,6 +211,10 @@ "Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory", "=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===", "Unit_hipVectorTypes_test_on_device", + "Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4", + "Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4", + "=== Below test is disabled due to defect EXSWHTEC-347 ===", + "Unit_hipPointerSetAttribute_Positive_SyncMemops", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "NOTE: The following test is disabled due to defect - EXSWHTEC-241", @@ -222,6 +226,12 @@ "NOTE: The following test is disabled due to defect - EXSWHTEC-244", "Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters", "Unit_hipMemAddressFree_negative", + "=== Below 2 tests are disable due to defect EXSWHTEC-369 ===", + "Unit_Device_ilogbf_Accuracy_Positive", + "Unit_Device_ilogb_Accuracy_Positive", + "NOTE: The following test is disabled due to defect - EXSWHTEC-245", + "Unit_hipFuncGetAttribute_Negative_Parameters", + "Unit_hipMemAddressFree_negative", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", "Unit_hipMemCreate_ChkWithKerLaunch", @@ -387,6 +397,261 @@ "Performance_hipMemsetD32Async", 
"Performance_hipMemcpy2D_HostToHost", "Performance_hipMemcpy2DAsync_HostToHost", + "Unit_hipDeviceGetGraphMemAttribute_Positive_ReuseMemory", + "Unit_hipGraphAddNodeTypeEventWait_Positive_Basic", + "Unit_hipDrvGraphAddMemsetNode_Negative_Parameters", + "Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D", + "Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D", + "Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D", + "Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D", + "Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D", + "Unit_hipDrvGraphAddMemsetNode_hipMallocManaged", + "Unit_hipDrvGraphAddMemcpyNode_Negative_Parameters", + "Unit_tex1Dfetch_Positive_ReadModeElementType - char", + "Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char", + "Unit_tex1Dfetch_Positive_ReadModeElementType - short", + "Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short", + "Unit_tex1Dfetch_Positive_ReadModeElementType - int", + "Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int", + "Unit_tex1Dfetch_Positive_ReadModeElementType - float", + "Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DGrad_Positive_ReadModeElementType - char", + "Unit_tex1DGrad_Positive_ReadModeElementType - unsigned char", + "Unit_tex1DGrad_Positive_ReadModeElementType - short", + 
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned short", + "Unit_tex1DGrad_Positive_ReadModeElementType - int", + "Unit_tex1DGrad_Positive_ReadModeElementType - unsigned int", + "Unit_tex1DGrad_Positive_ReadModeElementType - float", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DLayeredGrad_Positive_ReadModeElementType - char", + "Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned char", + "Unit_tex1DLayeredGrad_Positive_ReadModeElementType - short", + "Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned short", + "Unit_tex1DLayeredGrad_Positive_ReadModeElementType - int", + "Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned int", + "Unit_tex1DLayeredGrad_Positive_ReadModeElementType - float", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DLayeredLod_Positive_ReadModeElementType - char", + "Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned char", + "Unit_tex1DLayeredLod_Positive_ReadModeElementType - short", + "Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned short", + "Unit_tex1DLayeredLod_Positive_ReadModeElementType - int", + "Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned int", + "Unit_tex1DLayeredLod_Positive_ReadModeElementType - float", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned 
short", + "Unit_tex1DLod_Positive_ReadModeElementType - char", + "Unit_tex1DLod_Positive_ReadModeElementType - unsigned char", + "Unit_tex1DLod_Positive_ReadModeElementType - short", + "Unit_tex1DLod_Positive_ReadModeElementType - unsigned short", + "Unit_tex1DLod_Positive_ReadModeElementType - int", + "Unit_tex1DLod_Positive_ReadModeElementType - unsigned int", + "Unit_tex1DLod_Positive_ReadModeElementType - float", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex3D_Positive_ReadModeElementType - char", + "Unit_tex3D_Positive_ReadModeElementType - unsigned char", + "Unit_tex3D_Positive_ReadModeElementType - short", + "Unit_tex3D_Positive_ReadModeElementType - unsigned short", + "Unit_tex3D_Positive_ReadModeElementType - int", + "Unit_tex3D_Positive_ReadModeElementType - unsigned int", + "Unit_tex3D_Positive_ReadModeElementType - float", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - char", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - short", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex3DLod_Positive_ReadModeElementType - char", + "Unit_tex3DLod_Positive_ReadModeElementType - unsigned char", + "Unit_tex3DLod_Positive_ReadModeElementType - short", + "Unit_tex3DLod_Positive_ReadModeElementType - unsigned short", + "Unit_tex3DLod_Positive_ReadModeElementType - int", + "Unit_tex3DLod_Positive_ReadModeElementType - unsigned int", + "Unit_tex3DLod_Positive_ReadModeElementType - float", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short", + 
"Unit_tex3DGrad_Positive_ReadModeElementType - char", + "Unit_tex3DGrad_Positive_ReadModeElementType - unsigned char", + "Unit_tex3DGrad_Positive_ReadModeElementType - short", + "Unit_tex3DGrad_Positive_ReadModeElementType - unsigned short", + "Unit_tex3DGrad_Positive_ReadModeElementType - int", + "Unit_tex3DGrad_Positive_ReadModeElementType - unsigned int", + "Unit_tex3DGrad_Positive_ReadModeElementType - float", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemap_Positive_ReadModeElementType - char", + "Unit_texCubemap_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemap_Positive_ReadModeElementType - short", + "Unit_texCubemap_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemap_Positive_ReadModeElementType - int", + "Unit_texCubemap_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemap_Positive_ReadModeElementType - float", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLod_Positive_ReadModeElementType - char", + "Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLod_Positive_ReadModeElementType - short", + "Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLod_Positive_ReadModeElementType - int", + "Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLod_Positive_ReadModeElementType - float", + "Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char", + 
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapGrad_Positive_ReadModeElementType - char", + "Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapGrad_Positive_ReadModeElementType - short", + "Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapGrad_Positive_ReadModeElementType - int", + "Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapGrad_Positive_ReadModeElementType - float", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLayered_Positive_ReadModeElementType - char", + "Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLayered_Positive_ReadModeElementType - short", + "Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLayered_Positive_ReadModeElementType - int", + "Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLayered_Positive_ReadModeElementType - float", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int", + 
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2Dgather_Positive_ReadModeElementType - char", + "Unit_tex2Dgather_Positive_ReadModeElementType - unsigned char", + "Unit_tex2Dgather_Positive_ReadModeElementType - short", + "Unit_tex2Dgather_Positive_ReadModeElementType - unsigned short", + "Unit_tex2Dgather_Positive_ReadModeElementType - int", + "Unit_tex2Dgather_Positive_ReadModeElementType - unsigned int", + "Unit_tex2Dgather_Positive_ReadModeElementType - float", + "Unit_tex2D_Positive_ReadModeElementType - char", + "Unit_tex2D_Positive_ReadModeElementType - unsigned char", + "Unit_tex2D_Positive_ReadModeElementType - short", + "Unit_tex2D_Positive_ReadModeElementType - unsigned short", + "Unit_tex2D_Positive_ReadModeElementType - int", + 
"Unit_tex2D_Positive_ReadModeElementType - unsigned int", + "Unit_tex2D_Positive_ReadModeElementType - float", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLayered_Positive_ReadModeElementType - char", + "Unit_tex2DLayered_Positive_ReadModeElementType - unsigned char", + "Unit_tex2DLayered_Positive_ReadModeElementType - short", + "Unit_tex2DLayered_Positive_ReadModeElementType - unsigned short", + "Unit_tex2DLayered_Positive_ReadModeElementType - int", + "Unit_tex2DLayered_Positive_ReadModeElementType - unsigned int", + "Unit_tex2DLayered_Positive_ReadModeElementType - float", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DGrad_Positive_ReadModeElementType - char", + "Unit_tex2DGrad_Positive_ReadModeElementType - unsigned char", + "Unit_tex2DGrad_Positive_ReadModeElementType - short", + "Unit_tex2DGrad_Positive_ReadModeElementType - unsigned short", + "Unit_tex2DGrad_Positive_ReadModeElementType - int", + "Unit_tex2DGrad_Positive_ReadModeElementType - unsigned int", + "Unit_tex2DGrad_Positive_ReadModeElementType - float", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLayeredGrad_Positive_ReadModeElementType - char", + "Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned char", + "Unit_tex2DLayeredGrad_Positive_ReadModeElementType - short", + 
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned short", + "Unit_tex2DLayeredGrad_Positive_ReadModeElementType - int", + "Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned int", + "Unit_tex2DLayeredGrad_Positive_ReadModeElementType - float", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLod_Positive_ReadModeElementType - char", + "Unit_tex2DLod_Positive_ReadModeElementType - unsigned char", + "Unit_tex2DLod_Positive_ReadModeElementType - short", + "Unit_tex2DLod_Positive_ReadModeElementType - unsigned short", + "Unit_tex2DLod_Positive_ReadModeElementType - int", + "Unit_tex2DLod_Positive_ReadModeElementType - unsigned int", + "Unit_tex2DLod_Positive_ReadModeElementType - float", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType - char", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned char", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType - short", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned short", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType - int", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned int", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType - float", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short", + 
"Unit_hipDrvGetErrorName_Positive_Basic", + "Unit_hipDrvGetErrorString_Positive_Basic", + "Unit_hipModuleLaunchKernel_Negative_Parameters", + "Unit_hipModuleGetTexRef_Positive_Basic", + "Unit_hipExtModuleLaunchKernel_Positive_Basic", + "Unit_hipExtModuleLaunchKernel_Negative_Parameters", + "Unit_hipLaunchKernel_Negative_Parameters", + "Unit_Kernel_Launch_bounds_Negative_OutOfBounds", + "Unit_Kernel_Launch_bounds_Negative_Parameters_RTC", + "Unit_AtomicBuiltins_Negative_Parameters_RTC", "Note: Test disabled due to defect - EXSWHTEC-151", "Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module", "Note: Test disabled due to defect - EXSWHTEC-152", @@ -446,6 +711,710 @@ "Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed", "Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice", "Unit_hipGraphUpload_Functional_multidevice_test", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_StaticAssert_Positive_Basic_RTC", + "Unit_Assert_Positive_Basic_KernelFail", + "=== Below tests are disabled due to defect EXSWHTEC-356 ===", + "Unit_Device___hisinf2_Accuracy_Positive", + "Unit_Device___hisnan2_Accuracy_Positive", + "Unit_Device___hbequ2_Accuracy_Positive", + "Unit_Device___hne_Accuracy_Positive", + "Unit_Device___hne2_Accuracy_Positive", + "Unit_Device___hbne2_Accuracy_Positive", + "Unit_Device___hbgeu2_Accuracy_Positive", + "Unit_Device___hbgtu2_Accuracy_Positive", + "Unit_Device___hbleu2_Accuracy_Positive", + "Unit_Device___hbltu2_Accuracy_Positive", + "=== Below 4 tests are disable due to defect EXSWHTEC-355 ===", + "Unit_Device___hadd_Sanity_Positive", + "Unit_Device___uhadd_Sanity_Positive", + "Unit_Device___rhadd_Sanity_Positive", + "Unit_Device___urhadd_Sanity_Positive", + "SWDEV-435667 : Below tests failed in stress test on 19/01/24 ===", + "Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - 
int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - float", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - double", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - float", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - double", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned int", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long long", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - float", + "Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - double", + "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t", + "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t", + "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t", + "Below 
tests failed in stress test of 25/01/24 ===", + "Unit_atomicAnd_Positive_SameAddress - int", + "Unit_atomicAnd_Positive_SameAddress - unsigned int", + "Unit_atomicAnd_Positive_SameAddress - unsigned long", + "Unit_atomicAnd_Positive_SameAddress - unsigned long long", + "Unit_atomicAnd_Positive_Adjacent_Addresses - int", + "Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicAnd_Positive_Scattered_Addresses - int", + "Unit_atomicAnd_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicAnd_Negative_Parameters_RTC", + "Unit_atomicOr_Positive_SameAddress - int", + "Unit_atomicOr_Positive_SameAddress - unsigned int", + "Unit_atomicOr_Positive_SameAddress - unsigned long", + "Unit_atomicOr_Positive_SameAddress - unsigned long long", + "Unit_atomicOr_Positive_Adjacent_Addresses - int", + 
"Unit_atomicOr_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicOr_Positive_Scattered_Addresses - int", + "Unit_atomicOr_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicOr_Positive_Scattered_Addresses - unsigned long", + "Unit_atomicOr_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicOr_Negative_Parameters_RTC", + "Unit_atomicXor_Positive_SameAddress - int", + "Unit_atomicXor_Positive_SameAddress - unsigned int", + "Unit_atomicXor_Positive_SameAddress - unsigned long", + "Unit_atomicXor_Positive_SameAddress - unsigned long long", + "Unit_atomicXor_Positive_Adjacent_Addresses - int", + "Unit_atomicXor_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicXor_Positive_Scattered_Addresses - int", + "Unit_atomicXor_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicXor_Positive_Scattered_Addresses - 
unsigned long", + "Unit_atomicXor_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicXor_Negative_Parameters_RTC", + "Unit_atomicMin_Positive_SameAddress - int", + "Unit_atomicMin_Positive_SameAddress - unsigned int", + "Unit_atomicMin_Positive_SameAddress - unsigned long", + "Unit_atomicMin_Positive_SameAddress - unsigned long long", + "Unit_atomicMin_Positive_Adjacent_Addresses - int", + "Unit_atomicMin_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Adjacent_Addresses - float", + "Unit_atomicMin_Positive_Adjacent_Addresses - double", + "Unit_atomicMin_Positive_Scattered_Addresses - int", + "Unit_atomicMin_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicMin_Positive_Scattered_Addresses - unsigned long", + "Unit_atomicMin_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Scattered_Addresses - float", + "Unit_atomicMin_Positive_Scattered_Addresses - double", + 
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_atomicMin_Negative_Parameters_RTC", + "Unit_atomicMax_Positive_SameAddress - int", + "Unit_atomicMax_Positive_SameAddress - unsigned int", + "Unit_atomicMax_Positive_SameAddress - unsigned long", + "Unit_atomicMax_Positive_SameAddress - unsigned long long", + "Unit_atomicMax_Positive_Adjacent_Addresses - int", + "Unit_atomicMax_Positive_Adjacent_Addresses - unsigned int", + "Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long", + "Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Adjacent_Addresses - float", + "Unit_atomicMax_Positive_Adjacent_Addresses - double", + "Unit_atomicMax_Positive_Scattered_Addresses - int", + "Unit_atomicMax_Positive_Scattered_Addresses - unsigned int", + "Unit_atomicMax_Positive_Scattered_Addresses - unsigned long", + 
"Unit_atomicMax_Positive_Scattered_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Scattered_Addresses - float", + "Unit_atomicMax_Positive_Scattered_Addresses - double", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - int", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned int", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long", + "Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long long", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - int", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - int", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned int", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_atomicMax_Negative_Parameters_RTC", + "Unit_safeAtomicMin_Positive_Adjacent_Addresses - float", + "Unit_safeAtomicMin_Positive_Adjacent_Addresses - double", + "Unit_safeAtomicMin_Positive_Scattered_Addresses - float", + "Unit_safeAtomicMin_Positive_Scattered_Addresses - double", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - float", + 
"Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - double", + "Unit_unsafeAtomicMin_Positive_Scattered_Addresses - float", + "Unit_unsafeAtomicMin_Positive_Scattered_Addresses - double", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_safeAtomicMax_Positive_Adjacent_Addresses - float", + "Unit_safeAtomicMax_Positive_Adjacent_Addresses - double", + "Unit_safeAtomicMax_Positive_Scattered_Addresses - float", + "Unit_safeAtomicMax_Positive_Scattered_Addresses - double", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - float", + "Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - double", + "Unit_unsafeAtomicMax_Positive_Scattered_Addresses - float", + "Unit_unsafeAtomicMax_Positive_Scattered_Addresses - double", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float", + "Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - int", + 
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - double", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long", + 
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - double", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned int", + 
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - double", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - float", + "Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - double", + "Unit_atomicExch_Positive - int", + "Unit_atomicExch_Positive - unsigned int", + "Unit_atomicExch_Positive - unsigned long", + "Unit_atomicExch_Positive - unsigned long long", + "Unit_atomicExch_Positive - float", + "Unit_atomicExch_Positive - double", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned 
long", + "Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long", 
+ "Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long", + 
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned int", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long", + "Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long long", + "Unit___hip_atomic_exchange_Positive_Wavefront - int", + "Unit___hip_atomic_exchange_Positive_Wavefront - unsigned int", + "Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long", + "Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long long", + "Unit___hip_atomic_exchange_Positive_Wavefront - float", + "Unit___hip_atomic_exchange_Positive_Wavefront - double", + "Unit___hip_atomic_exchange_Positive_Workgroup - int", + "Unit___hip_atomic_exchange_Positive_Workgroup - unsigned int", + "Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long", + "Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long long", + "Unit___hip_atomic_exchange_Positive_Workgroup - float", + "Unit___hip_atomic_exchange_Positive_Workgroup - double", + "=== Below tests cause timeout in stress test of 09/02/24 ===", + "Unit_Device___half2half2_Accuracy_Positive", + 
"Unit_Device_make_half2_Accuracy_Positive", + "Unit_Device___halves2half2_Accuracy_Positive", + "Unit_Device___low2half_Accuracy_Positive", + "Unit_Device___high2half_Accuracy_Positive", + "Unit_Device___low2half2_Accuracy_Positive", + "Unit_Device___high2half2_Accuracy_Positive", + "Unit_Device___lowhigh2highlow_Accuracy_Positive", + "Unit_Device___lows2half2_Accuracy_Positive", + "Unit_Device___highs2half2_Accuracy_Positive", + "Unit_Device___float2half2_rn_Accuracy_Positive", + "Unit_Device___floats2half2_rn_Accuracy_Positive", + "Unit_Device___float22half2_rn_Accuracy_Positive", + "Unit_Device___low2float_Accuracy_Positive", + "Unit_Device___high2float_Accuracy_Positive", + "Unit_Device___half22float2_Accuracy_Positive", + "Unit_Device_hcos_Accuracy_Positive", + "Unit_Device_h2cos_Accuracy_Positive", + "Unit_Device_hsin_Accuracy_Positive", + "Unit_Device_h2sin_Accuracy_Positive", + "Unit_Device_hexp_Accuracy_Positive", + "Unit_Device_h2exp_Accuracy_Positive", + "Unit_Device_hexp10_Accuracy_Positive", + "Unit_Device_h2exp10_Accuracy_Positive", + "Unit_Device_hexp2_Accuracy_Positive", + "Unit_Device_h2exp2_Accuracy_Positive", + "Unit_Device_hlog_Accuracy_Positive", + "Unit_Device_h2log_Accuracy_Positive", + "Unit_Device_hlog10_Accuracy_Positive", + "Unit_Device_h2log10_Accuracy_Positive", + "Unit_Device_hlog2_Accuracy_Positive", + "Unit_Device_h2log2_Accuracy_Positive", + "Unit_Device_hsqrt_Accuracy_Positive", + "Unit_Device_h2sqrt_Accuracy_Positive", + "Unit_Device_hceil_Accuracy_Positive", + "Unit_Device_h2ceil_Accuracy_Positive", + "Unit_Device_hfloor_Accuracy_Positive", + "Unit_Device_h2floor_Accuracy_Positive", + "Unit_Device_htrunc_Accuracy_Positive", + "Unit_Device_h2trunc_Accuracy_Positive", + "Unit_Device_hrcp_Accuracy_Positive", + "Unit_Device_h2rcp_Accuracy_Positive", + "Unit_Device_hrsqrt_Accuracy_Positive", + "Unit_Device_h2rsqrt_Accuracy_Positive", + "Unit_Device_hrint_Accuracy_Positive", + "Unit_Device_h2rint_Accuracy_Positive", + 
"Unit_Device___habs_Accuracy_Positive", + "Unit_Device___habs2_Accuracy_Positive", + "Unit_Device___hneg_Accuracy_Positive", + "Unit_Device___hneg2_Accuracy_Positive", + "Unit_Device___hadd_wrapper_Accuracy_Positive", + "Unit_Device___hadd2_Accuracy_Positive", + "Unit_Device___hadd_sat_Accuracy_Positive", + "Unit_Device___hadd2_sat_Accuracy_Positive", + "Unit_Device___hsub_Accuracy_Positive", + "Unit_Device___hsub2_Accuracy_Positive", + "Unit_Device___hsub_sat_Accuracy_Positive", + "Unit_Device___hsub2_sat_Accuracy_Positive", + "Unit_Device___hmul_Accuracy_Positive", + "Unit_Device___hmul2_Accuracy_Positive", + "Unit_Device___hmul_sat_Accuracy_Positive", + "Unit_Device___hmul2_sat_Accuracy_Positive", + "Unit_Device___hdiv_Accuracy_Positive", + "Unit_Device___h2div_Accuracy_Positive", + "Unit_Device___hfma_Accuracy_Positive", + "Unit_Device___hfma2_Accuracy_Positive", + "Unit_Device___hfma_sat_Accuracy_Positive", + "Unit_Device___hfma2_sat_Accuracy_Positive", + "Unit_Device___hisinf_Accuracy_Positive", + "Unit_Device___hisinf2_Accuracy_Positive", + "Unit_Device___hisnan_Accuracy_Positive", + "Unit_Device___hisnan2_Accuracy_Positive", + "Unit_Device___heq_Accuracy_Positive", + "Unit_Device___hbeq2_Accuracy_Positive", + "Unit_Device___hequ_Accuracy_Positive", + "Unit_Device___hbequ2_Accuracy_Positive", + "Unit_Device___heq2_Accuracy_Positive", + "Unit_Device___hequ2_Accuracy_Positive", + "Unit_Device___hne_Accuracy_Positive", + "Unit_Device___hbne2_Accuracy_Positive", + "Unit_Device___hneu_Accuracy_Positive", + "Unit_Device___hbneu2_Accuracy_Positive", + "Unit_Device___hne2_Accuracy_Positive", + "Unit_Device___hneu2_Accuracy_Positive", + "Unit_Device___hge_Accuracy_Positive", + "Unit_Device___hbge2_Accuracy_Positive", + "Unit_Device___hgeu_Accuracy_Positive", + "Unit_Device___hbgeu2_Accuracy_Positive", + "Unit_Device___hge2_Accuracy_Positive", + "Unit_Device___hgeu2_Accuracy_Positive", + "Unit_Device___hgt_Accuracy_Positive", + "Unit_Device___hbgt2_Accuracy_Positive", 
+ "Unit_Device___hgtu_Accuracy_Positive", + "Unit_Device___hbgtu2_Accuracy_Positive", + "Unit_Device___hgt2_Accuracy_Positive", + "Unit_Device___hgtu2_Accuracy_Positive", + "Unit_Device___hle_Accuracy_Positive", + "Unit_Device___hble2_Accuracy_Positive", + "Unit_Device___hleu_Accuracy_Positive", + "Unit_Device___hbleu2_Accuracy_Positive", + "Unit_Device___hle2_Accuracy_Positive", + "Unit_Device___hleu2_Accuracy_Positive", + "Unit_Device___hlt_Accuracy_Positive", + "Unit_Device___hblt2_Accuracy_Positive", + "Unit_Device___hltu_Accuracy_Positive", + "Unit_Device___hbltu2_Accuracy_Positive", + "Unit_Device___hlt2_Accuracy_Positive", + "Unit_Device___hltu2_Accuracy_Positive", + "Unit_Device___hmax_Accuracy_Positive", + "Unit_Device___hmin_Accuracy_Positive", + "Unit_Device___hmax_nan_Accuracy_Positive", + "Unit_Device___hmin_nan_Accuracy_Positive", + "Unit_Device___half2int_rn_Accuracy_Positive", + "Unit_Device___half2int_rz_Accuracy_Positive", + "Unit_Device___half2int_rd_Accuracy_Positive", + "Unit_Device___half2int_ru_Accuracy_Positive", + "Unit_Device___half2uint_rn_Accuracy_Positive", + "Unit_Device___half2uint_rz_Accuracy_Positive", + "Unit_Device___half2uint_rd_Accuracy_Positive", + "Unit_Device___half2uint_ru_Accuracy_Positive", + "Unit_Device___half2short_rn_Accuracy_Positive", + "Unit_Device___half2short_rz_Accuracy_Positive", + "Unit_Device___half2short_rd_Accuracy_Positive", + "Unit_Device___half2short_ru_Accuracy_Positive", + "Unit_Device___half2ushort_rn_Accuracy_Positive", + "Unit_Device___half2ushort_rz_Accuracy_Positive", + "Unit_Device___half2ushort_rd_Accuracy_Positive", + "Unit_Device___half2ushort_ru_Accuracy_Positive", + "Unit_Device___half2ll_rn_Accuracy_Positive", + "Unit_Device___half2ll_rz_Accuracy_Positive", + "Unit_Device___half2ll_rd_Accuracy_Positive", + "Unit_Device___half2ll_ru_Accuracy_Positive", + "Unit_Device___half2ull_rn_Accuracy_Positive", + "Unit_Device___half2ull_rz_Accuracy_Positive", + 
"Unit_Device___half2ull_rd_Accuracy_Positive", + "Unit_Device___half2ull_ru_Accuracy_Positive", + "Unit_Device___half_as_short_Accuracy_Positive", + "Unit_Device___half_as_ushort_Accuracy_Positive", + "Unit_Device___int2half_rn_Accuracy_Positive", + "Unit_Device___int2half_rz_Accuracy_Positive", + "Unit_Device___int2half_rd_Accuracy_Positive", + "Unit_Device___int2half_ru_Accuracy_Positive", + "Unit_Device___uint2half_rn_Accuracy_Positive", + "Unit_Device___uint2half_rz_Accuracy_Positive", + "Unit_Device___uint2half_rd_Accuracy_Positive", + "Unit_Device___uint2half_ru_Accuracy_Positive", + "Unit_Device___short2half_rn_Accuracy_Positive", + "Unit_Device___short2half_rz_Accuracy_Positive", + "Unit_Device___short2half_rd_Accuracy_Positive", + "Unit_Device___short2half_ru_Accuracy_Positive", + "Unit_Device___ushort2half_rn_Accuracy_Positive", + "Unit_Device___ushort2half_rz_Accuracy_Positive", + "Unit_Device___ushort2half_rd_Accuracy_Positive", + "Unit_Device___ushort2half_ru_Accuracy_Positive", + "Unit_Device___ll2half_rn_Accuracy_Positive", + "Unit_Device___ll2half_rz_Accuracy_Positive", + "Unit_Device___ll2half_rd_Accuracy_Positive", + "Unit_Device___ll2half_ru_Accuracy_Positive", + "Unit_Device___ull2half_rn_Accuracy_Positive", + "Unit_Device___ull2half_rz_Accuracy_Positive", + "Unit_Device___ull2half_rd_Accuracy_Positive", + "Unit_Device___ull2half_ru_Accuracy_Positive", + "Unit_Device___short_as_half_Accuracy_Positive", + "Unit_Device___ushort_as_half_Accuracy_Positive", + "Unit_Device___float2half_rn_Accuracy_Positive", + "Unit_Device___float2half_Accuracy_Positive", + "Unit_Device___half2float_Accuracy_Positive", + "Unit_Device___frcp_rn_Accuracy_Positive", + "Unit_Device___fsqrt_rn_Accuracy_Positive", + "Unit_Device___frsqrt_rn_Accuracy_Positive", + "Unit_Device___expf_Accuracy_Positive", + "Unit_Device___exp10f_Accuracy_Positive", + "Unit_Device___logf_Accuracy_Positive", + "Unit_Device___log2f_Accuracy_Positive", + "Unit_Device___log10f_Accuracy_Positive", + 
"Unit_Device___sinf_Accuracy_Positive", + "Unit_Device___sincosf_sin_Accuracy_Positive", + "Unit_Device___cosf_Accuracy_Positive", + "Unit_Device___sincosf_cos_Accuracy_Positive", + "Unit_Device___fadd_rn_Accuracy_Positive", + "Unit_Device___fsub_rn_Accuracy_Positive", + "Unit_Device___fmul_rn_Accuracy_Positive", + "Unit_Device___fdiv_rn_Accuracy_Positive", + "Unit_Device___fdividef_Accuracy_Positive", + "Unit_Device___fmaf_rn_Accuracy_Positive", + "Unit_Device___drcp_rn_Accuracy_Positive", + "Unit_Device___dsqrt_rn_Accuracy_Positive", + "Unit_Device___dadd_rn_Accuracy_Positive", + "Unit_Device___dsub_rn_Accuracy_Positive", + "Unit_Device___dmul_rn_Accuracy_Positive", + "Unit_Device___ddiv_rn_Accuracy_Positive", + "Unit_Device___fma_rn_Accuracy_Positive", + "Unit___hip_atomic_load_store_Positive_Acquire_Release", + "Unit___hip_atomic_exchange_Positive_Acquire_Release", + "Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release", + "Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_add_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_and_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_or_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_xor_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_min_Positive_Acquire_Release", + "Unit___hip_atomic_fetch_max_Positive_Acquire_Release", + "Unit___hip_atomic_load_store_Positive_Sequential_Consistency", + "Unit___hip_atomic_exchange_Positive_Sequential_Consistency", + "Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency", + "Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency", + 
"Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - int", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - float", + "Unit___hip_atomic_fetch_add_Positive_Wavefront - double", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - int", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - float", + "Unit___hip_atomic_fetch_add_Positive_Workgroup - double", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float", + "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float", + "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double", + "Unit_atomicAdd_Positive - int", + "Unit_atomicAdd_Positive - unsigned int", + "Unit_atomicAdd_Positive - unsigned long", + "Unit_atomicAdd_Positive - unsigned long long", + "Unit_atomicAdd_Positive - float", + 
"Unit_atomicAdd_Positive - double", + "Unit_atomicAdd_Positive_Multi_Kernel - int", + "Unit_atomicAdd_Positive_Multi_Kernel - unsigned int", + "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long", + "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long", + "Unit_atomicAdd_Positive_Multi_Kernel - float", + "Unit_atomicAdd_Positive_Multi_Kernel - double", + "Unit_atomicAdd_Negative_Parameters_RTC", + "Unit_atomicAdd_system_Positive_Peer_GPUs - int", + "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int", + "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long", + "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long", + "Unit_atomicAdd_system_Positive_Peer_GPUs - float", + "Unit_atomicAdd_system_Positive_Peer_GPUs - double", + "Unit_atomicAdd_system_Positive_Host_And_GPU - int", + "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int", + "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long", + "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long", + "Unit_atomicAdd_system_Positive_Host_And_GPU - float", + "Unit_atomicAdd_system_Positive_Host_And_GPU - double", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float", + "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double", + "Unit_unsafeAtomicAdd_Positive - float", + "Unit_unsafeAtomicAdd_Positive - double", + "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float", + "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double", + "Unit_safeAtomicAdd_Positive - float", + "Unit_safeAtomicAdd_Positive - double", + "Unit_safeAtomicAdd_Positive_Multi_Kernel - float", + "Unit_safeAtomicAdd_Positive_Multi_Kernel - double", + "Unit_atomicSub_Positive - int", + "Unit_atomicSub_Positive - unsigned 
int", + "Unit_atomicSub_Positive - unsigned long", + "Unit_atomicSub_Positive - unsigned long long", + "Unit_atomicSub_Positive - float", + "Unit_atomicSub_Positive - double", + "Unit_atomicSub_Positive_Multi_Kernel - int", + "Unit_atomicSub_Positive_Multi_Kernel - unsigned int", + "Unit_atomicSub_Positive_Multi_Kernel - unsigned long", + "Unit_atomicSub_Positive_Multi_Kernel - unsigned long long", + "Unit_atomicSub_Positive_Multi_Kernel - float", + "Unit_atomicSub_Positive_Multi_Kernel - double", + "Unit_atomicSub_Negative_Parameters_RTC", + "Unit_atomicSub_system_Positive_Peer_GPUs - int", + "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int", + "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long", + "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long", + "Unit_atomicSub_system_Positive_Peer_GPUs - float", + "Unit_atomicSub_system_Positive_Peer_GPUs - double", + "Unit_atomicSub_system_Positive_Host_And_GPU - int", + "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int", + "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long", + "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long", + "Unit_atomicSub_system_Positive_Host_And_GPU - float", + "Unit_atomicSub_system_Positive_Host_And_GPU - double", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float", + "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double", + "Unit_atomicInc_Positive - unsigned int", + "Unit_atomicInc_Positive_Multi_Kernel - unsigned int", + "Unit_atomicInc_Negative_Parameters_RTC", + "Unit_atomicDec_Positive - unsigned int", + "Unit_atomicDec_Positive_Multi_Kernel - unsigned int", + "Unit_atomicDec_Negative_Parameters_RTC", + "Unit_atomicCAS_Positive - int", + 
"Unit_atomicCAS_Positive - unsigned int", + "Unit_atomicCAS_Positive - unsigned long long", + "Unit_atomicCAS_Positive_Multi_Kernel - int", + "Unit_atomicCAS_Positive_Multi_Kernel - unsigned int", + "Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long", + "Unit_atomicCAS_Negative_Parameters_RTC", + "Unit_atomicCAS_system_Positive_Peer_GPUs - int", + "Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int", + "Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long", + "Unit_atomicCAS_system_Positive_Host_And_GPU - int", + "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int", + "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long", + "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int", + "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int", + "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long", #endif "End of json" ] diff --git a/catch/hipTestMain/config/config_nvidia_linux.json b/catch/hipTestMain/config/config_nvidia_linux.json index 3fdf6d03a6..c1f1ba1863 100644 --- a/catch/hipTestMain/config/config_nvidia_linux.json +++ b/catch/hipTestMain/config/config_nvidia_linux.json @@ -89,6 +89,149 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior", - "Unit_hipMemcpy_Positive_Synchronization_Behavior" + "Unit_hipMemcpy_Positive_Synchronization_Behavior", + "Unit_tex1Dfetch_Positive_ReadModeElementType - char", + "Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char", + "Unit_tex1Dfetch_Positive_ReadModeElementType - short", + "Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short", + "Unit_tex1Dfetch_Positive_ReadModeElementType - int", + "Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int", + "Unit_tex1Dfetch_Positive_ReadModeElementType - float", + "Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char", + 
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - char", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - short", + "Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short", + 
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemap_Positive_ReadModeElementType - char", + "Unit_texCubemap_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemap_Positive_ReadModeElementType - short", + "Unit_texCubemap_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemap_Positive_ReadModeElementType - int", + "Unit_texCubemap_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemap_Positive_ReadModeElementType - float", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLod_Positive_ReadModeElementType - char", + "Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLod_Positive_ReadModeElementType - short", + "Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLod_Positive_ReadModeElementType - int", + "Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLod_Positive_ReadModeElementType - float", + "Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapGrad_Positive_ReadModeElementType - char", + 
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapGrad_Positive_ReadModeElementType - short", + "Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapGrad_Positive_ReadModeElementType - int", + "Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapGrad_Positive_ReadModeElementType - float", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLayered_Positive_ReadModeElementType - char", + "Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLayered_Positive_ReadModeElementType - short", + "Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLayered_Positive_ReadModeElementType - int", + "Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLayered_Positive_ReadModeElementType - float", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat 
- char", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int", + "Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char", + 
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short", + "Unit_hipDrvGetErrorString_Positive_Basic", + "Unit_hipLaunchKernel_Negative_Parameters", + "Unit_Assert_Positive_Basic_KernelFail", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_hipMemImportFromShareableHandle_Positive_MultiProc", + "Unit_hipMemMapArrayAsync_Positive_Basic" ] } diff --git a/catch/hipTestMain/config/config_nvidia_windows.json b/catch/hipTestMain/config/config_nvidia_windows.json index 3e7785a3e8..5d118b16e6 100644 --- a/catch/hipTestMain/config/config_nvidia_windows.json +++ b/catch/hipTestMain/config/config_nvidia_windows.json @@ -44,6 +44,7 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior", - "Unit_hipMemcpy_Positive_Synchronization_Behavior" + "Unit_hipMemcpy_Positive_Synchronization_Behavior", + "Unit_hipMemMapArrayAsync_Positive_Basic" ] } diff --git a/catch/hipTestMain/main.cc b/catch/hipTestMain/main.cc index 109b0593fc..e869695572 100644 --- a/catch/hipTestMain/main.cc +++ b/catch/hipTestMain/main.cc @@ -36,6 +36,12 @@ int main(int argc, char** argv) { | Opt(cmd_options.cg_iterations, "cg_iterations") ["-C"]["--cg-iterations"] ("Number of iterations used for cooperative groups sync tests (default: 5)") + | 
Opt(cmd_options.accuracy_iterations, "accuracy_iterations") + ["-A"]["--accuracy-iterations"] + ("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)") + | Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory") + ["-M"]["--accuracy-max-memory"] + ("Percentage of global device memory allowed for math accuracy tests (default: 80%)") ; // clang-format on diff --git a/catch/include/cmd_options.hh b/catch/include/cmd_options.hh index 6caf7a0f48..666f34ea82 100644 --- a/catch/include/cmd_options.hh +++ b/catch/include/cmd_options.hh @@ -22,6 +22,9 @@ THE SOFTWARE. #pragma once +#include +#include + struct CmdOptions { int iterations = 10; int warmups = 100; @@ -29,6 +32,8 @@ struct CmdOptions { int cg_iterations = 5; bool no_display = false; bool progress = false; + uint64_t accuracy_iterations = std::numeric_limits::max() + 1ull; + int accuracy_max_memory = 80; }; extern CmdOptions cmd_options; diff --git a/catch/unit/memory/hipMallocManagedCommon.hh b/catch/include/hipMallocManagedCommon.hh similarity index 100% rename from catch/unit/memory/hipMallocManagedCommon.hh rename to catch/include/hipMallocManagedCommon.hh diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh index 147abe0941..21707f7615 100644 --- a/catch/include/hip_test_common.hh +++ b/catch/include/hip_test_common.hh @@ -129,6 +129,19 @@ THE SOFTWARE. } \ } +// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError. 
+#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \ + { \ + auto localError = errorExpr; \ + INFO("Matching Errors: " \ + << "\n Expected Error: " << hiprtcGetErrorString(expectedError) \ + << "\n Expected Code: " << expectedError << '\n' \ + << " Actual Error: " << hiprtcGetErrorString(localError) \ + << "\n Actual Code: " << localError << "\nStr: " << #errorExpr \ + << "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \ + REQUIRE(localError == expectedError); \ + } + #define HIPASSERT(condition) \ if (!(condition)) { \ printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \ @@ -165,7 +178,7 @@ static inline bool IsGfx11() { hipDeviceProp_t props{}; HIP_CHECK(hipGetDevice(&device)); HIP_CHECK(hipGetDeviceProperties(&props, device)); - // Get GCN Arch Name and compare to check if it is gfx11 + // Get GCN Arch Name and compare to check if it is gfx11 std::string arch = std::string(props.gcnArchName); auto pos = arch.find("gfx11"); if (pos != std::string::npos) @@ -173,7 +186,7 @@ static inline bool IsGfx11() { else return false; #else - std::cout<<"Have to be either Nvidia or AMD platform, asserting"<(kernel, numBlocks, numThreads, memPerBlock, stream, std::forward(packedArgs)...); #endif -HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipGetLastError()); } //--- diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 680dfa8a04..edb89cbc3e 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -39,6 +39,13 @@ THE SOFTWARE. * @} */ +/** + * @defgroup AtomicsTest Device Atomics + * @{ + * This section describes tests for the Device Atomic APIs. + * @} + */ + /** * @defgroup DeviceLanguageTest Device Language * @{ @@ -96,16 +103,23 @@ THE SOFTWARE. */ /** -* @defgroup KernelTest Kernel Functions Management -* @{ -* This section describes the various kernel functions invocation. 
-* @} -*/ + * @defgroup KernelTest Kernel Functions Management + * @{ + * This section describes the various kernel functions invocation. + * @} + */ /** - * @defgroup AtomicsTest Device Atomics + * @defgroup SyncthreadsTest Synchronization Functions * @{ - * This section describes tests for the Device Atomic APIs. + * This section describes tests for Synchronization Functions. + * @} + */ + +/** + * @defgroup ThreadfenceTest Memory Fence Functions + * @{ + * This section describes tests for Memory Fence Functions. * @} */ @@ -119,7 +133,8 @@ THE SOFTWARE. /** * @defgroup PeerToPeerTest PeerToPeer Device Memory Access * @{ - * This section describes tests for the PeerToPeer device memory access functions of HIP runtime API. + * This section describes tests for the PeerToPeer device memory access functions of HIP runtime + * API. * @warning PeerToPeer support is experimental. * @} */ @@ -135,6 +150,7 @@ THE SOFTWARE. * @defgroup ShflTest warp shuffle function Management * @{ * This section describes the warp shuffle types & functions of HIP runtime API. + * @} */ /** @@ -158,6 +174,13 @@ THE SOFTWARE. * @} */ +/** + * @defgroup ModuleTest Module Management + * @{ + * This section describes the module management types & functions of HIP runtime API. + * @} + */ + /** * @defgroup TextureTest Texture Management * @{ @@ -172,6 +195,13 @@ THE SOFTWARE. * @} */ + /** + * @defgroup MathTest Math Device Functions + * @{ + * This section describes tests for device math functions of HIP runtime API. + * @} + */ + /** * @defgroup PrintfTest Printf API Management * @{ @@ -192,3 +222,10 @@ THE SOFTWARE. * This section describes tests for the Complex type functions. * @} */ + +/** + * @defgroup VirtualMemoryManagementTest Virtual Memory Management APIs + * @{ + * This section describes the virtual memory management types & functions of HIP runtime API. 
+ * @} + */ diff --git a/catch/include/memcpy3d_tests_common.hh b/catch/include/memcpy3d_tests_common.hh index 84d0fc517b..e55469534c 100644 --- a/catch/include/memcpy3d_tests_common.hh +++ b/catch/include/memcpy3d_tests_common.hh @@ -23,7 +23,7 @@ THE SOFTWARE. #pragma once #pragma clang diagnostic ignored "-Wmissing-field-initializers" #pragma clang diagnostic ignored "-Wunused-lambda-capture" - +#pragma clang diagnostic ignored "-Wunused-parameter" #include #include @@ -44,8 +44,9 @@ static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction } }; -static hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr, - hipPos src_pos, hipExtent extent, hipMemcpyKind kind) { +static inline hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, + PtrVariant src_ptr, hipPos src_pos, + hipExtent extent, hipMemcpyKind kind) { hipMemcpy3DParms parms = {0}; if (std::holds_alternative(dst_ptr)) { parms.dstArray = std::get(dst_ptr); @@ -185,7 +186,7 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); if (!can_access_peer) { std::string msg = "Skipped as peer access cannot be enabled between devices " + - std::to_string(src_device) + " " + std::to_string(dst_device); + std::to_string(src_device) + " " + std::to_string(dst_device); HipTest::HIP_SKIP_TEST(msg.c_str()); return; } @@ -205,7 +206,8 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null // Using dst_alloc width and height to set only the elements that will be copied over to // dst_alloc Iota<<>>(src_alloc.ptr(), src_alloc.pitch(), - dst_alloc.width_logical(),dst_alloc.height(), dst_alloc.depth()); + dst_alloc.width_logical(), + dst_alloc.height(), dst_alloc.depth()); HIP_CHECK(hipGetLastError()); HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), @@ -626,15 +628,14 @@ 
constexpr auto MemTypeUnified() { using DrvPtrVariant = std::variant; -template -hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr, - hipPos src_pos, hipExtent extent, hipMemcpyKind kind, - hipStream_t stream = nullptr) { +static inline HIP_MEMCPY3D GetDrvMemcpy3DParms(DrvPtrVariant dst_ptr, hipPos dst_pos, + DrvPtrVariant src_ptr, hipPos src_pos, + hipExtent extent, hipMemcpyKind kind) { HIP_MEMCPY3D parms = {0}; if (std::holds_alternative(dst_ptr)) { parms.dstMemoryType = hipMemoryTypeArray; - parms.dstArray = std::get(dst_ptr); + parms.dstArray = std::get(dst_ptr); } else { auto ptr = std::get(dst_ptr); parms.dstPitch = ptr.pitch; @@ -694,6 +695,84 @@ hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVaria parms.dstY = dst_pos.y; parms.dstZ = dst_pos.z; + return parms; +} + +static inline bool operator==(const HIP_MEMCPY3D& lhs, const HIP_MEMCPY3D& rhs) { + bool pos_eq = lhs.dstXInBytes == rhs.dstXInBytes && lhs.dstY == rhs.dstY && + lhs.dstZ == rhs.dstZ && lhs.srcXInBytes == rhs.srcXInBytes && lhs.srcY == rhs.srcY && + lhs.srcZ == rhs.srcZ; + bool extent_eq = + lhs.WidthInBytes == rhs.WidthInBytes && lhs.Height == rhs.Height && lhs.Depth == rhs.Depth; + bool mem_eq = true; + if (lhs.dstArray) { + mem_eq = lhs.dstArray == rhs.dstArray && lhs.dstMemoryType == rhs.dstMemoryType; + } else { + mem_eq = lhs.dstPitch == rhs.dstPitch && lhs.dstMemoryType == rhs.dstMemoryType; + } + if (lhs.srcArray) { + mem_eq = lhs.srcArray == rhs.srcArray && lhs.srcMemoryType == rhs.srcMemoryType; + } else { + mem_eq = lhs.srcPitch == rhs.srcPitch && lhs.srcMemoryType == rhs.srcMemoryType; + } + if (lhs.dstDevice) { + mem_eq = mem_eq && (lhs.dstDevice == rhs.dstDevice); + } + if (lhs.dstHost) { + mem_eq = mem_eq && (lhs.dstDevice == rhs.dstDevice); + } + if (lhs.srcDevice) { + mem_eq = mem_eq && (lhs.srcDevice == rhs.srcDevice); + } + if (lhs.srcHost) { + mem_eq = mem_eq && (lhs.srcHost == rhs.srcHost); + } + + return 
pos_eq && extent_eq && mem_eq; +} + +// APIs hipDrvGraphMemcpyNodeGetParams, hipDrvGraphMemcpyNodeSetParams are yet to be implemented in HIP runtime. +#if 0 +template +hipError_t DrvMemcpy3DGraphWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + hipCtx_t context, hipStream_t stream = nullptr) { + auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + + hipGraph_t g = nullptr; + HIP_CHECK(hipGraphCreate(&g, 0)); + hipGraphNode_t node = nullptr; + if constexpr (set_params) { + auto reversed_parms = GetDrvMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent, + ReverseMemcpyDirection(kind)); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms, context)); + HIP_CHECK(hipDrvGraphMemcpyNodeSetParams(node, &parms)); + } else { + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &parms, context)); + } + + HIP_MEMCPY3D retrieved_params = {0}; + HIP_CHECK(hipDrvGraphMemcpyNodeGetParams(node, &retrieved_params)); + REQUIRE(parms == retrieved_params); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(g)); + + return hipSuccess; +} +#endif //if 0 + +template +hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + hipStream_t stream = nullptr) { + auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + if constexpr (async) { return hipDrvMemcpy3DAsync(&parms, stream); } else { @@ -805,4 +884,4 @@ void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream }; PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), 
extent.height, extent.depth, f); -} +} \ No newline at end of file diff --git a/catch/include/resource_guards.hh b/catch/include/resource_guards.hh index 5f8f2cbee4..c2f32e39f5 100644 --- a/catch/include/resource_guards.hh +++ b/catch/include/resource_guards.hh @@ -35,15 +35,15 @@ enum class LinearAllocs { inline std::string to_string(const LinearAllocs allocation_type) { switch (allocation_type) { case LinearAllocs::malloc: - return "host pageable"; + return "malloc"; case LinearAllocs::mallocAndRegister: - return "registered"; + return "malloc + hipHostRegister"; case LinearAllocs::hipHostMalloc: - return "host pinned"; + return "hipHostMalloc"; case LinearAllocs::hipMalloc: - return "device malloc"; + return "hipMalloc"; case LinearAllocs::hipMallocManaged: - return "managed"; + return "hipMallocManaged"; default: return "unknown alloc type"; } @@ -83,24 +83,38 @@ template class LinearAllocGuard { LinearAllocGuard(const LinearAllocGuard&) = delete; - LinearAllocGuard(LinearAllocGuard&& o) - : allocation_type_{o.allocation_type_}, ptr_{o.ptr_}, host_ptr_{o.host_ptr_} { - o.allocation_type_ = LinearAllocs::noAlloc; - o.ptr_ = nullptr; - o.host_ptr_ = nullptr; - } + LinearAllocGuard(LinearAllocGuard&& o) { *this = std::move(o); } LinearAllocGuard& operator=(LinearAllocGuard&& o) { - allocation_type_ = o.allocation_type_; - ptr_ = o.ptr_; - host_ptr_ = o.host_ptr_; + if (this != &o) { + dealloc(); - o.allocation_type_ = LinearAllocs::noAlloc; - o.ptr_ = nullptr; - o.host_ptr_ = nullptr; + allocation_type_ = o.allocation_type_; + ptr_ = o.ptr_; + host_ptr_ = o.host_ptr_; + + o.allocation_type_ = LinearAllocs::noAlloc; + o.ptr_ = nullptr; + o.host_ptr_ = nullptr; + } + + return *this; } - ~LinearAllocGuard() { + ~LinearAllocGuard() { dealloc(); } + + T* ptr() const { return ptr_; }; + T* host_ptr() const { return host_ptr_; } + + private: + LinearAllocs allocation_type_ = LinearAllocs::noAlloc; + T* ptr_ = nullptr; + T* host_ptr_ = nullptr; + + void dealloc() { + if 
(ptr_ == nullptr) { + return; + } // No Catch macros, don't want to possibly throw in the destructor if (ptr_ != nullptr) { switch (allocation_type_) { @@ -123,14 +137,6 @@ template class LinearAllocGuard { } } } - - T* ptr() const { return ptr_; }; - T* host_ptr() const { return host_ptr_; } - - private: - LinearAllocs allocation_type_ = LinearAllocs::noAlloc; - T* ptr_ = nullptr; - T* host_ptr_ = nullptr; }; template class LinearAllocGuardMultiDim { @@ -210,6 +216,42 @@ template class ArrayAllocGuard { const hipExtent extent_; }; +template class MipmappedArrayAllocGuard { + public: + // extent should contain logical width + MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int levels, + const unsigned int flags) + : extent_{extent}, levels_{levels} { + hipChannelFormatDesc desc = hipCreateChannelDesc(); + HIP_CHECK(hipMallocMipmappedArray(&ptr_, &desc, extent_, levels_, flags)); + } + + MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) + : MipmappedArrayAllocGuard{extent, 1, flags} {} + + ~MipmappedArrayAllocGuard() { static_cast(hipFreeMipmappedArray(ptr_)); } + + MipmappedArrayAllocGuard(const MipmappedArrayAllocGuard&) = delete; + MipmappedArrayAllocGuard(MipmappedArrayAllocGuard&&) = delete; + + hipMipmappedArray_t ptr() const { return ptr_; } + + hipArray_t GetLevel(unsigned int level) { + hipArray_t ret; + HIP_CHECK(hipGetMipmappedArrayLevel(&ret, ptr_, level)); + return ret; + } + + hipExtent extent() const { return extent_; } + + unsigned int levels() const { return levels_; } + + private: + hipMipmappedArray_t ptr_ = nullptr; + const hipExtent extent_; + const unsigned int levels_; +}; + template class DrvArrayAllocGuard { public: // extent should contain width in bytes @@ -266,24 +308,24 @@ class StreamGuard { StreamGuard(const StreamGuard&) = delete; - StreamGuard(StreamGuard&& o) - : stream_type_{o.stream_type_}, flags_{o.flags_}, priority_{o.priority_}, stream_{o.stream_} { - o.stream_type_ = 
Streams::nullstream; - o.flags_ = 0u; - o.priority_ = 0; - o.stream_ = nullptr; - } + StreamGuard(StreamGuard&& o) { *this = std::move(o); } StreamGuard& operator=(StreamGuard&& o) { - stream_type_ = o.stream_type_; - flags_ = o.flags_; - priority_ = o.priority_; - stream_ = o.stream_; + if (this != &o) { + if (stream_type_ == Streams::created) { + static_cast(hipStreamDestroy(stream_)); + } - o.stream_type_ = Streams::nullstream; - o.flags_ = 0u; - o.priority_ = 0; - o.stream_ = nullptr; + stream_type_ = o.stream_type_; + flags_ = o.flags_; + priority_ = o.priority_; + stream_ = o.stream_; + + o.stream_type_ = Streams::nullstream; + o.flags_ = 0u; + o.priority_ = 0; + o.stream_ = nullptr; + } return *this; } diff --git a/catch/include/utils.hh b/catch/include/utils.hh index f025768c14..3855308a42 100644 --- a/catch/include/utils.hh +++ b/catch/include/utils.hh @@ -170,7 +170,7 @@ inline bool DeviceAttributesSupport(const int device, Attributes... attributes) return (... && DeviceAttributeSupport(device, attributes)); } -inline int GetDeviceAttribute(int device, const hipDeviceAttribute_t attr) { +inline int GetDeviceAttribute(const hipDeviceAttribute_t attr, int device) { int value = 0; HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); return value; diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 304016410e..6b63292c91 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -22,6 +22,7 @@ add_subdirectory(rtc) add_subdirectory(deviceLib) add_subdirectory(graph) add_subdirectory(memory) +add_subdirectory(stream_ordered) add_subdirectory(stream) add_subdirectory(event) add_subdirectory(occupancy) @@ -43,11 +44,15 @@ add_subdirectory(g++) add_subdirectory(module) add_subdirectory(channelDescriptor) add_subdirectory(executionControl) +add_subdirectory(math) add_subdirectory(vector_types) add_subdirectory(atomics) add_subdirectory(complex) add_subdirectory(p2p) add_subdirectory(gcc) +add_subdirectory(syncthreads) 
+add_subdirectory(threadfence) +add_subdirectory(virtualMemoryManagement) if(HIP_PLATFORM STREQUAL "amd") add_subdirectory(callback) @@ -58,3 +63,5 @@ add_subdirectory(vulkan_interop) add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246 endif() add_subdirectory(synchronization) +add_subdirectory(launchBounds) +add_subdirectory(assertion) \ No newline at end of file diff --git a/catch/unit/assertion/CMakeLists.txt b/catch/unit/assertion/CMakeLists.txt new file mode 100644 index 0000000000..c98656cf1c --- /dev/null +++ b/catch/unit/assertion/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +if(HIP_PLATFORM MATCHES "nvidia") + set(TEST_SRC + assert.cc + ) + hip_add_exe_to_target(NAME AssertionTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + static_assert.cc + assert.cc + ) + hip_add_exe_to_target(NAME AssertionTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) +endif() + +# Below tests fail in PSDB +#add_test(NAME Unit_StaticAssert_Positive_Basic +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# static_assert_kernels_positive.cc 2) +# +#add_test(NAME Unit_StaticAssert_Negative_Basic +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# static_assert_kernels_negative.cc 2) diff --git a/catch/unit/assertion/assert.cc b/catch/unit/assertion/assert.cc new file mode 100644 index 0000000000..29cadd2896 --- /dev/null +++ b/catch/unit/assertion/assert.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +/** + * @addtogroup assert assert + * @{ + * @ingroup DeviceLanguageTest + * `void assert(int expression)` - + * Stops the kernel execution if expression is equal to zero. + */ + +jmp_buf env_ignore_abort; +volatile int abort_raised_flag = 0; + +void on_sigabrt(int signum) { + signal(signum, SIG_DFL); + abort_raised_flag = 1; + longjmp(env_ignore_abort, 1); +} + +void try_and_catch_abort(void (*func)()) { + if (!setjmp(env_ignore_abort)) { + signal(SIGABRT, &on_sigabrt); + (*func)(); + signal(SIGABRT, SIG_DFL); + } +} + +__global__ void AssertPassKernel(int* x) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + *x = tid; + // expected always to be true + assert(tid >= 0); +} + +__global__ void AssertFailKernel(int* x) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + *x = tid; + // expected to fail for the even thread indices + assert(tid % 2 == 1); +} + +template void LaunchAssertKernel() { + const int num_blocks = 2; + const int num_threads = 16; + int *d_a; + HIP_CHECK(hipMalloc(&d_a, sizeof(int))); + + if constexpr (should_abort) { + AssertFailKernel<<>>(d_a); +#if HT_AMD + HIP_CHECK(hipDeviceSynchronize()); +#else + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert); +#endif + } else { + AssertPassKernel<<>>(d_a); + HIP_CHECK(hipDeviceSynchronize()); + } + + HIP_CHECK(hipFree(d_a)); +} + +/** + * Test Description + * ------------------------ + * - Launches kernels with asserts that have an expression equal to 1. + * - Expects that SIGABRT is not raised and kernels have executed successfully. 
+ * Test source + * ------------------------ + * - unit/assertion/assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") { + try_and_catch_abort(&LaunchAssertKernel); + REQUIRE(abort_raised_flag == 0); +} + +/** + * Test Description + * ------------------------ + * - Launches kernels with asserts that have an expression equal to 0. + * - Expects that SIGABRT is raised and kernels have been stopped on AMD. + * - The HIP runtime also aborts the host code, so this test case uses signal handlers + * to avoid host code abortion. + * - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA. + * - The host code is not aborted. + * Test source + * ------------------------ + * - unit/assertion/assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") { + try_and_catch_abort(&LaunchAssertKernel); +#if HT_AMD + REQUIRE(abort_raised_flag == 1); +#else + REQUIRE(abort_raised_flag == 0); +#endif +} diff --git a/catch/unit/assertion/static_assert.cc b/catch/unit/assertion/static_assert.cc new file mode 100644 index 0000000000..508db295b7 --- /dev/null +++ b/catch/unit/assertion/static_assert.cc @@ -0,0 +1,88 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "static_assert_kernels_rtc.hh" + +/** + * @addtogroup static_assert static_assert + * @{ + * @ingroup DeviceLanguageTest + * `void static_assert(constexpr expression, const char* message)` - + * Stops the compilation if expression is equal to zero, and displays the specified message. + */ + +void StaticAssertWrapper(const char* program_source) { + hiprtcProgram program{}; + + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} + +/** + * Test Description + * ------------------------ + * - Compiles kernels with static_assert calls: + * -# Expected that static_assert passes and compilation is successful. + * -# Expected that static_assert fails and compilation has errors. + * - Uses RTC to perform compilation. 
+ * Test source + * ------------------------ + * - unit/assertion/static_assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Positive); } + +/** + * Test Description + * ------------------------ + * - Passes invalidly formed expressions to static_assert calls. + * - Uses expressions that are not constexpr and values that are not known during compilation. + * - Uses RTC to perform compilation. + * Test source + * ------------------------ + * - unit/assertion/static_assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Negative); } diff --git a/catch/unit/assertion/static_assert_kernels_negative.cc b/catch/unit/assertion/static_assert_kernels_negative.cc new file mode 100644 index 0000000000..777f27855c --- /dev/null +++ b/catch/unit/assertion/static_assert_kernels_negative.cc @@ -0,0 +1,30 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +__global__ void StaticAssertErrorKernel1() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]"); +} + +__global__ void StaticAssertErrorKernel2() { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(++tid > 2, "[StaticAssertErrorKernel2]"); +} diff --git a/catch/unit/assertion/static_assert_kernels_positive.cc b/catch/unit/assertion/static_assert_kernels_positive.cc new file mode 100644 index 0000000000..2ed0d7b68c --- /dev/null +++ b/catch/unit/assertion/static_assert_kernels_positive.cc @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +__global__ void StaticAssertPassKernel1() { + static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]"); +} + +__global__ void StaticAssertPassKernel2() { static_assert(10 > 5, "[StaticAssertPassKernel2]"); } + +__global__ void StaticAssertFailKernel1() { + static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]"); +} + +__global__ void StaticAssertFailKernel2() { static_assert(10 < 5, "[StaticAssertFailKernel2]"); } diff --git a/catch/unit/assertion/static_assert_kernels_rtc.hh b/catch/unit/assertion/static_assert_kernels_rtc.hh new file mode 100644 index 0000000000..5bb7419e30 --- /dev/null +++ b/catch/unit/assertion/static_assert_kernels_rtc.hh @@ -0,0 +1,56 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Positive and negative kernels used for the static_assert Test Cases that are using RTC. 
+*/ + +static constexpr auto kStaticAssert_Positive{ + R"( + __global__ void StaticAssertPassKernel1() { + static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]"); + } + + __global__ void StaticAssertPassKernel2() { + static_assert(10 > 5, "[StaticAssertPassKernel2]"); + } + + __global__ void StaticAssertFailKernel1() { + static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]"); + } + + __global__ void StaticAssertFailKernel2() { + static_assert(10 < 5, "[StaticAssertFailKernel2]"); + } + )"}; + +static constexpr auto kStaticAssert_Negative{ + R"( + __global__ void StaticAssertErrorKernel1() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]"); + } + + __global__ void StaticAssertErrorKernel2() { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(++tid > 2, "[StaticAssertErrorKernel2]"); + } + )"}; diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index d8066a2f1a..101d6dddc2 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -18,31 +18,145 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-set(TEST_SRC - atomicExch.cc - atomicExch_system.cc -) +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + atomicAnd.cc + atomicAnd_system.cc + atomicOr.cc + atomicOr_system.cc + atomicXor.cc + atomicXor_system.cc + atomicMin.cc + atomicMin_system.cc + atomicMax.cc + atomicMax_system.cc + safeAtomicMin.cc + unsafeAtomicMin.cc + safeAtomicMax.cc + unsafeAtomicMax.cc + __hip_atomic_fetch_min.cc + __hip_atomic_fetch_max.cc + atomic_builtins.cc + acquire_release.cc + sequential_consistency.cc + atomicAdd.cc + atomicAdd_system.cc + unsafeAtomicAdd.cc + safeAtomicAdd.cc + atomicSub.cc + atomicSub_system.cc + atomicCAS.cc + atomicCAS_system.cc + __hip_atomic_fetch_add.cc + __hip_atomic_compare_exchange_strong.cc + atomicExch.cc + atomicExch_system.cc + __hip_atomic_fetch_and.cc + __hip_atomic_fetch_or.cc + __hip_atomic_fetch_xor.cc + __hip_atomic_exchange.cc + ) -if(HIP_PLATFORM MATCHES "nvidia") - set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") - hip_add_exe_to_target(NAME AtomicsTest - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - LINKER_LIBS "nvrtc -rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") -elseif(HIP_PLATFORM MATCHES "amd") - hip_add_exe_to_target(NAME AtomicsTest + #atomicInc & atomicDec tests are disabled on MI300X due to SWDEV-440688 + set(NOT_FOR_MI300X_TEST + atomicInc.cc + atomicDec.cc + ) + set(MI300X_TARGET gfx941) + function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL) + set(ARCH_CHECK -1 PARENT_SCOPE) + string(REGEX MATCHALL "--offload-arch=gfx[0-9a-z]+" OFFLOAD_ARCH_LIST ${OFFLOAD_ARCH_STR_LOCAL}) + foreach(OFFLOAD_ARCH IN LISTS OFFLOAD_ARCH_LIST) + string(REGEX MATCHALL "--offload-arch=(gfx[0-9a-z]+)" matches ${OFFLOAD_ARCH}) + if (CMAKE_MATCH_COUNT EQUAL 1) + if (CMAKE_MATCH_1 IN_LIST MI300X_TARGET) + set(ARCH_CHECK 1 
PARENT_SCOPE) + endif() # CMAKE_MATCH_1 + endif() # CMAKE_MATCH_COUNT + endforeach() # OFFLOAD_ARCH_LIST + endfunction() # CheckAcceptedArchs + + if (DEFINED OFFLOAD_ARCH_STR) + CheckRejectedArchs(${OFFLOAD_ARCH_STR}) + elseif(DEFINED $ENV{HCC_AMDGPU_TARGET}) + CheckRejectedArchs($ENV{HCC_AMDGPU_TARGET}) + else() + set(ARCH_CHECK -1) + endif() + if(${ARCH_CHECK} EQUAL -1) + message(STATUS "Adding test: ${NOT_FOR_MI300X_TEST}") + set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI300X_TEST}) + else() + message(STATUS "Removing test: ${NOT_FOR_MI300X_TEST}") + endif() + + + hip_add_exe_to_target(NAME AtomicsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests LINKER_LIBS hiprtc) -endif() + set(EXPECTED_ERRORS 48) -# SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 -#add_test(NAME Unit_atomicExch_Negative_Parameters -# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py -# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} -# atomicExch_negative_kernels.cc 40) -# -#add_test(NAME Unit_atomicExch_system_Negative_Parameters -# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py -# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} -# atomicExch_system_negative_kernels.cc 40) + # Below tests fail in PSDB + #add_test(NAME Unit_atomicAnd_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicAnd_negative_kernels.cc ${EXPECTED_ERRORS}) + # + #add_test(NAME Unit_atomicOr_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicOr_negative_kernels.cc ${EXPECTED_ERRORS}) + # + #add_test(NAME Unit_atomicXor_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicXor_negative_kernels.cc 
${EXPECTED_ERRORS}) + # + #add_test(NAME Unit_atomicMin_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicMin_negative_kernels.cc ${EXPECTED_ERRORS}) + # + #add_test(NAME Unit_atomicMax_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicMax_negative_kernels.cc ${EXPECTED_ERRORS}) + #add_test(NAME Unit_AtomicBuiltins_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomic_builtins_kernels.cc 60 27) # Should be 35 warnings, see EXSWHTEC-309 + #add_test(NAME Unit_atomicAdd_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicAdd_negative_kernels.cc 48) + #add_test(NAME Unit_atomicSub_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicSub_negative_kernels.cc 48) + #add_test(NAME Unit_atomicInc_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicInc_negative_kernels.cc 8) + # + #add_test(NAME Unit_atomicDec_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicDec_negative_kernels.cc 8) + # + #add_test(NAME Unit_atomicCAS_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicCAS_negative_kernels.cc 48) + # + # SWDEV-435667: Below 2 tests failed in stress test on 
01/12/23 + #add_test(NAME Unit_atomicExch_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicExch_negative_kernels.cc 40) + # + #add_test(NAME Unit_atomicExch_system_Negative_Parameters + # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + # ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + # atomicExch_system_negative_kernels.cc 40) +endif() \ No newline at end of file diff --git a/catch/unit/atomics/__hip_atomic_compare_exchange_strong.cc b/catch/unit/atomics/__hip_atomic_compare_exchange_strong.cc new file mode 100644 index 0000000000..69fd72ec51 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_compare_exchange_strong.cc @@ -0,0 +1,129 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_compare_exchange_strong __hip_atomic_compare_exchange_strong + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of __hip_atomic_compare_exchange_strong + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WAVEFRONT memory scope. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_compare_exchange_strong.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of __hip_atomic_compare_exchange_strong + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WORKGROUP memory scope. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_compare_exchange_strong.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/__hip_atomic_exchange.cc b/catch/unit/atomics/__hip_atomic_exchange.cc new file mode 100644 index 0000000000..a518aaafbe --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_exchange.cc @@ -0,0 +1,136 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicExch_common.hh" + +/** + * @addtogroup __hip_atomic_exchange __hip_atomic_exchange + * @{ + * @ingroup AtomicsTest + * ________________________ + * Test cases from other modules: + * - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * exchange into a runtime determined memory location. Each thread will exchange its own grid wide + * linear index + offset into the memory location, storing the return value into a separate output + * array slot corresponding to it. Once complete, the union of output array and exchange memory is + * validated to contain all values in the range [0, number_of_threads + + * number_of_exchange_memory_slots). Several memory access patterns are tested: + * -# All threads exchange to a single memory location + * -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicExch + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory + * - Exchange memory located in shared memory + * - WAVEFRONT memory scope + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_exchange.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Wavefront", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + AtomicExchSingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * exchange into a runtime determined memory location. Each thread will exchange its own grid wide + * linear index + offset into the memory location, storing the return value into a separate output + * array slot corresponding to it. Once complete, the union of output array and exchange memory is + * validated to contain all values in the range [0, number_of_threads + + * number_of_exchange_memory_slots). 
Several memory access patterns are tested: + * -# All threads exchange to a single memory location + * -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicExch + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory + * - Exchange memory located in shared memory + * - WORKGROUP memory scope + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_exchange.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Workgroup", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + AtomicExchSingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/__hip_atomic_fetch_add.cc b/catch/unit/atomics/__hip_atomic_fetch_add.cc new file mode 100644 index 0000000000..075b2b858e --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_add.cc @@ -0,0 +1,132 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add + * @{ + * @ingroup AtomicsTest + * ________________________ + * Test cases from other modules: + * - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory are validated to contain all the expected values. 
Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of __hip_atomic_fetch_add + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WAVEFRONT memory scope. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_add.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Wavefront", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory are validated to contain all the expected values. 
Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of __hip_atomic_fetch_add + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WORKGROUP memory scope. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_add.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Workgroup", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/__hip_atomic_fetch_and.cc b/catch/unit/atomics/__hip_atomic_fetch_and.cc new file mode 100644 index 0000000000..51fd37bf59 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_and.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_and __hip_atomic_fetch_and + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/__hip_atomic_fetch_max.cc b/catch/unit/atomics/__hip_atomic_fetch_max.cc new file mode 100644 index 0000000000..cc42309333 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_max.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_max __hip_atomic_fetch_max + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/__hip_atomic_fetch_min.cc b/catch/unit/atomics/__hip_atomic_fetch_min.cc new file mode 100644 index 0000000000..f09a3732f9 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_min.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_min __hip_atomic_fetch_min + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/__hip_atomic_fetch_or.cc b/catch/unit/atomics/__hip_atomic_fetch_or.cc new file mode 100644 index 0000000000..000df50f80 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_or.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_or __hip_atomic_fetch_or + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/__hip_atomic_fetch_xor.cc b/catch/unit/atomics/__hip_atomic_fetch_xor.cc new file mode 100644 index 0000000000..0f3f3f3743 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_xor.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_xor __hip_atomic_fetch_xor + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/acquire_release.cc b/catch/unit/atomics/acquire_release.cc new file mode 100644 index 0000000000..7e0996f566 --- /dev/null +++ b/catch/unit/atomics/acquire_release.cc @@ -0,0 +1,551 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "memory_order_common.hh" + +TEST_CASE("Unit___hip_atomic_load_store_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_exchange_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + 
AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + 
+TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + 
SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + 
SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/arithmetic_common.hh b/catch/unit/atomics/arithmetic_common.hh new file mode 100644 index 0000000000..0c142c4506 --- /dev/null +++ b/catch/unit/atomics/arithmetic_common.hh @@ -0,0 +1,577 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include + +namespace cg = cooperative_groups; + +// Atomic operations to which the tests in this file apply +enum class AtomicOperation { + kAdd = 0, + kAddSystem, + kSub, + kSubSystem, + kInc, + kDec, + kUnsafeAdd, + kSafeAdd, + kCASAdd, + kCASAddSystem, + kBuiltinAdd, + kBuiltinCAS +}; + +// Constants that are passed as operands to the atomic operations +constexpr auto kIntegerTestValue = 7; +constexpr auto kFloatingPointTestValue = 3.125; +constexpr auto kIncDecWraparoundValue = 1023; + +// Retrieves test value constant based on the atomic operation and test type: +// - kIncDecWraparoundValue for increment and decrement operations +// - kFloatingPointTestValue for floating point test type +// - kIntegerTestValue for integer test type +template +__host__ __device__ TestType GetTestValue() { + if constexpr (operation == AtomicOperation::kInc || operation == AtomicOperation::kDec) { + return kIncDecWraparoundValue; + } + + return std::is_floating_point_v ?
kFloatingPointTestValue : kIntegerTestValue; +} + +// Implements an atomic addition via atomicCAS +template __device__ TestType CASAtomicAdd(TestType* address, TestType val) { + TestType old = *address, assumed; + + do { + assumed = old; + old = atomicCAS(address, assumed, val + assumed); + } while (assumed != old); + + return old; +} + +// Implements an atomic addition via atomicCAS_system +template +__device__ TestType CASAtomicAddSystem(TestType* address, TestType val) { + TestType old = *address, assumed; + + do { + assumed = old; + old = atomicCAS_system(address, assumed, val + assumed); + } while (assumed != old); + + return old; +} + +// Implements an atomic addition via __hip_atomic_compare_exchange_strong +template +__device__ TestType BuiltinCASAtomicAdd(TestType* address, TestType val) { + TestType old = *address, assumed; + + const auto builtin_cas = [](TestType* address, TestType assumed, TestType val) { + __hip_atomic_compare_exchange_strong(address, &assumed, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + memory_scope); + return assumed; + }; + + do { + assumed = old; + old = builtin_cas(address, assumed, val + assumed); + } while (assumed != old); + + return old; +} + +// Performs an atomic operation on parameter `mem` based on the `operation` enumerator. +// `memory_scope` is forwarded to the builtin operations and is by default device-wide. 
+template +__device__ TestType PerformAtomicOperation(TestType* const mem) { + const auto val = GetTestValue(); + + if constexpr (operation == AtomicOperation::kAdd) { + return atomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kAddSystem) { + return atomicAdd_system(mem, val); + } else if constexpr (operation == AtomicOperation::kSub) { + return atomicSub(mem, val); + } else if constexpr (operation == AtomicOperation::kSubSystem) { + return atomicSub_system(mem, val); + } else if constexpr (operation == AtomicOperation::kInc) { + return atomicInc(mem, val); + } else if constexpr (operation == AtomicOperation::kDec) { + return atomicDec(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeAdd) { + return unsafeAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeAdd) { + return safeAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kCASAdd) { + return CASAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kCASAddSystem) { + return CASAtomicAddSystem(mem, val); + } else if constexpr (operation == AtomicOperation::kBuiltinAdd) { + return __hip_atomic_fetch_add(mem, val, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinCAS) { + return BuiltinCASAtomicAdd(mem, val); + } +} + +// This kernel executes the atomic operation specified by the enumerator `operation`. Results of +// the atomic operations are stored in `old_vals`. Each thread executes the atomic operation on the +// same memory location `global_mem`. +// If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic +// operations are executed on shared memory, and the result is copied back to `global_mem`. +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { + __shared__ TestType shared_mem; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? 
&shared_mem : global_mem; + + if constexpr (use_shared_mem) { + if (tid == 0) mem[0] = global_mem[0]; + __syncthreads(); + } + + old_vals[tid] = PerformAtomicOperation(mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid == 0) global_mem[0] = mem[0]; + } +} + +// Indexes array `ptr`, with the size in bytes of each element specified by `pitch` +template +__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch, + const unsigned int idx) { + const auto byte_ptr = reinterpret_cast(ptr); + return reinterpret_cast(byte_ptr + idx * pitch); +} + +// Executes arbitrary load-store operations on the range specified by `begin_addr` and `end_addr` +__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + +// This kernel executes the atomic operation specified by the enumerator `operation`. Results of the +// atomic operations are stored in `old_vals`. `global_mem` is an array with `width` number of +// elements. Each thread performs the atomic operation on the element that corresponds to its thread +// id (tid % width). +// The elements of `global_mem` can be larger than sizeof(TestType) with the actual size in bytes +// specified by `pitch`. This is done so we can test scenarios where threads target memory locations +// that are scattered over different cache lines. +// If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic operations +// are executed on shared memory, and the result is copied back to `global_mem`. +// If `pitch` is greater than sizeof(TestType), random memory operations are performed in the empty +// space between consecutive atomic operations so that we can test that the atomic operations +// behave correctly even with some interference.
+// +// For example, given that sizeof(TestType) is 1, `width` is 3, and `pitch` is 4: +// +// 0 1 2 3 4 5 6 7 8 9 10 11 +// global_mem -> | x | | | | x | | | | x | | | | +// | pitch | pitch | pitch | +// +// In this scenario, the atomic operations will target the elements denoted with `x` (addresses 0, +// 4, 8). Random memory traffic will be generated on the addresses in between (1, 2, 3, 5, 6, 7, 9, +// 10, 11) +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, + const unsigned int width, const unsigned int pitch) { + extern __shared__ uint8_t shared_mem[]; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? reinterpret_cast(shared_mem) : global_mem; + + if constexpr (use_shared_mem) { + if (tid < width) { + const auto target = PitchedOffset(mem, pitch, tid); + *target = *PitchedOffset(global_mem, pitch, tid); + }; + __syncthreads(); + } + + const auto n = cooperative_groups::this_grid().size() - width; + + TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width); + + if (tid < n) { + old_vals[tid] = PerformAtomicOperation( + PitchedOffset(mem, pitch, tid % width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + GenerateMemoryTraffic(begin_addr, end_addr); + } + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid < width) { + const auto target = PitchedOffset(global_mem, pitch, tid); + *target = *PitchedOffset(mem, pitch, tid); + }; + } +} + +// Used to configure test run +struct TestParams { + auto ThreadCount() const { + return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; + } + + auto HostIterationsPerThread() const { // number of iterations per host thread + return std::max(num_devices * kernel_count * ThreadCount() / 20, width); + } + + dim3 blocks; // number of blocks per kernel launch + dim3 threads; // number of threads per kernel launch 
+ unsigned int num_devices = 1u; // number of devices used + unsigned int kernel_count = 1u; // number of kernels launched per device + unsigned int width = 1u; // number of memory locations targeted + unsigned int pitch = 0u; // defines spacing between memory locations + unsigned int host_thread_count = 0u; // number of host threads launched + LinearAllocs alloc_type; // type of allocation used +}; + +// Reference implementation used to verify results +template +std::tuple, std::vector> TestKernelHostRef(const TestParams& p) { + const auto val = GetTestValue(); + + const auto total_thread_count = p.num_devices * p.kernel_count * p.ThreadCount() + + p.host_thread_count * p.HostIterationsPerThread(); + + std::vector res_vals(p.width); + std::vector old_vals; + old_vals.reserve(total_thread_count); + + auto perform_op = [&](unsigned id) { + auto& res = res_vals[id % p.width]; + old_vals.push_back(res); + + if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem || + operation == AtomicOperation::kUnsafeAdd || + operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd || + operation == AtomicOperation::kCASAddSystem || + operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) { + res = res + val; + } else if constexpr (operation == AtomicOperation::kSub || + operation == AtomicOperation::kSubSystem) { + res = res - val; + } else if constexpr (operation == AtomicOperation::kInc) { + res = (res >= val) ? 0 : res + 1; + } else if constexpr (operation == AtomicOperation::kDec) { + res = ((res == 0) || (res > val)) ? 
val : res - 1; + } + }; + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + for (auto tid = 0u; tid < p.ThreadCount() - p.width; ++tid) { + perform_op(tid); + } + } + } + + for (auto i = 0u; i < p.host_thread_count; ++i) { + for (auto j = 0u; j < p.HostIterationsPerThread(); ++j) { + perform_op(j); + } + } + + return {res_vals, old_vals}; +} + +// Compares the results of the test kernel stored in `res_vals` with results generated by the +// reference implementation +template +void Verify(const TestParams& p, std::vector& res_vals, std::vector& old_vals) { + auto [expected_res_vals, expected_old_vals] = TestKernelHostRef(p); + + for (auto i = 0u; i < res_vals.size(); ++i) { + INFO("Results index: " << i); + REQUIRE(expected_res_vals[i] == res_vals[i]); + } + + std::sort(begin(old_vals), end(old_vals)); + std::sort(begin(expected_old_vals), end(expected_old_vals)); + // NOTE(review): this loop is bounded by old_vals.size() but also indexes expected_old_vals. + // The reference implementation pushes one entry per perform_op call, i.e. only + // ThreadCount() - width entries per kernel. Confirm the caller sizes old_vals to the same + // length; if old_vals is longer, expected_old_vals[i] is read past its end. + for (auto i = 0u; i < old_vals.size(); ++i) { + INFO("Old values index: " << i); + REQUIRE(expected_old_vals[i] == old_vals[i]); + } +} + +// Launches the test kernel +template +void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, + TestType* const old_vals) { + const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u; + if (p.width == 1 && p.pitch == sizeof(TestType)) + TestKernel + <<>>(mem_ptr, old_vals); + else + TestKernel + <<>>(mem_ptr, old_vals, p.width, p.pitch); +} + +// Performs a host atomic operation on parameter `mem` based on the `operation` enumerator.
+template +void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch, TestType /*base_val*/) { + const auto val = GetTestValue(); + + for (auto i = 0u; i < iterations; ++i) { + if constexpr (operation == AtomicOperation::kAddSystem || + operation == AtomicOperation::kCASAddSystem || + operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) { + old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); + } else if constexpr (operation == AtomicOperation::kSubSystem) { + old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); + } + } +} + +// Launches host threads based on TestParams::host_thread_count that compete with the test kernel +// for the same resources +template +void PerformHostAtomicOperation(const TestParams& p, TestType* mem, TestType* const old_vals) { + if (p.host_thread_count == 0) { + return; + } + + const auto host_base_val = p.num_devices * p.kernel_count * p.ThreadCount(); + + std::vector threads; + for (auto i = 0u; i < p.host_thread_count; ++i) { + const auto iterations = p.HostIterationsPerThread(); + const auto thread_base_val = host_base_val + i * iterations; + threads.push_back(std::thread(HostAtomicOperation, iterations, mem, + old_vals + thread_base_val, p.width, p.pitch, thread_base_val)); + } + + for (auto& th : threads) { + th.join(); + } +} + +// This is the main body of the test: +// 1. Allocate memory based on TestParams::alloc_type +// 2. Launch kernels based on TestParams::num_devices and TestParams::kernel_count +// 3. Launch host threads based on TestParams::host_thread_count +// 4. Verify the results +template +void TestCore(const TestParams& p) { + const unsigned int flags = + p.alloc_type == LinearAllocs::mallocAndRegister ? 
hipHostRegisterMapped : 0u; + + const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + std::vector> old_vals_devs; + std::vector streams; + for (auto i = 0; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size); + for (auto j = 0; j < p.kernel_count; ++j) { + streams.emplace_back(Streams::created); + } + } + + const auto mem_alloc_size = p.width * p.pitch; + LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size, flags); + + std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount() + + p.host_thread_count * p.HostIterationsPerThread()); + std::vector res_vals(p.width); + + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr(); + + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + const auto& stream = streams[i * p.kernel_count + j].stream(); + const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); + LaunchKernel(p, stream, mem_dev.ptr(), + old_vals); + } + } + + PerformHostAtomicOperation(p, mem_dev.host_ptr(), old_vals.data()); + + for (auto i = 0u; i < p.num_devices; ++i) { + const auto device_offset = i * p.kernel_count * p.ThreadCount(); + HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), + old_vals_alloc_size, hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + p.width, hipMemcpyDeviceToHost)); + + Verify(p, res_vals, old_vals); +} + +inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); } + +inline dim3 GenerateBlockDimensions() { + int sm_count = 0; + HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0)); + return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); +} + +// Configures and creates the TestCore for a 
single device, and a single kernel launch +template +void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { + TestParams params; + params.num_devices = 1; + params.kernel_count = 1; + if constexpr ((operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) && + memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr ((operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateThreadDimensions(); + } + params.width = width; + params.pitch = pitch; + + SECTION("Global memory") { + if constexpr ((operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateBlockDimensions(); + } + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } + } + + SECTION("Shared memory") { + params.blocks = dim3(1); + params.alloc_type = LinearAllocs::hipMalloc; + TestCore(params); + } +} + +// Configures and creates the TestCore for a single device, and multiple kernel launches +template +void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0)); + if (!concurrent_kernels) { + 
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + + TestParams params; + params.num_devices = 1; + params.kernel_count = kernel_count; + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +// Configures and creates the TestCore for a multiple devices (and host), and multiple kernel +// launches +template +void MultipleDeviceMultipleKernelAndHostTest(const unsigned int num_devices, + const unsigned int kernel_count, + const unsigned int width, const unsigned int pitch, + const unsigned int host_thread_count = 0u) { + if (num_devices > 1) { + if (HipTest::getDeviceCount() < num_devices) { + std::string msg = std::to_string(num_devices) + " devices are required"; + HipTest::HIP_SKIP_TEST(msg.c_str()); + return; + } + } + + if (kernel_count > 1) { + for (auto i = 0u; i < num_devices; ++i) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + } + } + + TestParams params; + params.num_devices = num_devices; + params.kernel_count = kernel_count; + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); + params.width = width; + params.pitch = pitch; + params.host_thread_count = host_thread_count; + + using LA = LinearAllocs; + for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + 
TestCore(params); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicAdd.cc b/catch/unit/atomics/atomicAdd.cc new file mode 100644 index 0000000000..76eef23ac8 --- /dev/null +++ b/catch/unit/atomics/atomicAdd.cc @@ -0,0 +1,167 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicAdd_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicAdd atomicAdd + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. 
Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/atomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic addition on a target memory location. Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. 
+ * Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive_Multi_Kernel", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicAdd. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicAdd_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicAdd_int, kAtomicAdd_uint, kAtomicAdd_ulong, + kAtomicAdd_ulonglong, kAtomicAdd_float, kAtomicAdd_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicAdd_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicAdd_negative_kernels.cc b/catch/unit/atomics/atomicAdd_negative_kernels.cc new file mode 100644 index 0000000000..e0e8112cdf --- /dev/null +++ b/catch/unit/atomics/atomicAdd_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicAdd(int* address, int val) */ +__global__ void atomicAdd_int_v1(int* address, int* result) { *result = atomicAdd(&address, 1234); } + +__global__ void atomicAdd_int_v2(int* address, int* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_int_v3(int* address, int* result) { *result = atomicAdd(1234, 1234); } + +__global__ void atomicAdd_int_v4(Dummy* address, int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_int_v5(char* address, int* result) { *result = atomicAdd(address, 1234); } + +__global__ void atomicAdd_int_v6(short* address, int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_int_v7(long* address, int* result) { *result = atomicAdd(address, 1234); } + +__global__ void atomicAdd_int_v8(long long* address, int* result) { + *result = atomicAdd(address, 1234); +} + +/* unsigned int atomicAdd(unsigned int* address, unsigned int val) */ +__global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAdd(&address, 1234); +} + +__global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAdd(1234, 1234); +} + +__global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v5(char* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v6(short* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v7(long* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicAdd(address, 1234); +} + +/* atomicAdd(unsigned long* address, unsigned long val) */ +__global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAdd(&address, 1234); +} + +__global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAdd(1234, 1234); +} + +__global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +/* atomicAdd(unsigned long long* address, unsigned long long val) */ +__global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(&address, 1234); +} + +__global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(1234, 1234); +} + +__global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void 
atomicAdd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +/* atomicAdd(float* address, float val) */ +__global__ void atomicAdd_float_v1(float* address, float* result) { + *result = atomicAdd(&address, 1234.f); +} + +__global__ void atomicAdd_float_v2(float* address, float* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_float_v3(float* address, float* result) { + *result = atomicAdd(1234.f, 1234.f); +} + +__global__ void atomicAdd_float_v4(Dummy* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v5(char* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v6(short* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v7(long* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v8(long long* address, float* result) { + *result = atomicAdd(address, 1234); +} + +/* atomicAdd(double* address, double val) */ +__global__ void atomicAdd_double_v1(double* address, double* result) { + *result = atomicAdd(&address, 1234.0); +} + +__global__ void atomicAdd_double_v2(double* address, double* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_double_v3(double* address, double* result) { + *result = atomicAdd(1234.0, 1234.0); +} + +__global__ void atomicAdd_double_v4(Dummy* address, double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v5(char* address, double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v6(short* address, double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v7(long* address, 
double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v8(long long* address, double* result) { + *result = atomicAdd(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicAdd_negative_kernels_rtc.hh b/catch/unit/atomics/atomicAdd_negative_kernels_rtc.hh new file mode 100644 index 0000000000..c5141d03bc --- /dev/null +++ b/catch/unit/atomics/atomicAdd_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicAdd_int{ + R"( + __global__ void atomicAdd_int_v1(int* address, int* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_int_v2(int* address, int* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_int_v3(int* address, int* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_int_v4(Dummy* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v5(char* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v6(short* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v7(long* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v8(long long* address, int* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_uint{ + R"( + __global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_uint_v5(char* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_uint_v6(short* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_uint_v7(long* address, unsigned int* result) { + *result = atomicAdd(address, 1234); 
+ } + + __global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_ulong{ + R"( + __global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_ulonglong{ + R"( + __global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void 
atomicAdd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_float{ + R"( + __global__ void atomicAdd_float_v1(float* address, float* result) { + *result = atomicAdd(&address, 1234.f); + } + + __global__ void atomicAdd_float_v2(float* address, float* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_float_v3(float* address, float* result) { + *result = atomicAdd(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_float_v4(Dummy* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v5(char* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v6(short* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v7(long* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v8(long long* address, float* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_double{ + R"( + __global__ void atomicAdd_double_v1(double* address, double* result) { + *result = atomicAdd(&address, 1234.0); + } + + __global__ void atomicAdd_double_v2(double* address, double* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_double_v3(double* address, double* result) { + *result = atomicAdd(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_double_v4(Dummy* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v5(char* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v6(short* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v7(long* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v8(long long* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicAdd_system.cc b/catch/unit/atomics/atomicAdd_system.cc new file mode 100644 index 0000000000..c51ce0ad1f --- /dev/null +++ b/catch/unit/atomics/atomicAdd_system.cc @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup atomicAdd_system atomicAdd_system + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on two devices wherein all threads will perform + * an atomic addition on a target memory location. Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory are validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd_system + * - hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAdd_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel on a single device wherein all threads will perform + * an atomic addition on a target memory location. Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. While + * the kernel is running, the host performs atomic additions, in 4 threads, on the same memory + * location(s). Once complete, the output array and target memory is validated to contain all the + * expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicAdd_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicAdd_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, cache_line_size, 4); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times on two devices wherein all threads will perform + * an atomic addition on a target memory location. Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. While + * the kernel is running, the host performs atomic additions, in 4 threads, on the same memory + * location(s). Once complete, the output array and target memory is validated to contain all the + * expected values. 
Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicAdd_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size, 4); + } + } +} diff --git a/catch/unit/atomics/atomicAnd.cc b/catch/unit/atomics/atomicAnd.cc new file mode 100644 index 0000000000..756526a31c --- /dev/null +++ b/catch/unit/atomics/atomicAnd.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicAnd_negative_kernels_rtc.hh" +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicAnd atomicAnd + * @{ + * @ingroup AtomicsTest + * `atomicAnd(TestType* address, TestType val)` - + * performs an atomic bitwise AND between the value stored at address and val, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the same address. + * - Uses only one device and launches one kernel.
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicAnd with invalid parameters. + * - Compiles the source with RTC. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicAnd_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = + GENERATE(kAtomicAnd_int, kAtomicAnd_uint, kAtomicAnd_ulong, kAtomicAnd_ulonglong); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicAnd_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Expected count depends on the kernels in atomicAnd_negative_kernels_rtc.hh. NOTE(review): that header defines v1-v10 per type; confirm that 9 is the intended count. + int expected_error_count{9}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicAnd_negative_kernels.cc b/catch/unit/atomics/atomicAnd_negative_kernels.cc new file mode 100644 index 0000000000..593399b45e --- /dev/null +++ b/catch/unit/atomics/atomicAnd_negative_kernels.cc @@ -0,0 +1,185 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicAnd(int* address, int val) */ +__global__ void atomicAnd_int_v1(int* address, int* result) { *result = atomicAnd(&address, 1234); } + +__global__ void atomicAnd_int_v2(int* address, int* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_int_v3(int* address, int* result) { *result = atomicAnd(1234, 1234); } + +__global__ void atomicAnd_int_v4(Dummy* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v5(char* address, int* result) { *result = atomicAnd(address, 1234); } + +__global__ void atomicAnd_int_v6(short* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v7(long* address, int* result) { *result = atomicAnd(address, 1234); } + +__global__ void atomicAnd_int_v8(long long* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v9(float* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v10(double* address, int* result) { + *result = atomicAnd(address, 1234); +} + +/* unsigned int atomicAnd(unsigned int* address, unsigned int val) */ +__global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAnd(&address, 1234); +} + +__global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAnd(1234, 1234); +} + +__global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v5(char* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) { + *result = atomicAnd(address, 
1234); +} + +__global__ void atomicAnd_uint_v7(long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v9(float* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v10(double* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +/* atomicAnd(unsigned long* address, unsigned long val) */ +__global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAnd(&address, 1234); +} + +__global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAnd(1234, 1234); +} + +__global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +/* atomicAnd(unsigned long long* address, unsigned long long val) */ +__global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(&address, 1234); +} + 
+__global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(1234, 1234); +} + +__global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} diff --git a/catch/unit/atomics/atomicAnd_negative_kernels_rtc.hh b/catch/unit/atomics/atomicAnd_negative_kernels_rtc.hh new file mode 100644 index 0000000000..d637feb9fe --- /dev/null +++ b/catch/unit/atomics/atomicAnd_negative_kernels_rtc.hh @@ -0,0 +1,223 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicAnd_int{ + R"( + __global__ void atomicAnd_int_v1(int* address, int* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_int_v2(int* address, int* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_int_v3(int* address, int* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_int_v4(Dummy* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v5(char* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v6(short* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v7(long* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v8(long long* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v9(float* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v10(double* address, int* result) { + *result = atomicAnd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAnd_uint{ + R"( + __global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v5(char* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + 
__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v7(long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v9(float* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v10(double* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAnd_ulong{ + R"( + __global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + )"}; + 
+static constexpr auto kAtomicAnd_ulonglong{ + R"( + __global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + )"}; diff --git a/catch/unit/atomics/atomicAnd_system.cc b/catch/unit/atomics/atomicAnd_system.cc new file mode 100644 index 0000000000..e696a8ac26 --- /dev/null +++ b/catch/unit/atomics/atomicAnd_system.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicAnd_system atomicAnd_system + * @{ + * @ingroup AtomicsTest + * `atomicAnd_system(TestType* address, TestType val)` - + * performs a system-wide atomic bitwise AND between the value stored at address and val, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomicCAS.cc b/catch/unit/atomics/atomicCAS.cc new file mode 100644 index 0000000000..3be684306d --- /dev/null +++ b/catch/unit/atomics/atomicCAS.cc @@ -0,0 +1,172 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicCAS_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicCAS atomicCAS + * @{ + * @ingroup AtomicsTest + */ + +#ifdef HT_NVIDIA +#define TYPES +#else +#define TYPES , float, double +#endif + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition, implemented using an atomic CAS operation, on a target memory location. Each thread + * will add the same value to the memory location, storing the return value into a separate output + * array slot corresponding to it. Once complete, the output array and target memory are validated to + * contain all the expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory).
+ * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive", "", int, unsigned int, unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each + * thread will add the same value to the memory location, storing the return value into a separate + * output array slot corresponding to it. Once complete, the output array and target memory are + * validated to contain all the expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations.
+ * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive_Multi_Kernel", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicCAS. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicCAS_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicCAS_int, kAtomicCAS_uint, kAtomicCAS_ulong, + kAtomicCAS_ulonglong, kAtomicCAS_float, kAtomicCAS_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicCAS_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicCAS_negative_kernels.cc b/catch/unit/atomics/atomicCAS_negative_kernels.cc new file mode 100644 index 0000000000..b0390bb3fa --- /dev/null +++ b/catch/unit/atomics/atomicCAS_negative_kernels.cc @@ -0,0 +1,62 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define ATOMIC_CAS_NEGATIVE_KERNEL(type_name) \ + __global__ void atomicCAS_v1(type_name* address, type_name* result) { \ + *result = atomicCAS(&address, 12, 13); \ + } \ + __global__ void atomicCAS_v2(type_name* address, type_name* result) { \ + *result = atomicCAS(address, address, 13); \ + } \ + __global__ void atomicCAS_v3(type_name* address, type_name* result) { \ + *result = atomicCAS(address, 12, address); \ + } \ + __global__ void atomicCAS_v4(Dummy* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v5(char* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v6(short* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v7(long* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v8(long long* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } + +ATOMIC_CAS_NEGATIVE_KERNEL(int) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned int) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long long) +ATOMIC_CAS_NEGATIVE_KERNEL(float) +ATOMIC_CAS_NEGATIVE_KERNEL(double) diff --git a/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh b/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh new file mode 100644 index 0000000000..952c4892fb --- /dev/null +++ b/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicCAS_int{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_int_v1(int* address, int* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_int_v2(int* address, int* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_int_v3(int* address, int* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_int_v4(Dummy* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v5(char* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v6(short* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v7(long* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v8(long long* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_uint{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v5(char* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v6(short* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v7(long* address, unsigned int* 
result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v8(long long* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_ulong{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v5(char* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v6(short* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v7(long* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v8(long long* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_ulonglong{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_ulonglong_v4(Dummy* address, 
unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_float{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_float_v1(float* address, float* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_float_v2(float* address, float* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_float_v3(float* address, float* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_float_v4(Dummy* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v5(char* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v6(short* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v7(long* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v8(long long* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_double{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_double_v1(double* address, double* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_double_v2(double* address, 
double* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_double_v3(double* address, double* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_double_v4(Dummy* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v5(char* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v6(short* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v7(long* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v8(long long* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; diff --git a/catch/unit/atomics/atomicCAS_system.cc b/catch/unit/atomics/atomicCAS_system.cc new file mode 100644 index 0000000000..8f2dd8306b --- /dev/null +++ b/catch/unit/atomics/atomicCAS_system.cc @@ -0,0 +1,185 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup atomicCAS_system atomicCAS_system + * @{ + * @ingroup AtomicsTest + */ + +#ifdef HT_NVIDIA +#define TYPES +#else +#define TYPES , float, double +#endif + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on two devices wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each + * thread will add the same value to the memory location, storing the return value into a separate + * output array slot corresponding to it. Once complete, the output array and target memory are + * validated to contain all the expected values. Several memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations.
+ * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel on a single device wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. + * Each thread will add the same value to the memory location, storing the return value into a + * separate output array slot corresponding to it. While the kernel is running, the host + * performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the + * output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, cache_line_size, 4); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times on two devices wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. + * Each thread will add the same value to the memory location, storing the return value into a + * separate output array slot corresponding to it. While the kernel is running, the host + * performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the + * output array and target memory is validated to contain all the expected values. 
Several + * memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size, 4); + } + } +} diff --git a/catch/unit/atomics/atomicDec.cc b/catch/unit/atomics/atomicDec.cc new file mode 100644 index 0000000000..e088ebe2b6 --- /dev/null +++ b/catch/unit/atomics/atomicDec.cc @@ -0,0 +1,164 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicDec_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicDec atomicDec + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * decrement on a target memory location. Each thread will decrement the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads decrement a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicDec + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/atomicDec.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicDec_Positive", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic decrement on a target memory location. Each thread will decrement the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads decrement a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicDec + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicDec.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicDec_Positive_Multi_Kernel", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicDec. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicDec.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicDec_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicDec_uint); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicDec_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicDec_negative_kernels.cc b/catch/unit/atomics/atomicDec_negative_kernels.cc new file mode 100644 index 0000000000..4177ec0e70 --- /dev/null +++ b/catch/unit/atomics/atomicDec_negative_kernels.cc @@ -0,0 +1,62 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* unsigned int atomicDec(unsigned int* address, unsigned int val) */ +__global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicDec(&address, 1234); +} + +__global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicDec(address, address); +} + +__global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicDec(1234, 1234); +} + +__global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v5(char* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v6(short* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v7(long* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v8(long long* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicDec_negative_kernels_rtc.hh b/catch/unit/atomics/atomicDec_negative_kernels_rtc.hh new file mode 100644 index 0000000000..88ab33d01a --- /dev/null +++ b/catch/unit/atomics/atomicDec_negative_kernels_rtc.hh @@ -0,0 +1,68 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicDec_uint{ + R"( + __global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicDec(&address, 1234); + } + + __global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicDec(address, address); + } + + __global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicDec(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v5(char* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v6(short* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v7(long* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v8(long long* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + )"}; \ No newline at end of file diff --git a/catch/unit/atomics/atomicExch_common.hh b/catch/unit/atomics/atomicExch_common.hh index 1b4add5253..96238a089b 100644 --- a/catch/unit/atomics/atomicExch_common.hh +++ b/catch/unit/atomics/atomicExch_common.hh @@ -24,22 +24,26 @@ THE SOFTWARE. 
#include +#include #include #include #include -#include -enum class AtomicScopes { device, system }; +enum class AtomicScopes { device, system, builtin }; -template __device__ T perform_atomic_exch(T* address, T val) { +template +__device__ T perform_atomic_exch(T* address, T val) { if constexpr (scope == AtomicScopes::device) { return atomicExch(address, val); } else if (scope == AtomicScopes::system) { return atomicExch_system(address, val); + } else if (scope == AtomicScopes::builtin) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, memory_scope); } } -template +template __global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const old_vals) { __shared__ T shared_mem; @@ -52,7 +56,7 @@ __global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const ol __syncthreads(); } - old_vals[tid] = perform_atomic_exch(mem, static_cast(tid + 1)); + old_vals[tid] = perform_atomic_exch(mem, static_cast(tid + 1)); if constexpr (use_shared_mem) { __syncthreads(); @@ -67,7 +71,16 @@ __host__ __device__ T* pitched_offset(T* const ptr, const unsigned int pitch, return reinterpret_cast(byte_ptr + idx * pitch); } -template +__device__ void generate_memory_traffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + +template __global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const unsigned int width, const unsigned pitch, const T base_val = 0) { extern __shared__ uint8_t shared_mem[]; @@ -84,8 +97,18 @@ __global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const __syncthreads(); } - old_vals[tid] = perform_atomic_exch(pitched_offset(mem, pitch, tid % width), - base_val + static_cast(tid + width)); + const auto n = cooperative_groups::this_grid().size() - width; + + T* atomic_addr = pitched_offset(mem, pitch, tid % width); + + if (tid < n) { + old_vals[tid] = 
perform_atomic_exch( + pitched_offset(mem, pitch, tid % width), base_val + static_cast(tid + width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + generate_memory_traffic(begin_addr, end_addr); + } if constexpr (use_shared_mem) { __syncthreads(); @@ -255,14 +278,16 @@ class AtomicExchCRTP { } }; -template +template class AtomicExch : public AtomicExchCRTP, T, use_shared_mem, scope> { public: void LaunchKernel(const unsigned int shared_mem_size, const hipStream_t stream, T* const mem, T* const old_vals, const T base_val, const AtomicExchParams& p) const { - atomic_exch_kernel<<>>( - mem, old_vals, p.width, p.pitch, base_val); + atomic_exch_kernel + <<>>(mem, old_vals, p.width, p.pitch, + base_val); } void ValidateResults(std::vector& old_vals) const { @@ -281,23 +306,39 @@ inline dim3 GenerateAtomicExchBlockDimensions() { return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); } -template +template void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { AtomicExchParams params; params.num_devices = 1; params.kernel_count = 1; - params.threads = GenerateAtomicExchThreadDimensions(); + if constexpr (scope == AtomicScopes::builtin && memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr (scope == AtomicScopes::builtin && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateAtomicExchThreadDimensions(); + } params.width = width; params.pitch = pitch; SECTION("Global memory") { - params.blocks = GenerateAtomicExchBlockDimensions(); + if constexpr (scope == AtomicScopes::builtin && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + memory_scope == 
__HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateAtomicExchBlockDimensions(); + } using LA = LinearAllocs; for (const auto alloc_type : {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { - AtomicExch().run(params); + AtomicExch().run(params); } } } @@ -305,7 +346,7 @@ void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsi SECTION("Shared memory") { params.blocks = dim3(1); params.alloc_type = LinearAllocs::hipMalloc; - AtomicExch().run(params); + AtomicExch().run(params); } } diff --git a/catch/unit/atomics/atomicInc.cc b/catch/unit/atomics/atomicInc.cc new file mode 100644 index 0000000000..4c7f79a04f --- /dev/null +++ b/catch/unit/atomics/atomicInc.cc @@ -0,0 +1,164 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "arithmetic_common.hh" +#include "atomicInc_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicInc atomicInc + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * increment on a target memory location. Each thread will increment the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads increment a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicInc + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). 
+ * Test source + * ------------------------ + * - unit/atomics/atomicInc.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicInc_Positive", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will + * perform an atomic increment on a target memory location. Each thread will increment the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads increment a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicInc + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicInc.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicInc_Positive_Multi_Kernel", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicInc. + * Test source + * ------------------------ + * - unit/atomics/atomicInc.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicInc_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicInc_uint); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicInc_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + 
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicInc_negative_kernels.cc b/catch/unit/atomics/atomicInc_negative_kernels.cc new file mode 100644 index 0000000000..8c0f9e7fb6 --- /dev/null +++ b/catch/unit/atomics/atomicInc_negative_kernels.cc @@ -0,0 +1,62 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* unsigned int atomicInc(unsigned int* address, unsigned int val) */ +__global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicInc(&address, 1234); +} + +__global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicInc(address, address); +} + +__global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicInc(1234, 1234); +} + +__global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicInc(address, 1234); +} + +__global__ void atomicInc_uint_v5(char* address, unsigned int* result) { + *result = atomicInc(address, 1234); +} + +__global__ void atomicInc_uint_v6(short* address, unsigned int* result) { + *result = atomicInc(address, 1234); +} + +__global__ void atomicInc_uint_v7(long* address, unsigned int* result) { + *result = atomicInc(address, 1234); +} + +__global__ void atomicInc_uint_v8(long long* address, unsigned int* result) { + *result = atomicInc(address, 1234); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicInc_negative_kernels_rtc.hh b/catch/unit/atomics/atomicInc_negative_kernels_rtc.hh new file mode 100644 index 0000000000..c4ef1e91c7 --- /dev/null +++ b/catch/unit/atomics/atomicInc_negative_kernels_rtc.hh @@ -0,0 +1,68 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicInc_uint{ + R"( + __global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicInc(&address, 1234); + } + + __global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicInc(address, address); + } + + __global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicInc(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v5(char* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v6(short* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v7(long* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v8(long long* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + )"}; \ No newline at end of file diff --git a/catch/unit/atomics/atomicMax.cc b/catch/unit/atomics/atomicMax.cc new file mode 100644 index 0000000000..e98ceaaf4c --- /dev/null +++ b/catch/unit/atomics/atomicMax.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicMax_negative_kernels_rtc.hh" +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMax atomicMax + * @{ + * @ingroup AtomicsTest + * `atomicMax(TestType* address, TestType val)` - + * atomically computes the maximum of the value at `address` and `val`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on scattered addresses. + * - Uses only one device and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicMax with invalid parameters. + * - Compiles the source with RTC. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicMax_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicMax_int, kAtomicMax_uint, kAtomicMax_ulong, + kAtomicMax_ulonglong, kAtomicMax_float, kAtomicMax_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicMax_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of atomicMax_negative_kernels_rtc.hh + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result,
HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicMax_negative_kernels.cc b/catch/unit/atomics/atomicMax_negative_kernels.cc new file mode 100644 index 0000000000..2f9b6a6306 --- /dev/null +++ b/catch/unit/atomics/atomicMax_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicMax(int* address, int val) */ +__global__ void atomicMax_int_v1(int* address, int* result) { *result = atomicMax(&address, 1234); } + +__global__ void atomicMax_int_v2(int* address, int* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_int_v3(int* address, int* result) { *result = atomicMax(1234, 1234); } + +__global__ void atomicMax_int_v4(Dummy* address, int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_int_v5(char* address, int* result) { *result = atomicMax(address, 1234); } + +__global__ void atomicMax_int_v6(short* address, int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_int_v7(long* address, int* result) { *result = atomicMax(address, 1234); } + +__global__ void atomicMax_int_v8(long long* address, int* result) { + *result = atomicMax(address, 1234); +} + +/* unsigned int atomicMax(unsigned int* address, unsigned int val) */ +__global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v5(char* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v6(short* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v7(long* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicMax(address, 1234); +} + +/* atomicMax(unsigned long* address, unsigned long val) */ +__global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v5(char* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v6(short* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v7(long* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(unsigned long long* address, unsigned long long val) */ +__global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void 
atomicMax_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(float* address, float val) */ +__global__ void atomicMax_float_v1(float* address, float* result) { + *result = atomicMax(&address, 1234.f); +} + +__global__ void atomicMax_float_v2(float* address, float* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_float_v3(float* address, float* result) { + *result = atomicMax(1234.f, 1234.f); +} + +__global__ void atomicMax_float_v4(Dummy* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v5(char* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v6(short* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v7(long* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v8(long long* address, float* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(double* address, double val) */ +__global__ void atomicMax_double_v1(double* address, double* result) { + *result = atomicMax(&address, 1234.0); +} + +__global__ void atomicMax_double_v2(double* address, double* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_double_v3(double* address, double* result) { + *result = atomicMax(1234.0, 1234.0); +} + +__global__ void atomicMax_double_v4(Dummy* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v5(char* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v6(short* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v7(long* address, 
double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v8(long long* address, double* result) { + *result = atomicMax(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh b/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh new file mode 100644 index 0000000000..885f9f5250 --- /dev/null +++ b/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicMax_int{ + R"( + __global__ void atomicMax_int_v1(int* address, int* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_int_v2(int* address, int* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_int_v3(int* address, int* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_int_v4(Dummy* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v5(char* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v6(short* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v7(long* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v8(long long* address, int* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_uint{ + R"( + __global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v5(char* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v6(short* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v7(long* address, unsigned int* result) { + *result = atomicMax(address, 1234); 
+ } + + __global__ void atomicMax_uint_v8(long long* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_ulong{ + R"( + __global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v5(char* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v6(short* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v7(long* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_ulonglong{ + R"( + __global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void 
atomicMax_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_float{ + R"( + __global__ void atomicMax_float_v1(float* address, float* result) { + *result = atomicMax(&address, 1234.f); + } + + __global__ void atomicMax_float_v2(float* address, float* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_float_v3(float* address, float* result) { + *result = atomicMax(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_float_v4(Dummy* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v5(char* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v6(short* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v7(long* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v8(long long* address, float* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_double{ + R"( + __global__ void atomicMax_double_v1(double* address, double* result) { + *result = atomicMax(&address, 1234.0); + } + + __global__ void atomicMax_double_v2(double* address, double* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_double_v3(double* address, double* result) { + *result = atomicMax(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_double_v4(Dummy* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v5(char* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v6(short* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v7(long* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v8(long long* address, double* result) { + *result = atomicMax(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicMax_system.cc b/catch/unit/atomics/atomicMax_system.cc new file mode 100644 index 0000000000..b07b566616 --- /dev/null +++ b/catch/unit/atomics/atomicMax_system.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMax_system atomicMax_system + * @{ + * @ingroup AtomicsTest + * `atomicMax_system(TestType* address, TestType val)` - + * performs system-wide atomic maximum between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { +#endif + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomicMin.cc b/catch/unit/atomics/atomicMin.cc new file mode 100644 index 0000000000..3d0f89412f --- /dev/null +++ b/catch/unit/atomics/atomicMin.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicMin_negative_kernels_rtc.hh" +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMin atomicMin + * @{ + * @ingroup AtomicsTest + * `atomicMin(TestType* address, TestType val)` - + * calculates minimum between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on scattered addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicMin with invalid parameters. + * - Compiles the source with RTC. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicMin_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicMin_int, kAtomicMin_uint, kAtomicMin_ulong, + kAtomicMin_ulonglong, kAtomicMin_float, kAtomicMin_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicMin_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of negative_kernels_rtc.hh + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, 
HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicMin_negative_kernels.cc b/catch/unit/atomics/atomicMin_negative_kernels.cc new file mode 100644 index 0000000000..644b7aaf8b --- /dev/null +++ b/catch/unit/atomics/atomicMin_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicMin(int* address, int val) */ +__global__ void atomicMin_int_v1(int* address, int* result) { *result = atomicMin(&address, 1234); } + +__global__ void atomicMin_int_v2(int* address, int* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_int_v3(int* address, int* result) { *result = atomicMin(1234, 1234); } + +__global__ void atomicMin_int_v4(Dummy* address, int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_int_v5(char* address, int* result) { *result = atomicMin(address, 1234); } + +__global__ void atomicMin_int_v6(short* address, int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_int_v7(long* address, int* result) { *result = atomicMin(address, 1234); } + +__global__ void atomicMin_int_v8(long long* address, int* result) { + *result = atomicMin(address, 1234); +} + +/* unsigned int atomicMin(unsigned int* address, unsigned int val) */ +__global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v5(char* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v6(short* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v7(long* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicMin(address, 1234); +} + +/* atomicMin(unsigned long* address, unsigned long val) */ +__global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v5(char* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v6(short* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v7(long* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(unsigned long long* address, unsigned long long val) */ +__global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void 
atomicMin_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(float* address, float val) */ +__global__ void atomicMin_float_v1(float* address, float* result) { + *result = atomicMin(&address, 1234.f); +} + +__global__ void atomicMin_float_v2(float* address, float* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_float_v3(float* address, float* result) { + *result = atomicMin(1234.f, 1234.f); +} + +__global__ void atomicMin_float_v4(Dummy* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v5(char* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v6(short* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v7(long* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v8(long long* address, float* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(double* address, double val) */ +__global__ void atomicMin_double_v1(double* address, double* result) { + *result = atomicMin(&address, 1234.0); +} + +__global__ void atomicMin_double_v2(double* address, double* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_double_v3(double* address, double* result) { + *result = atomicMin(1234.0, 1234.0); +} + +__global__ void atomicMin_double_v4(Dummy* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v5(char* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v6(short* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v7(long* address, 
double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v8(long long* address, double* result) { + *result = atomicMin(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh b/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh new file mode 100644 index 0000000000..cc1ae5c7af --- /dev/null +++ b/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicMin_int{ + R"( + __global__ void atomicMin_int_v1(int* address, int* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_int_v2(int* address, int* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_int_v3(int* address, int* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_int_v4(Dummy* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v5(char* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v6(short* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v7(long* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v8(long long* address, int* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_uint{ + R"( + __global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v5(char* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v6(short* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v7(long* address, unsigned int* result) { + *result = atomicMin(address, 1234); 
+ } + + __global__ void atomicMin_uint_v8(long long* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_ulong{ + R"( + __global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v5(char* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v6(short* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v7(long* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_ulonglong{ + R"( + __global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void 
atomicMin_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_float{ + R"( + __global__ void atomicMin_float_v1(float* address, float* result) { + *result = atomicMin(&address, 1234.f); + } + + __global__ void atomicMin_float_v2(float* address, float* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_float_v3(float* address, float* result) { + *result = atomicMin(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_float_v4(Dummy* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v5(char* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v6(short* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v7(long* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v8(long long* address, float* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_double{ + R"( + __global__ void atomicMin_double_v1(double* address, double* result) { + *result = atomicMin(&address, 1234.0); + } + + __global__ void atomicMin_double_v2(double* address, double* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_double_v3(double* address, double* result) { + *result = atomicMin(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_double_v4(Dummy* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v5(char* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v6(short* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v7(long* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v8(long long* address, double* result) { + *result = atomicMin(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicMin_system.cc b/catch/unit/atomics/atomicMin_system.cc new file mode 100644 index 0000000000..7474a2e10d --- /dev/null +++ b/catch/unit/atomics/atomicMin_system.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMin_system atomicMin_system + * @{ + * @ingroup AtomicsTest + * `atomicMin_system(TestType* address, TestType val)` - + * performs system-wide atomic minimum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { +#endif + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomicOr.cc b/catch/unit/atomics/atomicOr.cc new file mode 100644 index 0000000000..e2ae9c6825 --- /dev/null +++ b/catch/unit/atomics/atomicOr.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicOr_negative_kernels_rtc.hh" +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicOr atomicOr + * @{ + * @ingroup AtomicsTest + * `atomicOr(TestType* address, TestType val)` - + * performs atomic bitwise OR between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicOr with invalid parameters. + * - Compiles the source with RTC. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicOr_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = + GENERATE(kAtomicOr_int, kAtomicOr_uint, kAtomicOr_ulong, kAtomicOr_ulonglong); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicOr_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of negative_kernels_rtc.hh + int expected_error_count{9}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == 
expected_error_count); +} diff --git a/catch/unit/atomics/atomicOr_negative_kernels.cc b/catch/unit/atomics/atomicOr_negative_kernels.cc new file mode 100644 index 0000000000..47a56e3a0d --- /dev/null +++ b/catch/unit/atomics/atomicOr_negative_kernels.cc @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicOr(int* address, int val) */ +__global__ void atomicOr_int_v1(int* address, int* result) { *result = atomicOr(&address, 1234); } + +__global__ void atomicOr_int_v2(int* address, int* result) { *result = atomicOr(address, address); } + +__global__ void atomicOr_int_v3(int* address, int* result) { *result = atomicOr(1234, 1234); } + +__global__ void atomicOr_int_v4(Dummy* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v5(char* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v6(short* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v7(long* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v8(long long* address, int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_int_v9(float* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v10(double* address, int* result) { + *result = atomicOr(address, 1234); +} + +/* unsigned int atomicOr(unsigned int* address, unsigned int val) */ +__global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicOr(&address, 1234); +} + +__global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicOr(address, address); +} + +__global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicOr(1234, 1234); +} + +__global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v5(char* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v6(short* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void 
atomicOr_uint_v7(long* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v8(long long* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v9(float* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v10(double* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +/* atomicOr(unsigned long* address, unsigned long val) */ +__global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicOr(&address, 1234); +} + +__global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicOr(address, address); +} + +__global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicOr(1234, 1234); +} + +__global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v5(char* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v6(short* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v7(long* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v9(float* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v10(double* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +/* atomicOr(unsigned long long* address, unsigned long long val) */ +__global__ void atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(&address, 1234); +} + +__global__ void atomicOr_ulonglong_v2(unsigned long long* address, 
unsigned long long* result) { + *result = atomicOr(address, address); +} + +__global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(1234, 1234); +} + +__global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} diff --git a/catch/unit/atomics/atomicOr_negative_kernels_rtc.hh b/catch/unit/atomics/atomicOr_negative_kernels_rtc.hh new file mode 100644 index 0000000000..dd4117e704 --- /dev/null +++ b/catch/unit/atomics/atomicOr_negative_kernels_rtc.hh @@ -0,0 +1,223 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicOr_int{ + R"( + __global__ void atomicOr_int_v1(int* address, int* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_int_v2(int* address, int* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_int_v3(int* address, int* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_int_v4(Dummy* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v5(char* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v6(short* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v7(long* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v8(long long* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v9(float* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v10(double* address, int* result) { + *result = atomicOr(address, 1234); + } + )"}; + +static constexpr auto kAtomicOr_uint{ + R"( + __global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v5(char* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v6(short* 
address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v7(long* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v8(long long* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v9(float* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v10(double* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + )"}; + +static constexpr auto kAtomicOr_ulong{ + R"( + __global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v5(char* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v6(short* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v7(long* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v9(float* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v10(double* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + )"}; + +static constexpr auto kAtomicOr_ulonglong{ + R"( + __global__ void 
atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + )"}; diff --git a/catch/unit/atomics/atomicOr_system.cc b/catch/unit/atomics/atomicOr_system.cc new file mode 100644 index 0000000000..0239056e3f --- /dev/null +++ b/catch/unit/atomics/atomicOr_system.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicOr_system atomicOr_system + * @{ + * @ingroup AtomicsTest + * `atomicOr_system(TestType* address, TestType val)` - + * performs system-wide atomic bitwise OR between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicOr_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicOr_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomicSub.cc b/catch/unit/atomics/atomicSub.cc new file mode 100644 index 0000000000..75d1678c46 --- /dev/null +++ b/catch/unit/atomics/atomicSub.cc @@ -0,0 +1,167 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "arithmetic_common.hh" +#include "atomicSub_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicSub atomicSub + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * subtraction on a target memory location. Each thread will subtract the same value from the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicSub + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). 
+ * Test source + * ------------------------ + * - unit/atomics/atomicSub.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_Positive", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value from + * the memory location, storing the return value into a separate output array slot corresponding to + * it. Once complete, the output array and target memory is validated to contain all the expected + * values. Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicSub + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicSub.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_Positive_Multi_Kernel", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicSub. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicSub.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicSub_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicSub_int, kAtomicSub_uint, kAtomicSub_ulong, + kAtomicSub_ulonglong, kAtomicSub_float, kAtomicSub_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicSub_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicSub_negative_kernels.cc b/catch/unit/atomics/atomicSub_negative_kernels.cc new file mode 100644 index 0000000000..c13b243db1 --- /dev/null +++ b/catch/unit/atomics/atomicSub_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicSub(int* address, int val) */ +__global__ void atomicSub_int_v1(int* address, int* result) { *result = atomicSub(&address, 1234); } + +__global__ void atomicSub_int_v2(int* address, int* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_int_v3(int* address, int* result) { *result = atomicSub(1234, 1234); } + +__global__ void atomicSub_int_v4(Dummy* address, int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_int_v5(char* address, int* result) { *result = atomicSub(address, 1234); } + +__global__ void atomicSub_int_v6(short* address, int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_int_v7(long* address, int* result) { *result = atomicSub(address, 1234); } + +__global__ void atomicSub_int_v8(long long* address, int* result) { + *result = atomicSub(address, 1234); +} + +/* unsigned int atomicSub(unsigned int* address, unsigned int val) */ +__global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicSub(&address, 1234); +} + +__global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicSub(1234, 1234); +} + +__global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v5(char* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v6(short* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v7(long* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicSub(address, 1234); +} + +/* atomicSub(unsigned long* address, unsigned long val) */ +__global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicSub(&address, 1234); +} + +__global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicSub(1234, 1234); +} + +__global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v5(char* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v6(short* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v7(long* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +/* atomicSub(unsigned long long* address, unsigned long long val) */ +__global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(&address, 1234); +} + +__global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(1234, 1234); +} + +__global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void 
atomicSub_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +/* atomicSub(float* address, float val) */ +__global__ void atomicSub_float_v1(float* address, float* result) { + *result = atomicSub(&address, 1234.f); +} + +__global__ void atomicSub_float_v2(float* address, float* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_float_v3(float* address, float* result) { + *result = atomicSub(1234.f, 1234.f); +} + +__global__ void atomicSub_float_v4(Dummy* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v5(char* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v6(short* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v7(long* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v8(long long* address, float* result) { + *result = atomicSub(address, 1234); +} + +/* atomicSub(double* address, double val) */ +__global__ void atomicSub_double_v1(double* address, double* result) { + *result = atomicSub(&address, 1234.0); +} + +__global__ void atomicSub_double_v2(double* address, double* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_double_v3(double* address, double* result) { + *result = atomicSub(1234.0, 1234.0); +} + +__global__ void atomicSub_double_v4(Dummy* address, double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v5(char* address, double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v6(short* address, double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v7(long* address, 
double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v8(long long* address, double* result) { + *result = atomicSub(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicSub_negative_kernels_rtc.hh b/catch/unit/atomics/atomicSub_negative_kernels_rtc.hh new file mode 100644 index 0000000000..543dba3026 --- /dev/null +++ b/catch/unit/atomics/atomicSub_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicSub_int{ + R"( + __global__ void atomicSub_int_v1(int* address, int* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_int_v2(int* address, int* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_int_v3(int* address, int* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_int_v4(Dummy* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v5(char* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v6(short* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v7(long* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v8(long long* address, int* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_uint{ + R"( + __global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_uint_v5(char* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_uint_v6(short* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_uint_v7(long* address, unsigned int* result) { + *result = atomicSub(address, 1234); 
+ } + + __global__ void atomicSub_uint_v8(long long* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_ulong{ + R"( + __global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v5(char* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v6(short* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v7(long* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_ulonglong{ + R"( + __global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void 
atomicSub_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_float{ + R"( + __global__ void atomicSub_float_v1(float* address, float* result) { + *result = atomicSub(&address, 1234.f); + } + + __global__ void atomicSub_float_v2(float* address, float* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_float_v3(float* address, float* result) { + *result = atomicSub(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_float_v4(Dummy* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v5(char* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v6(short* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v7(long* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v8(long long* address, float* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_double{ + R"( + __global__ void atomicSub_double_v1(double* address, double* result) { + *result = atomicSub(&address, 1234.0); + } + + __global__ void atomicSub_double_v2(double* address, double* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_double_v3(double* address, double* result) { + *result = atomicSub(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_double_v4(Dummy* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v5(char* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v6(short* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v7(long* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v8(long long* address, double* result) { + *result = atomicSub(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicSub_system.cc b/catch/unit/atomics/atomicSub_system.cc new file mode 100644 index 0000000000..0abccf754f --- /dev/null +++ b/catch/unit/atomics/atomicSub_system.cc @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup atomicSub_system atomicSub_system + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on two devices wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value from + * the memory location, storing the return value into a separate output array slot corresponding to + * it. Once complete, the output array and target memory is validated to contain all the expected + * values. Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicSub_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations.
+ * Test source + * ------------------------ + * - unit/atomics/atomicSub_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel on a single device wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value from + * the memory location, storing the return value into a separate output array slot corresponding to + * it. While the kernel is running, the host performs atomic subtractions, in 4 threads, on the same + * memory location(s). Once complete, the output array and target memory is validated to contain + * all the expected values. Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes.
+ * + * - The test is run for: + * - All overloads of atomicSub_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicSub_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, cache_line_size, 4); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times on two devices wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value from + * the memory location, storing the return value into a separate output array slot corresponding to + * it. While the kernel is running, the host performs atomic subtractions, in 4 threads, on the same + * memory location(s). Once complete, the output array and target memory is validated to contain + * all the expected values.
Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicSub_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicSub_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size, 4); + } + } +} diff --git a/catch/unit/atomics/atomicXor.cc b/catch/unit/atomics/atomicXor.cc new file mode 100644 index 0000000000..0fb31252c5 --- /dev/null +++ b/catch/unit/atomics/atomicXor.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicXor_negative_kernels_rtc.hh" +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicXor atomicXor + * @{ + * @ingroup AtomicsTest + * `atomicXor(TestType* address, TestType val)` - + * performs atomic bitwise XOR between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the same address. + * - Uses only one device and launches one kernel.
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size - 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size - 1, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicXor with invalid parameters. + * - Compiles the source with RTC. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicXor_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = + GENERATE(kAtomicXor_int, kAtomicXor_uint, kAtomicXor_ulong, kAtomicXor_ulonglong); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicXor_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Expected number of "error:" diagnostics; see atomicXor_negative_kernels_rtc.hh + int expected_error_count{9}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicXor_negative_kernels.cc b/catch/unit/atomics/atomicXor_negative_kernels.cc new file mode 100644 index 0000000000..a180afd6db --- /dev/null +++ b/catch/unit/atomics/atomicXor_negative_kernels.cc @@ -0,0 +1,185 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicXor(int* address, int val) */ +__global__ void atomicXor_int_v1(int* address, int* result) { *result = atomicXor(&address, 1234); } + +__global__ void atomicXor_int_v2(int* address, int* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_int_v3(int* address, int* result) { *result = atomicXor(1234, 1234); } + +__global__ void atomicXor_int_v4(Dummy* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v5(char* address, int* result) { *result = atomicXor(address, 1234); } + +__global__ void atomicXor_int_v6(short* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v7(long* address, int* result) { *result = atomicXor(address, 1234); } + +__global__ void atomicXor_int_v8(long long* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v9(float* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v10(double* address, int* result) { + *result = atomicXor(address, 1234); +} + +/* unsigned int atomicXor(unsigned int* address, unsigned int val) */ +__global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicXor(&address, 1234); +} + +__global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicXor(1234, 1234); +} + +__global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v5(char* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v6(short* address, unsigned int* result) { + *result = atomicXor(address, 
1234); +} + +__global__ void atomicXor_uint_v7(long* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v8(long long* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v9(float* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v10(double* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +/* atomicXor(unsigned long* address, unsigned long val) */ +__global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicXor(&address, 1234); +} + +__global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicXor(1234, 1234); +} + +__global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v5(char* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v6(short* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v7(long* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v9(float* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicXor_ulong_v10(double* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +/* atomicXor(unsigned long long* address, unsigned long long val) */ +__global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(&address, 1234); +} + +__global__ 
void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(1234, 1234); +} + +__global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} diff --git a/catch/unit/atomics/atomicXor_negative_kernels_rtc.hh b/catch/unit/atomics/atomicXor_negative_kernels_rtc.hh new file mode 100644 index 0000000000..3d4e19c7e7 --- /dev/null +++ b/catch/unit/atomics/atomicXor_negative_kernels_rtc.hh @@ -0,0 +1,223 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicXor_int{ + R"( + __global__ void atomicXor_int_v1(int* address, int* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_int_v2(int* address, int* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_int_v3(int* address, int* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_int_v4(Dummy* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v5(char* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v6(short* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v7(long* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v8(long long* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v9(float* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v10(double* address, int* result) { + *result = atomicXor(address, 1234); + } + )"}; + +static constexpr auto kAtomicXor_uint{ + R"( + __global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v5(char* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + 
__global__ void atomicXor_uint_v6(short* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v7(long* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v8(long long* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v9(float* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v10(double* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + )"}; + +static constexpr auto kAtomicXor_ulong{ + R"( + __global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v5(char* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v6(short* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v7(long* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v9(float* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v10(double* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + )"}; + 
+static constexpr auto kAtomicXor_ulonglong{ + R"( + __global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + )"}; diff --git a/catch/unit/atomics/atomicXor_system.cc b/catch/unit/atomics/atomicXor_system.cc new file mode 100644 index 0000000000..fbfb82d36d --- /dev/null +++ b/catch/unit/atomics/atomicXor_system.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicXor_system atomicXor_system + * @{ + * @ingroup AtomicsTest + * `atomicXor_system(TestType* address, TestType val)` - + * performs system-wide atomic bitwise XOR between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicXor_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicXor_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomic_builtin_kernels.cc b/catch/unit/atomics/atomic_builtin_kernels.cc new file mode 100644 index 0000000000..27cd3eb95e --- /dev/null +++ b/catch/unit/atomics/atomic_builtin_kernels.cc @@ -0,0 +1,458 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +constexpr int kMemOrder = __ATOMIC_RELAXED; +constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + +// Trivially-copyable class. +class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; +}; + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void StoreCompileKernel(int* x) { + // Valid combinations + __hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + __hip_atomic_store(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + __hip_atomic_store(*x, 1, kMemOrder, kMemScope); + // Consume not allowed by C++11 for store + __hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope); + // Acquire not allowed by C++11 for store + __hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope); + // Acquire-Release not allowed by C++11 for store + __hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope); + // Memory order is out of bounds + __hip_atomic_store(x, 1, -1, kMemScope); + __hip_atomic_store(x, 1, 10, kMemScope); + // Memory scope is out of bounds + __hip_atomic_store(x, 1, kMemOrder, -1); + __hip_atomic_store(x, 1, kMemOrder, 10); + + // Storing an object that is 
trivially-copyable + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + __hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope); +} + +__global__ void LoadCompileKernel(int* x, int* y) { + // Valid combinations + *y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope); + + // Value instead of pointer to the atomic builtin for 1st parameter + *y = __hip_atomic_load(*x, kMemOrder, kMemScope); + // Release not allowed by C++11 for load + *y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope); + // Acquire-Release not allowed by C++11 for load + *y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope); + // Memory order is out of bounds + *y = __hip_atomic_load(x, -1, kMemScope); + *y = __hip_atomic_load(x, 10, kMemScope); + // Memory scope is out of bounds + *y = __hip_atomic_load(x, kMemOrder, -1); + *y = __hip_atomic_load(x, kMemOrder, 10); + + // Loading an object that is not trivially-copyable + Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope); + + // Loading an object that is trivially-copyable + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope); +} + +__global__ void CompareWeakCompileKernel(int* x, int* expected) { + bool res{false}; + // Valid combinations + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + 
kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + // Release not allowed on fail by C++11 + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + // Acquire-Release not allowed on fail by C++11 + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + // Fail stronger than success + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + // Pointer to a non-const type + res = __hip_atomic_compare_exchange_weak(reinterpret_cast(x), expected, 1, kMemOrder, + kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + res = __hip_atomic_compare_exchange_weak(*x, 
expected, 1, kMemOrder, kMemOrder, kMemScope); + // Memory order on success is out of bounds + res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, kMemScope); + // Memory order on failure is out of bounds + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope); + // Memory scope is out of bounds + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10); + + // User-defined class is not trivially-copyable and therefore cannot be atomically copied + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + // User-defined class is trivially-copyable and can be atomically copied + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder, + kMemScope); +} + +__global__ void CompareStrongCompileKernel(int* x, int* expected) { + bool res{false}; + // Valid combinations + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + kMemScope); + res = 
__hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + // Release not allowed on fail by C++11 + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + // Acquire-Release not allowed on fail by C++11 + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + // Fail stronger than success + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + // Pointer to a non-const type + res = __hip_atomic_compare_exchange_strong(reinterpret_cast(x), expected, 1, + kMemOrder, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin for 1st parameter + res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope); + // Memory order on 
success is out of bounds + res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope); + // Memory order on failure is out of bounds + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope); + // Memory scope is out of bounds + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10); + + // User-defined class is not trivially-copyable and therefore cannot be atomically copied + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + // User-defined class is trivially-copyable and can be atomically copied + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, + kMemOrder, kMemScope); +} + +__global__ void ExchangeCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_exchange(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_exchange(x, 1, -1, kMemScope); + old = __hip_atomic_exchange(x, 1, 10, 
kMemScope); + // Memory scope out of bounds + old = __hip_atomic_exchange(x, 1, kMemOrder, -1); + old = __hip_atomic_exchange(x, 1, kMemOrder, 10); + + // User-defined class is not trivially-copyable and therefore cannot be atomically copied + Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope); + + // User-defined class is trivially-copyable and can be atomically copied + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope); +} + +__global__ void FetchAddCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_add(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_add(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_add(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope); +} + +__global__ void FetchAndCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope); 
+ old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_and(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_and(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_and(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10); + + // Value must be an integer + Dummy dummy{}; + old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope); +} + +__global__ void FetchOrCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_or(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_or(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_or(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10); + + // Value must be an integer + Dummy dummy{}; + old = 
__hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope); +} + +__global__ void FetchXorCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_xor(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10); + + // Value must be an integer + Dummy dummy{}; + old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope); +} + +__global__ void FetchMaxCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = 
__hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_max(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_max(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_max(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10); + + // Value must be integer or floating point type + Dummy dummy{}; + old = __hip_atomic_fetch_max(&dummy, 1, kMemOrder, kMemScope); +} + +__global__ void FetchMinCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_min(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_min(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_min(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10); + + // Value must be integer or floating point type + Dummy dummy{}; + old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope); +} diff --git a/catch/unit/atomics/atomic_builtins.cc b/catch/unit/atomics/atomic_builtins.cc new file mode 100644 index 0000000000..c5ade6b30a --- /dev/null +++ 
b/catch/unit/atomics/atomic_builtins.cc @@ -0,0 +1,97 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +#include "atomic_builtins_kernels_rtc.hh" + +/** + * @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add + * @{ + * @ingroup AtomicsTest + */ + +void AtomicBuiltinsRTCWrapper(const char* program_source, int expected_errors_num, + int expected_warnings_num) { + hiprtcProgram program{}; + HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "atomics_builtins_kernels.cc", 0, + nullptr, nullptr)); + + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + int warning_count{0}; + + std::string error_message{"error:"}; + std::string warning_message{"warning:"}; + + size_t npos_e = log.find(error_message, 0); + while (npos_e != std::string::npos) { + ++error_count; + npos_e = log.find(error_message, npos_e + 1); + } + + size_t npos_w = log.find(warning_message, 0); + while (npos_w != std::string::npos) { + ++warning_count; + npos_w = log.find(warning_message, npos_w + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_errors_num); + REQUIRE(warning_count == expected_warnings_num); +} + +/** + * Test Description + * ------------------------ + * - Compiles atomic builtins while passing parameters that shall cause: + * -# Compiler warnings + * -# Compiler errors + * Test source + * ------------------------ + * - unit/atomics/atomic_builtins.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_AtomicBuiltins_Negative_Parameters_RTC") { + AtomicBuiltinsRTCWrapper(kBuiltinStore, 5, 5); + AtomicBuiltinsRTCWrapper(kBuiltinLoad, 4, 4); + /* Begin: Should be 5 errors, 6 warnings for both. 
See EXSWHTEC-309*/ + AtomicBuiltinsRTCWrapper(kBuiltinCompExWeak, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinCompExStrong, 5, 2); + /* End. */ + AtomicBuiltinsRTCWrapper(kBuiltinExchange, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchAdd, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchAnd, 7, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchOr, 7, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchXor, 7, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchMax, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchMin, 5, 2); +} diff --git a/catch/unit/atomics/atomic_builtins_kernels_rtc.hh b/catch/unit/atomics/atomic_builtins_kernels_rtc.hh new file mode 100644 index 0000000000..1339eaaa45 --- /dev/null +++ b/catch/unit/atomics/atomic_builtins_kernels_rtc.hh @@ -0,0 +1,590 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Positive and negative kernels used for the builtin atomic Test Cases that are using RTC. 
+*/ + +static constexpr auto kBuiltinStore{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void StoreCompileKernel(int* x) { + __hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + __hip_atomic_store(reinterpret_cast(x), 1, kMemOrder, kMemScope); + __hip_atomic_store(*x, 1, kMemOrder, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope); + __hip_atomic_store(x, 1, -1, kMemScope); + __hip_atomic_store(x, 1, 10, kMemScope); + __hip_atomic_store(x, 1, kMemOrder, -1); + __hip_atomic_store(x, 1, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + __hip_atomic_store(&dummy_a, dummy_b, kMemOrder, kMemScope); + + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + __hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinLoad{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void 
LoadCompileKernel(int* x, int* y) { + *y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope); + + *y = __hip_atomic_load(*x, kMemOrder, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope); + *y = __hip_atomic_load(x, -1, kMemScope); + *y = __hip_atomic_load(x, 10, kMemScope); + *y = __hip_atomic_load(x, kMemOrder, -1); + *y = __hip_atomic_load(x, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope); + + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinCompExWeak{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void CompareWeakCompileKernel(int* x, int* expected) { + bool res{false}; + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + 
kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + res = __hip_atomic_compare_exchange_weak(reinterpret_cast(x), expected, 1, kMemOrder, + kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(*x, expected, 1, kMemOrder, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, 
kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder, + kMemScope); + } +)"}; + +static constexpr auto kBuiltinCompExStrong{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void CompareStrongCompileKernel(int* x, int* expected) { + bool res{false}; + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + 
kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + res = __hip_atomic_compare_exchange_strong(reinterpret_cast(x), expected, 1, + kMemOrder, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 
1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, + kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinExchange{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void ExchangeCompileKernel(int* x) { + int old{}; + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_exchange(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_exchange(x, 1, -1, kMemScope); + old = __hip_atomic_exchange(x, 1, 10, kMemScope); + old = __hip_atomic_exchange(x, 1, kMemOrder, -1); + old = __hip_atomic_exchange(x, 1, kMemOrder, 10); + + 
Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope); + + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchAdd{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchAddCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_add(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_add(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_add(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchAnd{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ 
DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchAndCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_and(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_and(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_and(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchOr{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchOrCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope); + old = 
__hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_or(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_or(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_or(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope); + } +)"}; + +static auto constexpr kBuiltinFetchXor{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchXorCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_xor(reinterpret_cast(x), 
1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchMax{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchMaxCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_max(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_max(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_max(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_max(&dummy, 1, 
kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchMin{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchMinCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_min(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_min(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_min(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope); + } +)"}; diff --git a/catch/unit/atomics/bitwise_common.hh b/catch/unit/atomics/bitwise_common.hh new file mode 100644 index 0000000000..887d25d4f9 --- /dev/null +++ b/catch/unit/atomics/bitwise_common.hh @@ -0,0 +1,412 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include + +namespace cg = cooperative_groups; + +namespace Bitwise { +enum class AtomicOperation { + kAnd = 0, + kAndSystem, + kOr, + kOrSystem, + kXor, + kXorSystem, + kBuiltinAnd, + kBuiltinOr, + kBuiltinXor +}; + +constexpr auto kMask = 0xAAAA; +constexpr auto kTestValue = 0x4545; +constexpr auto kAndTestValue = 0xFFFF; + +template +__host__ __device__ TestType GetTestValue() { + if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem) { + return kAndTestValue; + } + + return kTestValue; +} + +template +__device__ TestType PerformAtomicOperation(TestType* const mem) { + const auto mask = kMask; + + if constexpr (operation == AtomicOperation::kAnd) { + return atomicAnd(mem, mask); + } else if constexpr (operation == AtomicOperation::kAndSystem) { + return atomicAnd_system(mem, mask); + } else if constexpr (operation == AtomicOperation::kOr) { + return atomicOr(mem, mask); + } else if constexpr (operation == AtomicOperation::kOrSystem) { + return atomicOr_system(mem, mask); + } else if constexpr (operation == AtomicOperation::kXor) { + return atomicXor(mem, mask); + } else if constexpr (operation == AtomicOperation::kXorSystem) { + return atomicXor_system(mem, mask); + } else if constexpr (operation == AtomicOperation::kBuiltinAnd) { + return __hip_atomic_fetch_and(mem, mask, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinOr) { + return __hip_atomic_fetch_or(mem, mask, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinXor) { + return __hip_atomic_fetch_xor(mem, mask, __ATOMIC_RELAXED, memory_scope); + } +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { + __shared__ TestType shared_mem; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? 
&shared_mem : global_mem; + + if constexpr (use_shared_mem) { + if (tid == 0) mem[0] = global_mem[0]; + __syncthreads(); + } + + old_vals[tid] = PerformAtomicOperation(mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid == 0) global_mem[0] = mem[0]; + } +} + +template +__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch, + const unsigned int idx) { + const auto byte_ptr = reinterpret_cast(ptr); + return reinterpret_cast(byte_ptr + idx * pitch); +} + +__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch) { + extern __shared__ uint8_t shared_mem[]; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? 
reinterpret_cast(shared_mem) : global_mem; + + if constexpr (use_shared_mem) { + if (tid < width) { + const auto target = PitchedOffset(mem, pitch, tid); + *target = *PitchedOffset(global_mem, pitch, tid); + }; + __syncthreads(); + } + + const auto n = cooperative_groups::this_grid().size() - width; + + TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width); + + if (tid < n) { + old_vals[tid] = PerformAtomicOperation( + PitchedOffset(mem, pitch, tid % width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + GenerateMemoryTraffic(begin_addr, end_addr); + } + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid < width) { + const auto target = PitchedOffset(global_mem, pitch, tid); + *target = *PitchedOffset(mem, pitch, tid); + }; + } +} + +struct TestParams { + auto ThreadCount() const { + return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; + } + + dim3 blocks; + dim3 threads; + unsigned int num_devices = 1u; + unsigned int kernel_count = 1u; + unsigned int width = 1u; + unsigned int pitch = 0u; + unsigned int host_thread_count = 0u; + LinearAllocs alloc_type; +}; + +template +std::tuple, std::vector> TestKernelHostRef(const TestParams& p) { + const auto thread_count = p.num_devices * p.kernel_count * p.ThreadCount(); + + TestType test_value = GetTestValue(); + const auto mask = kMask; + std::vector res_vals(p.width, test_value); + std::vector old_vals; + old_vals.reserve(thread_count); + + for (auto tid = 0u; tid < thread_count; ++tid) { + auto& res = res_vals[tid % p.width]; + old_vals.push_back(res); + + if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem || + operation == AtomicOperation::kBuiltinAnd) { + res = res & mask; + } else if constexpr (operation == AtomicOperation::kOr || + operation == AtomicOperation::kOrSystem || + operation == AtomicOperation::kBuiltinOr) { + res = res | 
mask; + } else if constexpr (operation == AtomicOperation::kXor || + operation == AtomicOperation::kXorSystem || + operation == AtomicOperation::kBuiltinXor) { + res = res ^ mask; + } + } + + return {res_vals, old_vals}; +} + +template +void Verify(const TestParams& p, std::vector& res_vals, std::vector& old_vals) { + auto [expected_res_vals, expected_old_vals] = TestKernelHostRef(p); + + for (auto i = 0u; i < res_vals.size(); ++i) { + INFO("Results index: " << i); + REQUIRE(expected_res_vals[i] == res_vals[i]); + } + + std::sort(begin(old_vals), end(old_vals)); + std::sort(begin(expected_old_vals), end(expected_old_vals)); + for (auto i = 0u; i < old_vals.size(); ++i) { + INFO("Old values index: " << i); + REQUIRE(expected_old_vals[i] == old_vals[i]); + } +} + +template +void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, + TestType* const old_vals) { + const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u; + if (p.width == 1 && p.pitch == sizeof(TestType)) + TestKernel + <<>>(mem_ptr, old_vals); + else + TestKernel + <<>>(mem_ptr, old_vals, p.width, p.pitch); +} + +template +void TestCore(const TestParams& p) { + const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + std::vector> old_vals_devs; + std::vector streams; + for (auto i = 0; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size); + for (auto j = 0; j < p.kernel_count; ++j) { + streams.emplace_back(Streams::created); + } + } + + const auto mem_alloc_size = p.width * p.pitch; + LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size); + + std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount()); + std::vector res_vals(p.width); + + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? 
mem_dev.ptr() : mem_dev.host_ptr(); + + TestType test_value = GetTestValue(); + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) { + HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice)); + } + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + const auto& stream = streams[i * p.kernel_count + j].stream(); + const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); + LaunchKernel(p, stream, mem_dev.ptr(), + old_vals); + } + } + + for (auto i = 0u; i < p.num_devices; ++i) { + const auto device_offset = i * p.kernel_count * p.ThreadCount(); + HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), + old_vals_alloc_size, hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + p.width, hipMemcpyDeviceToHost)); + + Verify(p, res_vals, old_vals); +} + +inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); } + +inline dim3 GenerateBlockDimensions() { + int sm_count = 0; + HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0)); + return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); +} + +template +void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { + TestParams params; + params.num_devices = 1; + params.kernel_count = 1; + if constexpr ((operation == AtomicOperation::kBuiltinAnd || + operation == AtomicOperation::kBuiltinOr || + operation == AtomicOperation::kBuiltinXor) && + memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr ((operation == AtomicOperation::kBuiltinAnd || + operation == AtomicOperation::kBuiltinOr || + operation == AtomicOperation::kBuiltinXor) && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, 
hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateThreadDimensions(); + } + params.width = width; + params.pitch = pitch; + + SECTION("Global memory") { + if constexpr ((operation == AtomicOperation::kBuiltinAnd || + operation == AtomicOperation::kBuiltinOr || + operation == AtomicOperation::kBuiltinXor) && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateBlockDimensions(); + } + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } + } + + SECTION("Shared memory") { + params.blocks = dim3(1); + params.alloc_type = LinearAllocs::hipMalloc; + TestCore(params); + } +} + +template +void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + + TestParams params; + params.num_devices = 1; + params.kernel_count = kernel_count; + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +template +void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, + const 
unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + if (num_devices > 1) { + if (HipTest::getDeviceCount() < num_devices) { + std::string msg = std::to_string(num_devices) + " devices are required"; + HipTest::HIP_SKIP_TEST(msg.c_str()); + return; + } + } + + if (kernel_count > 1) { + for (auto i = 0u; i < num_devices; ++i) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + } + } + + TestParams params; + params.num_devices = num_devices; + params.kernel_count = kernel_count; + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +} // namespace Bitwise diff --git a/catch/unit/atomics/memory_order_common.hh b/catch/unit/atomics/memory_order_common.hh new file mode 100644 index 0000000000..d555913fef --- /dev/null +++ b/catch/unit/atomics/memory_order_common.hh @@ -0,0 +1,433 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include + +enum class BuiltinAtomicOperation { + kLoadStore = 0, + kExchange, + kCompareExchangeStrong, + kCompareExchangeWeak, + kAdd, + kAnd, + kOr, + kXor, + kMin, + kMax +}; + +template +__host__ __device__ void SetFlag(int* const flag) { +#ifdef __HIP_DEVICE_COMPILE__ + if constexpr (operation == BuiltinAtomicOperation::kLoadStore) { + static_assert(memory_order != __ATOMIC_ACQ_REL); + __hip_atomic_store(flag, 1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kExchange) { + __hip_atomic_exchange(flag, 1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) { + int compare = 0; + __hip_atomic_compare_exchange_strong(flag, &compare, 1, memory_order, __ATOMIC_RELAXED, + memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) { + int compare = 0; + while (!__hip_atomic_compare_exchange_weak(flag, &compare, 1, memory_order, __ATOMIC_RELAXED, + memory_scope)) + compare = 0; + } else if constexpr (operation == BuiltinAtomicOperation::kAdd) { + __hip_atomic_fetch_add(flag, 1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kAnd) { + __hip_atomic_fetch_and(flag, 0x0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kOr) { + __hip_atomic_fetch_or(flag, 0x1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kXor) { + __hip_atomic_fetch_xor(flag, 0x1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMin) { + __hip_atomic_fetch_min(flag, -1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMax) { + __hip_atomic_fetch_max(flag, 1, memory_order, memory_scope); + } +#else + if constexpr (operation == BuiltinAtomicOperation::kAnd) { + __atomic_store_n(flag, 0, __ATOMIC_RELEASE); + } else { + 
__atomic_store_n(flag, 1, __ATOMIC_RELEASE); + } +#endif +} + +template +__host__ __device__ int FetchFlag(int* const flag) { +#ifdef __HIP_DEVICE_COMPILE__ + if constexpr (operation == BuiltinAtomicOperation::kLoadStore) { + static_assert(memory_order != __ATOMIC_ACQ_REL); + return __hip_atomic_load(flag, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kExchange) { + return __hip_atomic_exchange(flag, 0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) { + int compare = 1; + __hip_atomic_compare_exchange_strong( + flag, &compare, 1, memory_order, + memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope); + return compare; + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) { + int compare = 1; + __hip_atomic_compare_exchange_weak( + flag, &compare, 1, memory_order, + memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope); + return compare; + } else if constexpr (operation == BuiltinAtomicOperation::kAdd) { + return __hip_atomic_fetch_add(flag, 0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kAnd) { + return !__hip_atomic_fetch_and(flag, 0x1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kOr) { + return __hip_atomic_fetch_or(flag, 0x0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kXor) { + return __hip_atomic_fetch_xor(flag, 0x0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMin) { + return __hip_atomic_fetch_min(flag, 0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMax) { + return __hip_atomic_fetch_max(flag, 0, memory_order, memory_scope); + } +#else + if constexpr (operation == BuiltinAtomicOperation::kAnd) { + return !__atomic_load_n(flag, __ATOMIC_ACQUIRE); + 
} else { + return __atomic_load_n(flag, __ATOMIC_ACQUIRE); + } +#endif +} + +namespace AcquireRelease { + +constexpr auto kTestValue = 42; + +template +__host__ __device__ void Producer(int* const flag, int* const data) { + constexpr int actual_memory_order = + memory_order == __ATOMIC_ACQUIRE ? __ATOMIC_RELEASE : memory_order; + + data[0] = kTestValue; + + SetFlag(flag); +} + +template +__host__ __device__ void Consumer(int* const flag, int* const data, int* const ret) { + while (!FetchFlag(flag)) + ; + + ret[0] = data[0]; +} + +template +__global__ void TestKernel(int* const flag, int* data, int* const ret) { + __shared__ int shared_mem; + + if (data == nullptr) data = &shared_mem; + + if (blockIdx.x == 0 && threadIdx.x == 0) { + if constexpr (operation == BuiltinAtomicOperation::kAnd) + *flag = 1; + else + *flag = 0; + } + __syncthreads(); + + bool producer = false, consumer = false; + + if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + producer = blockIdx.x == 0 && threadIdx.x == 0; + consumer = blockIdx.x == 0 && threadIdx.x == 1; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + producer = blockIdx.x == 0 && threadIdx.x == 0; + consumer = blockIdx.x == 0 && threadIdx.x == warpSize; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + producer = blockIdx.x == 0 && threadIdx.x == 0; + consumer = blockIdx.x == 1 && threadIdx.x == 0; + } + + if (producer) { + Producer(flag, data); + return; + } + + if (consumer) { + Consumer(flag, data, ret); + return; + } +} + +template +__global__ void ProducerKernel(int* const flag, int* const data) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Producer(flag, data); +} + +template +__global__ void ConsumerKernel(int* const flag, int* const data, int* const ret) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Consumer(flag, data, ret); +} + +template void Test() { + int blocks = 1, threads = 1; + if (memory_scope == 
__HIP_MEMORY_SCOPE_WAVEFRONT) { + blocks = 1; + threads = 2; + } else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + blocks = 1; + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + threads = warp_size * 2; + } else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + blocks = 2; + threads = 1; + } + + LinearAllocGuard flag(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard ret(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipMallocManaged); + LinearAllocGuard data(alloc_type, sizeof(int)); + TestKernel + <<>>(flag.ptr(), data.ptr(), ret.ptr()); + } + + if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) { + SECTION("Shared memory") { + TestKernel + <<>>(flag.ptr(), nullptr, ret.ptr()); + } + } + + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(ret.ptr()[0] == kTestValue); +} + +template void SystemTest() { + std::thread host_thread; + + LinearAllocGuard flag(LinearAllocs::hipMallocManaged, sizeof(int)); + LinearAllocGuard ret(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipHostMalloc, LinearAllocs::hipMallocManaged); + LinearAllocGuard data(alloc_type, sizeof(int)); + + SECTION("Host producer - Device consumer") { + ConsumerKernel + <<<1, 1>>>(flag.ptr(), data.ptr(), ret.ptr()); + host_thread = std::thread([&] { + Producer(flag.ptr(), data.ptr()); + }); + } + + SECTION("Device producer - Host consumer") { + host_thread = std::thread([&] { + Consumer(flag.ptr(), data.ptr(), + ret.ptr()); + }); + ProducerKernel + <<<1, 1>>>(flag.ptr(), data.ptr()); + } + } + + HIP_CHECK(hipDeviceSynchronize()); + host_thread.join(); + + REQUIRE(ret.ptr()[0] == kTestValue); +} + +} /* namespace AcquireRelease */ + +namespace SequentialConsistency { + +template +__host__ __device__ void Producer(int* 
const flag) { + __atomic_store_n(flag, 1, __ATOMIC_SEQ_CST); +} + +template +__host__ __device__ void Consumer(int* const flag1, int* const flag2, int* const counter) { + while (!FetchFlag(flag1)) + ; + if (FetchFlag(flag2)) { +#ifdef __HIP_DEVICE_COMPILE__ + __hip_atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST, memory_scope); +#else + __atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST); +#endif + } +} + +template +__global__ void TestKernel(int* flag1, int* flag2, int* const counter) { + __shared__ int shared_mem[2]; + + if (flag1 == nullptr) flag1 = &shared_mem[0]; + if (flag2 == nullptr) flag2 = &shared_mem[1]; + + if (blockIdx.x == 0 && threadIdx.x == 0) { + if constexpr (operation == BuiltinAtomicOperation::kAnd) { + *flag1 = 1; + *flag2 = 1; + } else { + *flag1 = 0; + *flag2 = 0; + } + } + __syncthreads(); + + bool producer1 = false, producer2 = false, consumer1 = false, consumer2 = false; + + if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + producer1 = blockIdx.x == 0 && threadIdx.x == 0; + consumer1 = blockIdx.x == 0 && threadIdx.x == 1; + producer2 = blockIdx.x == 0 && threadIdx.x == 2; + consumer2 = blockIdx.x == 0 && threadIdx.x == 3; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + producer1 = blockIdx.x == 0 && threadIdx.x == 0; + consumer1 = blockIdx.x == 0 && threadIdx.x == warpSize; + producer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 2; + consumer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 3; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + producer1 = blockIdx.x == 0 && threadIdx.x == 0; + consumer1 = blockIdx.x == 1 && threadIdx.x == 0; + producer2 = blockIdx.x == 2 && threadIdx.x == 0; + consumer2 = blockIdx.x == 3 && threadIdx.x == 0; + } + + if (producer1) { + Producer(flag1); + return; + } + + if (consumer1) { + Consumer(flag1, flag2, counter); + return; + } + + if (producer2) { + Producer(flag2); + return; + } + + if (consumer2) { + Consumer(flag2, flag1, counter); + return; + 
} +} + +template +__global__ void ProducerKernel(int* const flag) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Producer(flag); +} + +template +__global__ void ConsumerKernel(int* const flag1, int* const flag2, int* const counter) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Consumer(flag1, flag2, counter); +} + +template void Test() { + int blocks = 1, threads = 1; + if (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + blocks = 1; + threads = 4; + } else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + blocks = 1; + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + threads = warp_size * 4; + } else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + blocks = 4; + threads = 1; + } + + LinearAllocGuard counter(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipMalloc); + LinearAllocGuard flag1(alloc_type, sizeof(int)); + LinearAllocGuard flag2(alloc_type, sizeof(int)); + TestKernel + <<>>(flag1.ptr(), flag2.ptr(), counter.ptr()); + } + + if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) { + SECTION("Shared memory") { + TestKernel<<>>(nullptr, nullptr, counter.ptr()); + } + } + + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(counter.ptr()[0] != 0); +} + +template void SystemTest() { + std::thread host_producer, host_consumer; + + LinearAllocGuard counter(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipMallocManaged); + LinearAllocGuard flag1(alloc_type, sizeof(int)); + LinearAllocGuard flag2(alloc_type, sizeof(int)); + + ConsumerKernel + <<<1, 1>>>(flag1.ptr(), flag2.ptr(), counter.ptr()); + host_consumer = std::thread([&] { + Consumer(flag2.ptr(), flag1.ptr(), counter.ptr()); + }); + + ProducerKernel<<<1, 1>>>(flag1.ptr()); + host_producer = + std::thread([&] { 
Producer(flag2.ptr()); }); + } + + HIP_CHECK(hipDeviceSynchronize()); + host_producer.join(); + host_consumer.join(); + + REQUIRE(counter.ptr()[0] != 0); +} + +} // namespace SequentialConsistency \ No newline at end of file diff --git a/catch/unit/atomics/min_max_common.hh b/catch/unit/atomics/min_max_common.hh new file mode 100644 index 0000000000..13234564d9 --- /dev/null +++ b/catch/unit/atomics/min_max_common.hh @@ -0,0 +1,420 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include + +namespace cg = cooperative_groups; + +namespace MinMax { +enum class AtomicOperation { + kMin = 0, + kMinSystem, + kMax, + kMaxSystem, + kSafeMin, + kUnsafeMin, + kSafeMax, + kUnsafeMax, + kBuiltinMin, + kBuiltinMax +}; + +constexpr auto kIntegerTestValue = 5; +constexpr auto kFloatingPointTestValue = 5.5; + +template +__host__ __device__ TestType GetTestValue() { + TestType test_value = + std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; + + if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem || + operation == AtomicOperation::kUnsafeMin || + operation == AtomicOperation::kSafeMin) { + return test_value - 2; + } + + return test_value + 2; +} + +template +__device__ TestType PerformAtomicOperation(TestType* const mem) { + const auto val = GetTestValue(); + + if constexpr (operation == AtomicOperation::kMin) { + return atomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kMinSystem) { + return atomicMin_system(mem, val); + } else if constexpr (operation == AtomicOperation::kMax) { + return atomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kMaxSystem) { + return atomicMax_system(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeMin) { + return unsafeAtomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeMin) { + return safeAtomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeMax) { + return unsafeAtomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeMax) { + return safeAtomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kBuiltinMin) { + return __hip_atomic_fetch_min(mem, val, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinMax) { + return __hip_atomic_fetch_max(mem, val, __ATOMIC_RELAXED, memory_scope); + } +} + +template +__global__ 
void TestKernel(TestType* const global_mem, TestType* const old_vals) { + __shared__ TestType shared_mem; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? &shared_mem : global_mem; + + if constexpr (use_shared_mem) { + if (tid == 0) mem[0] = global_mem[0]; + __syncthreads(); + } + + old_vals[tid] = PerformAtomicOperation(mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid == 0) global_mem[0] = mem[0]; + } +} + +template +__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch, + const unsigned int idx) { + const auto byte_ptr = reinterpret_cast(ptr); + return reinterpret_cast(byte_ptr + idx * pitch); +} + +__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch) { + extern __shared__ uint8_t shared_mem[]; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? 
reinterpret_cast(shared_mem) : global_mem; + + if constexpr (use_shared_mem) { + if (tid < width) { + const auto target = PitchedOffset(mem, pitch, tid); + *target = *PitchedOffset(global_mem, pitch, tid); + }; + __syncthreads(); + } + + const auto n = cooperative_groups::this_grid().size() - width; + + TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width); + + if (tid < n) { + old_vals[tid] = PerformAtomicOperation( + PitchedOffset(mem, pitch, tid % width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + GenerateMemoryTraffic(begin_addr, end_addr); + } + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid < width) { + const auto target = PitchedOffset(global_mem, pitch, tid); + *target = *PitchedOffset(mem, pitch, tid); + }; + } +} + +struct TestParams { + auto ThreadCount() const { + return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; + } + + dim3 blocks; + dim3 threads; + unsigned int num_devices = 1u; + unsigned int kernel_count = 1u; + unsigned int width = 1u; + unsigned int pitch = 0u; + unsigned int host_thread_count = 0u; + LinearAllocs alloc_type; +}; + +template +std::tuple, std::vector> TestKernelHostRef(const TestParams& p) { + const auto val = GetTestValue(); + + const auto thread_count = p.num_devices * p.kernel_count * p.ThreadCount(); + + TestType test_value = + std::is_floating_point_v ? 
kFloatingPointTestValue : kIntegerTestValue; + + std::vector res_vals(p.width, test_value); + std::vector old_vals; + old_vals.reserve(thread_count); + + for (auto tid = 0u; tid < thread_count; ++tid) { + auto& res = res_vals[tid % p.width]; + old_vals.push_back(res); + + if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem || + operation == AtomicOperation::kUnsafeMin || + operation == AtomicOperation::kSafeMin || + operation == AtomicOperation::kBuiltinMin) { + res = std::min(res, val); + } else if constexpr (operation == AtomicOperation::kMax || + operation == AtomicOperation::kMaxSystem || + operation == AtomicOperation::kUnsafeMax || + operation == AtomicOperation::kSafeMax || + operation == AtomicOperation::kBuiltinMax) { + res = std::max(res, val); + } + } + + return {res_vals, old_vals}; +} + +template +void Verify(const TestParams& p, std::vector& res_vals, std::vector& old_vals) { + auto [expected_res_vals, expected_old_vals] = TestKernelHostRef(p); + + for (auto i = 0u; i < res_vals.size(); ++i) { + INFO("Results index: " << i); + REQUIRE(expected_res_vals[i] == res_vals[i]); + } + + std::sort(begin(old_vals), end(old_vals)); + std::sort(begin(expected_old_vals), end(expected_old_vals)); + for (auto i = 0u; i < old_vals.size(); ++i) { + INFO("Old values index: " << i); + REQUIRE(expected_old_vals[i] == old_vals[i]); + } +} + +template +void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, + TestType* const old_vals) { + const auto shared_mem_size = use_shared_mem ? 
p.width * p.pitch : 0u; + if (p.width == 1 && p.pitch == sizeof(TestType)) + TestKernel + <<>>(mem_ptr, old_vals); + else + TestKernel + <<>>(mem_ptr, old_vals, p.width, p.pitch); +} + +template +void TestCore(const TestParams& p) { + const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + std::vector> old_vals_devs; + std::vector streams; + for (auto i = 0; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size); + for (auto j = 0; j < p.kernel_count; ++j) { + streams.emplace_back(Streams::created); + } + } + + const auto mem_alloc_size = p.width * p.pitch; + LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size); + + std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount()); + std::vector res_vals(p.width); + + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr(); + + TestType test_value = + std::is_floating_point_v ? 
kFloatingPointTestValue : kIntegerTestValue; + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) { + HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice)); + } + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + const auto& stream = streams[i * p.kernel_count + j].stream(); + const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); + LaunchKernel(p, stream, mem_dev.ptr(), + old_vals); + } + } + + for (auto i = 0u; i < p.num_devices; ++i) { + const auto device_offset = i * p.kernel_count * p.ThreadCount(); + HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), + old_vals_alloc_size, hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + p.width, hipMemcpyDeviceToHost)); + + Verify(p, res_vals, old_vals); +} + +inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); } + +inline dim3 GenerateBlockDimensions() { + int sm_count = 0; + HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0)); + return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); +} + +template +void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { + TestParams params; + params.num_devices = 1; + params.kernel_count = 1; + if constexpr ((operation == AtomicOperation::kBuiltinMin || + operation == AtomicOperation::kBuiltinMax) && + memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr ((operation == AtomicOperation::kBuiltinMin || + operation == AtomicOperation::kBuiltinMax) && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateThreadDimensions(); + 
} + params.width = width; + params.pitch = pitch; + + SECTION("Global memory") { + if constexpr ((operation == AtomicOperation::kBuiltinMin || + operation == AtomicOperation::kBuiltinMax) && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateBlockDimensions(); + } + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } + } + + SECTION("Shared memory") { + params.blocks = dim3(1); + params.alloc_type = LinearAllocs::hipMalloc; + TestCore(params); + } +} + +/** + * Runs TestCore with `kernel_count` kernels on a single device, once for each + * of the hipMalloc, hipHostMalloc, hipMallocManaged and mallocAndRegister + * allocation types. The test is skipped when device 0 does not report + * support for concurrent kernel execution. + * + * The block size comes from GenerateThreadDimensions() and the grid size from + * GenerateBlockDimensions(), matching SingleDeviceSingleKernelTest. + * + * @param kernel_count number of kernels to launch + * @param width number of target elements + * @param pitch distance in bytes between consecutive target elements + */ +template +void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + + TestParams params; + params.num_devices = 1; + params.kernel_count = kernel_count; + params.threads = GenerateThreadDimensions(); + params.blocks = GenerateBlockDimensions(); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +template +void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, + const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + if (num_devices > 1) { + if (HipTest::getDeviceCount() < num_devices) { + std::string msg 
= std::to_string(num_devices) + " devices are required"; + HipTest::HIP_SKIP_TEST(msg.c_str()); + return; + } + } + + /* Concurrent kernels are only needed when more than one kernel runs per device. */ + if (kernel_count > 1) { + for (auto i = 0u; i < num_devices; ++i) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + } + } + + TestParams params; + params.num_devices = num_devices; + params.kernel_count = kernel_count; + /* Block size comes from the thread generator and grid size from the block generator, as in SingleDeviceSingleKernelTest. */ + params.threads = GenerateThreadDimensions(); + params.blocks = GenerateBlockDimensions(); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +} // namespace MinMax diff --git a/catch/unit/atomics/safeAtomicAdd.cc b/catch/unit/atomics/safeAtomicAdd.cc new file mode 100644 index 0000000000..cfc760a7ce --- /dev/null +++ b/catch/unit/atomics/safeAtomicAdd.cc @@ -0,0 +1,123 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup safeAtomicAdd safeAtomicAdd + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of safeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will + * perform an atomic addition on a target memory location. Each thread will add the same value to + * the memory location, storing the return value into a separate output array slot corresponding + * to it. Once complete, the output array and target memory is validated to contain all the + * expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of safeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive_Multi_Kernel", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/safeAtomicMax.cc b/catch/unit/atomics/safeAtomicMax.cc new file mode 100644 index 0000000000..581a4a566a --- /dev/null +++ b/catch/unit/atomics/safeAtomicMax.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup safeAtomicMax safeAtomicMax + * @{ + * @ingroup AtomicsTest + * `safeAtomicMax(TestType* address, TestType val)` - + * atomically stores the maximum of `*address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/safeAtomicMin.cc b/catch/unit/atomics/safeAtomicMin.cc new file mode 100644 index 0000000000..810be72ca4 --- /dev/null +++ b/catch/unit/atomics/safeAtomicMin.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup safeAtomicMin safeAtomicMin + * @{ + * @ingroup AtomicsTest + * `safeAtomicMin(TestType* address, TestType val)` - + * atomically stores the minimum of `*address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/sequential_consistency.cc b/catch/unit/atomics/sequential_consistency.cc new file mode 100644 index 0000000000..c37b26487a --- /dev/null +++ b/catch/unit/atomics/sequential_consistency.cc @@ -0,0 +1,165 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "memory_order_common.hh" + +TEST_CASE("Unit___hip_atomic_load_store_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_exchange_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { + SequentialConsistency::SystemTest(); + } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { + SequentialConsistency::SystemTest(); + } +} + +TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { 
SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} \ No newline at end of file diff --git a/catch/unit/atomics/unsafeAtomicAdd.cc b/catch/unit/atomics/unsafeAtomicAdd.cc new file mode 100644 index 0000000000..8c717c7bf5 --- /dev/null +++ b/catch/unit/atomics/unsafeAtomicAdd.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicAdd unsafeAtomicAdd + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of unsafeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will + * perform an atomic addition on a target memory location. Each thread will add the same value to + * the memory location, storing the return value into a separate output array slot corresponding + * to it. Once complete, the output array and target memory is validated to contain all the + * expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of unsafeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive_Multi_Kernel", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/unsafeAtomicMax.cc b/catch/unit/atomics/unsafeAtomicMax.cc new file mode 100644 index 0000000000..2341e2d8c7 --- /dev/null +++ b/catch/unit/atomics/unsafeAtomicMax.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicMax unsafeAtomicMax + * @{ + * @ingroup AtomicsTest + * `unsafeAtomicMax(TestType* address, TestType val)` - + * stores the maximum of the value at `address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/unsafeAtomicMin.cc b/catch/unit/atomics/unsafeAtomicMin.cc new file mode 100644 index 0000000000..60b827854a --- /dev/null +++ b/catch/unit/atomics/unsafeAtomicMin.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicMin unsafeAtomicMin + * @{ + * @ingroup AtomicsTest + * `unsafeAtomicMin(TestType* address, TestType val)` - + * stores the minimum of the value at `address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/compileAndCaptureOutput.py b/catch/unit/compileAndCaptureOutput.py index a8a7fb506a..b5bb925cc4 100644 --- a/catch/unit/compileAndCaptureOutput.py +++ b/catch/unit/compileAndCaptureOutput.py @@ -52,7 +52,7 @@ class CompileAndCapture(unittest.TestCase): # HIP compiler on AMD platforms has limit of 20 errors, and some negative # test cases expect that more errors are detected. 
if (self.platform == 'amd'): - compiler_args.append('-ferror-limit=100') + compiler_args.append('-ferror-limit=200') compiler_output = subprocess.run(compiler_args, stderr=subprocess.PIPE) # Get the compiler output in the stdout if -V flag is raised during ctest invocation. compiler_stderr = compiler_output.stderr.decode('UTF-8') diff --git a/catch/unit/cooperativeGrps/CMakeLists.txt b/catch/unit/cooperativeGrps/CMakeLists.txt index 1a5f234e3b..9732e58f23 100644 --- a/catch/unit/cooperativeGrps/CMakeLists.txt +++ b/catch/unit/cooperativeGrps/CMakeLists.txt @@ -2,6 +2,7 @@ set(TEST_SRC thread_block.cc thread_block_tile.cc + coalesced_group_tiled_partition.cc hipCGThreadBlockType_old.cc hipCGMultiGridGroupType_old.cc hipCGGridGroupType_old.cc diff --git a/catch/unit/cooperativeGrps/coalesced_group_tiled_partition.cc b/catch/unit/cooperativeGrps/coalesced_group_tiled_partition.cc new file mode 100644 index 0000000000..f14a60caea --- /dev/null +++ b/catch/unit/cooperativeGrps/coalesced_group_tiled_partition.cc @@ -0,0 +1,685 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "cooperative_groups_common.hh" + +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * @addtogroup coalesced_group_tile coalesced_group_tile + * @{ + * @ingroup DeviceLanguageTest + * Contains unit tests for partitioning of coalesced groups into tiled partitions + */ + +namespace cg = cooperative_groups; + +namespace { +#if HT_AMD +constexpr auto kMaskMin = std::numeric_limits().min(); +constexpr auto kMaskLimit = std::numeric_limits().max(); +#else +constexpr auto kMaskMin = std::numeric_limits().min(); +constexpr auto kMaskLimit = std::numeric_limits().max(); +#endif +} // namespace + +static unsigned int GenerateTileSizes() { +#if HT_AMD + return GENERATE(2u, 4u, 8u, 16u, 32u, 64u); +#else + return GENERATE(2u, 4u, 8u, 16u, 32u); +#endif +} + +static inline std::mt19937& GetRandomGenerator() { + static std::mt19937 mt(11); + return mt; +} + +template static inline T GenerateRandomInteger(const T min, const T max) { + std::uniform_int_distribution dist(min, max); + return dist(GetRandomGenerator()); +} + +template static auto coalesce_threads(const uint64_t mask) { + std::tuple, unsigned int> res; + auto& [threads, count] = res; + + count = 0u; + for (auto i = 0u; i < warp_size; ++i) { + if (mask & (1u << i)) { + threads[count++] = i; + } + } + + return res; +} + +template __device__ bool deactivate_thread(uint64_t* active_masks) { + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + const auto block = cg::this_thread_block(); + const auto warps_per_block = (block.size() + warp_size - 1) / warp_size; + const auto block_rank = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; + const auto idx = block_rank 
* warps_per_block + block.thread_rank() / warp.size(); + + return !(active_masks[idx] & (1u << warp.thread_rank())); +} + + +template +__global__ void coalesced_group_tiled_partition_size_getter(uint64_t* active_masks, + unsigned int tile_size, + unsigned int* sizes) { + if (deactivate_thread(active_masks)) { + return; + } + sizes[thread_rank_in_grid()] = cg::tiled_partition(cg::coalesced_threads(), tile_size).size(); +} + +template +__global__ void coalesced_group_tiled_partition_thread_rank_getter(uint64_t* active_masks, + unsigned int tile_size, + unsigned int* sizes) { + if (deactivate_thread(active_masks)) { + return; + } + + sizes[thread_rank_in_grid()] = + cg::tiled_partition(cg::coalesced_threads(), tile_size).thread_rank(); +} + +/** + * Test Description + * ------------------------ + * - Deactivates threads based on passed in mask and creates tiled partitions over coalesced + * threads for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} and writes the return values of + * size and thread_rank member functions to an output array that is validated on the host side. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic") { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensions(); + auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(unsigned int); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(0u, std::numeric_limits().max()); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_size_getter<32> + <<>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_thread_rank_getter<32> + <<>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr()); + + const auto tail 
= warps_in_block * kWarpSize - grid.threads_in_block_count_; + + // validate size + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + const auto num_tiles = (active_thread_count + tile_size - 1) / tile_size; + const auto tile_tail = num_tiles * tile_size - active_thread_count; + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto expected_val = tile_size - tile_tail * (t + tile_size >= active_thread_count); + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } + + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // validate rank + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 
0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto expected_val = k % tile_size; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + + +template +__global__ void coalesced_group_tiled_partition_shfl_up(uint64_t* active_masks, T* const out, + const unsigned int tile_size, + const unsigned int delta) { + if (deactivate_thread(active_masks)) { + return; + } + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(warp.thread_rank()); + + const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size); + out[thread_rank_in_grid()] = tile.shfl_up(var, delta); +} + + +template static void CoalescedGroupTiledPartitonShflUpTestImpl() { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + const auto delta = GENERATE_COPY(range(0u, tile_size)); + INFO("Delta: " << delta); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard 
active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_shfl_up + <<>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t + delta; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto expected_val = active_threads[k - delta]; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + +/** + * Test Description + * ------------------------ 
+ * - Validates the shuffle up behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced + * group, with memberships of threads in the coalesced group being controlled via a passed in active + * mask. The test is run for all overloads of shfl_up. + * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic", "", int, + unsigned int, long, unsigned long, long long, unsigned long long, float, + double) { + CoalescedGroupTiledPartitonShflUpTestImpl(); +} + + +template +__global__ void coalesced_group_tiled_partition_shfl_down(uint64_t* active_masks, T* const out, + const unsigned int tile_size, + const unsigned int delta) { + if (deactivate_thread(active_masks)) { + return; + } + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(warp.thread_rank()); + + const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size); + out[thread_rank_in_grid()] = tile.shfl_down(var, delta); +} + + +template static void CoalescedGroupTiledPartitonShflDownTestImpl() { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + const auto delta = GENERATE_COPY(range(0u, tile_size)); + INFO("Delta: " << delta); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard 
uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_shfl_down + <<>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + if (delta >= active_thread_count) { + continue; + } + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size - delta; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count - delta; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto 
expected_val = active_threads[k + delta]; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle down behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced + * group, with memberships of threads in the coalesced group being controlled via a passed in active + * mask. The test is run for all overloads of shfl_down. + * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic", "", int, + unsigned int, long, unsigned long, long long, unsigned long long, float, + double) { + CoalescedGroupTiledPartitonShflDownTestImpl(); +} + + +template +__global__ void coalesced_group_tiled_partition_shfl(uint64_t* active_masks, uint8_t* target_lanes, + T* const out, const unsigned int tile_size) { + if (deactivate_thread(active_masks)) { + return; + } + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(warp.thread_rank()); + + const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size); + out[thread_rank_in_grid()] = tile.shfl(var, target_lanes[tile.thread_rank()]); +} + +template static void CoalescedGroupTiledPartitonShflTestImpl() { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << 
", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard target_lanes_dev(LinearAllocs::hipMalloc, tile_size * sizeof(uint8_t)); + LinearAllocGuard target_lanes(LinearAllocs::hipHostMalloc, tile_size * sizeof(uint8_t)); + + std::generate(target_lanes.ptr(), target_lanes.ptr() + tile_size, + [tile_size] { return GenerateRandomInteger(0, static_cast(2 * tile_size)); }); + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(target_lanes_dev.ptr(), target_lanes.ptr(), tile_size * sizeof(uint8_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_shfl<<>>( + active_masks_dev.ptr(), target_lanes_dev.ptr(), uint_arr_dev.ptr(), tile_size); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = 
(current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto target_lane = target_lanes.ptr()[k % tile_size]; + if (target_lane >= tile_size || target_lane >= active_thread_count - t) { + continue; + } + const auto expected_val = active_threads[t + target_lane]; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced + * group, with memberships of threads in the coalesced group being controlled via a passed in active + * mask. The test is run for all overloads of shfl. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic", "", int, + unsigned int, long, unsigned long, long long, unsigned long long, float, + double) { + CoalescedGroupTiledPartitonShflTestImpl(); +} + + +template +__global__ void coalesced_group_tiled_partition_sync_check(uint64_t* active_masks, T* global_data, + unsigned int* wait_modifiers, + size_t tile_size) { + if (deactivate_thread(active_masks)) { + return; + } + + extern __shared__ uint8_t shared_data[]; + T* const data = use_global ? global_data : reinterpret_cast(shared_data); + const auto tid = cg::this_grid().thread_rank(); + const auto block = cg::this_thread_block(); + const auto coalesced = cg::coalesced_threads(); + const auto partition = cg::tiled_partition(coalesced, tile_size); + const auto data_idx = [&block](unsigned int i) { return use_global ? 
i : (i % block.size()); }; + + const auto wait_modifier = wait_modifiers[tid]; + + const auto block_rank = tid / block.size(); + const auto warp_rank = block.thread_rank() / warp_size; + const auto warp_base = block_rank * block.size() + warp_rank * warp_size; + const auto global_idx = warp_base + coalesced.thread_rank(); + + busy_wait(wait_modifier); + data[data_idx(global_idx)] = partition.thread_rank(); + partition.sync(); + + bool valid = true; + const auto tile_rank = coalesced.thread_rank() / tile_size; + for (auto i = 0u; i < tile_size; ++i) { + const auto target_rank_in_tile = (coalesced.thread_rank() + i) % tile_size; + const auto target_rank_in_warp = tile_rank * tile_size + target_rank_in_tile; + if (target_rank_in_warp >= coalesced.size()) { + continue; + } + if (!(valid &= (data[data_idx(warp_base + target_rank_in_warp)] == target_rank_in_tile))) { + break; + } + } + // Validate + partition.sync(); + data[data_idx(global_idx)] = valid; + if constexpr (!use_global) { + global_data[global_idx] = data[data_idx(global_idx)]; + } +} + +template void CoalescedGroupTiledPartitionSyncTest() { + const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations)); + INFO("Run number: " << randomized_run_count + 1); + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + const auto alloc_size_per_block = alloc_size / grid.block_count_; + int max_shared_mem_per_block = 0; + HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block, + hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); + if (!global_memory && (max_shared_mem_per_block < 
alloc_size_per_block)) { + return; + } + + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + LinearAllocGuard wait_modifiers_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + LinearAllocGuard wait_modifiers(LinearAllocs::hipHostMalloc, + grid.thread_count_ * sizeof(unsigned int)); + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + if (randomized_run_count != 0) { + std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_, + [] { return GenerateRandomInteger(0u, 1500u); }); + } else { + std::fill_n(wait_modifiers.ptr(), grid.thread_count_, 0u); + } + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(), + grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice)); + + const auto shared_memory_size = global_memory ? 
0u : alloc_size_per_block; + coalesced_group_tiled_partition_sync_check + <<>>(active_masks_dev.ptr(), arr_dev.ptr(), + wait_modifiers_dev.ptr(), tile_size); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + for (int i = 0u; i < grid.block_count_; ++i) { + for (int j = 0u; j < warps_in_block; ++j) { + const auto warp_idx = i * warps_in_block + j; + auto mask = active_masks.ptr()[warp_idx]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((warp_idx + 1) % warps_in_block); + mask = (mask << shift_amount) >> shift_amount; + const auto active_count = std::bitset(mask).count(); + const auto start_offset = i * grid.threads_in_block_count_ + j * kWarpSize; + const auto end_offset = start_offset + active_count; + const auto valid = + std::all_of(arr.ptr() + start_offset, arr.ptr() + end_offset, [](T e) { return e; }); + if (!valid) { + REQUIRE(valid); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Launches a kernel wherein threads in each warp are deactivated based on a passed bitmask. + * Coalesced groups are formed and divided into tiled partitions(size of 2, 4, 8, 16, 32, 64 if AMD) + * and every thread writes its intra-tile rank into an array slot determined by its global warp rank + * and coalesced group rank. The array is either in global or dynamic shared memory based on a + * compile time switch, and the test is run for arrays of 1, 2, and 4 byte elements. Before the + * write each thread executes a busy wait loop for a random amount of clock cycles, the amount being + * read from an input array. After the write a tile-wide sync is performed and each thread validates + * that it can read the expected values that other threads within the same tile have written to + * their respective array slots. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +uint64_t counter = 0; +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic", "", uint8_t, + uint16_t, uint32_t) { + SECTION("Global memory") { CoalescedGroupTiledPartitionSyncTest(); } + SECTION("Shared memory") { CoalescedGroupTiledPartitionSyncTest(); } +} diff --git a/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc b/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc index a7f9ddc7e7..b3dbe4d0e2 100644 --- a/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc +++ b/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include #include -#include + /** * @addtogroup coalesced_group thread_block_tile diff --git a/catch/unit/cooperativeGrps/cooperative_groups_common.hh b/catch/unit/cooperativeGrps/cooperative_groups_common.hh index 20d0d4aa44..19ad0dd092 100644 --- a/catch/unit/cooperativeGrps/cooperative_groups_common.hh +++ b/catch/unit/cooperativeGrps/cooperative_groups_common.hh @@ -76,3 +76,4 @@ template bool CheckDimensions(unsigned int device, T kernel, dim3 bloc return true; } + diff --git a/catch/unit/dynamicLoading/complex_loading_behavior.cc b/catch/unit/dynamicLoading/complex_loading_behavior.cc index c1c412052f..663ce7f5a1 100644 --- a/catch/unit/dynamicLoading/complex_loading_behavior.cc +++ b/catch/unit/dynamicLoading/complex_loading_behavior.cc @@ -18,7 +18,7 @@ THE SOFTWARE. 
*/ #include #include -#include + /** * @addtogroup hipLaunchKernelGGL hipLaunchCooperativeKernel * @{ diff --git a/catch/unit/dynamicLoading/hipApiDynamicLoad.cc b/catch/unit/dynamicLoading/hipApiDynamicLoad.cc index e583f4a3d1..b09300bfea 100644 --- a/catch/unit/dynamicLoading/hipApiDynamicLoad.cc +++ b/catch/unit/dynamicLoading/hipApiDynamicLoad.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #include #include #include diff --git a/catch/unit/errorHandling/CMakeLists.txt b/catch/unit/errorHandling/CMakeLists.txt index 7dcdb52f4c..b9a6de0afa 100644 --- a/catch/unit/errorHandling/CMakeLists.txt +++ b/catch/unit/errorHandling/CMakeLists.txt @@ -1,14 +1,15 @@ # Common Tests - Test independent of all platforms set(TEST_SRC + error_handling_common.cc hipGetErrorName.cc hipGetErrorString.cc - hipGetLastError.cc - hipPeekAtLastError.cc hipDrvGetErrorName.cc hipDrvGetErrorString.cc + hipGetLastError.cc + hipPeekAtLastError.cc ) hip_add_exe_to_target(NAME ErrorHandlingTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests - COMPILE_OPTIONS -std=c++17) \ No newline at end of file + COMPILE_OPTIONS -std=c++17) diff --git a/catch/unit/errorHandling/error_handling_common.cc b/catch/unit/errorHandling/error_handling_common.cc new file mode 100644 index 0000000000..20267e793d --- /dev/null +++ b/catch/unit/errorHandling/error_handling_common.cc @@ -0,0 +1,534 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "error_handling_common.hh" + +const char* ErrorName(hipError_t enumerator) { + switch (enumerator) { +#if HT_AMD + case hipSuccess: + return "hipSuccess"; + case hipErrorInvalidValue: + return "hipErrorInvalidValue"; + case hipErrorOutOfMemory: + return "hipErrorOutOfMemory"; + case hipErrorNotInitialized: + return "hipErrorNotInitialized"; + case hipErrorDeinitialized: + return "hipErrorDeinitialized"; + case hipErrorProfilerDisabled: + return "hipErrorProfilerDisabled"; + case hipErrorProfilerNotInitialized: + return "hipErrorProfilerNotInitialized"; + case hipErrorProfilerAlreadyStarted: + return "hipErrorProfilerAlreadyStarted"; + case hipErrorProfilerAlreadyStopped: + return "hipErrorProfilerAlreadyStopped"; + case hipErrorInvalidConfiguration: + return "hipErrorInvalidConfiguration"; + case hipErrorInvalidSymbol: + return "hipErrorInvalidSymbol"; + case hipErrorInvalidDevicePointer: + return "hipErrorInvalidDevicePointer"; + case hipErrorInvalidMemcpyDirection: + return "hipErrorInvalidMemcpyDirection"; + case hipErrorInsufficientDriver: + return "hipErrorInsufficientDriver"; + case hipErrorMissingConfiguration: + return "hipErrorMissingConfiguration"; + case hipErrorPriorLaunchFailure: + return "hipErrorPriorLaunchFailure"; + case hipErrorInvalidDeviceFunction: + return "hipErrorInvalidDeviceFunction"; + case hipErrorNoDevice: + return "hipErrorNoDevice"; + case hipErrorInvalidDevice: + return "hipErrorInvalidDevice"; + case hipErrorInvalidPitchValue: + return "hipErrorInvalidPitchValue"; + case hipErrorInvalidImage: + return "hipErrorInvalidImage"; + case hipErrorInvalidContext: + return "hipErrorInvalidContext"; + case hipErrorContextAlreadyCurrent: + return "hipErrorContextAlreadyCurrent"; + case hipErrorMapFailed: + return "hipErrorMapFailed"; + case hipErrorUnmapFailed: + return "hipErrorUnmapFailed"; + case hipErrorArrayIsMapped: + return "hipErrorArrayIsMapped"; + case hipErrorAlreadyMapped: + return "hipErrorAlreadyMapped"; + case 
hipErrorNoBinaryForGpu: + return "hipErrorNoBinaryForGpu"; + case hipErrorAlreadyAcquired: + return "hipErrorAlreadyAcquired"; + case hipErrorNotMapped: + return "hipErrorNotMapped"; + case hipErrorNotMappedAsArray: + return "hipErrorNotMappedAsArray"; + case hipErrorNotMappedAsPointer: + return "hipErrorNotMappedAsPointer"; + case hipErrorECCNotCorrectable: + return "hipErrorECCNotCorrectable"; + case hipErrorUnsupportedLimit: + return "hipErrorUnsupportedLimit"; + case hipErrorContextAlreadyInUse: + return "hipErrorContextAlreadyInUse"; + case hipErrorPeerAccessUnsupported: + return "hipErrorPeerAccessUnsupported"; + case hipErrorInvalidKernelFile: + return "hipErrorInvalidKernelFile"; + case hipErrorInvalidGraphicsContext: + return "hipErrorInvalidGraphicsContext"; + case hipErrorInvalidSource: + return "hipErrorInvalidSource"; + case hipErrorFileNotFound: + return "hipErrorFileNotFound"; + case hipErrorSharedObjectSymbolNotFound: + return "hipErrorSharedObjectSymbolNotFound"; + case hipErrorSharedObjectInitFailed: + return "hipErrorSharedObjectInitFailed"; + case hipErrorOperatingSystem: + return "hipErrorOperatingSystem"; + case hipErrorInvalidHandle: + return "hipErrorInvalidHandle"; + case hipErrorIllegalState: + return "hipErrorIllegalState"; + case hipErrorNotFound: + return "hipErrorNotFound"; + case hipErrorNotReady: + return "hipErrorNotReady"; + case hipErrorIllegalAddress: + return "hipErrorIllegalAddress"; + case hipErrorLaunchOutOfResources: + return "hipErrorLaunchOutOfResources"; + case hipErrorLaunchTimeOut: + return "hipErrorLaunchTimeOut"; + case hipErrorPeerAccessAlreadyEnabled: + return "hipErrorPeerAccessAlreadyEnabled"; + case hipErrorPeerAccessNotEnabled: + return "hipErrorPeerAccessNotEnabled"; + case hipErrorSetOnActiveProcess: + return "hipErrorSetOnActiveProcess"; + case hipErrorContextIsDestroyed: + return "hipErrorContextIsDestroyed"; + case hipErrorAssert: + return "hipErrorAssert"; + case hipErrorHostMemoryAlreadyRegistered: + 
return "hipErrorHostMemoryAlreadyRegistered"; + case hipErrorHostMemoryNotRegistered: + return "hipErrorHostMemoryNotRegistered"; + case hipErrorLaunchFailure: + return "hipErrorLaunchFailure"; + case hipErrorNotSupported: + return "hipErrorNotSupported"; + case hipErrorUnknown: + return "hipErrorUnknown"; + case hipErrorRuntimeMemory: + return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther: + return "hipErrorRuntimeOther"; + case hipErrorCooperativeLaunchTooLarge: + return "hipErrorCooperativeLaunchTooLarge"; + case hipErrorStreamCaptureUnsupported: + return "hipErrorStreamCaptureUnsupported"; + case hipErrorStreamCaptureInvalidated: + return "hipErrorStreamCaptureInvalidated"; + case hipErrorStreamCaptureMerge: + return "hipErrorStreamCaptureMerge"; + case hipErrorStreamCaptureUnmatched: + return "hipErrorStreamCaptureUnmatched"; + case hipErrorStreamCaptureUnjoined: + return "hipErrorStreamCaptureUnjoined"; + case hipErrorStreamCaptureIsolation: + return "hipErrorStreamCaptureIsolation"; + case hipErrorStreamCaptureImplicit: + return "hipErrorStreamCaptureImplicit"; + case hipErrorCapturedEvent: + return "hipErrorCapturedEvent"; + case hipErrorStreamCaptureWrongThread: + return "hipErrorStreamCaptureWrongThread"; + case hipErrorGraphExecUpdateFailure: + return "hipErrorGraphExecUpdateFailure"; + case hipErrorTbd: + return "hipErrorTbd"; + default: + return "hipErrorUnknown"; +#else + case hipSuccess: + return "CUDA_SUCCESS"; + case hipErrorInvalidValue: + return "CUDA_ERROR_INVALID_VALUE"; + case hipErrorOutOfMemory: + return "CUDA_ERROR_OUT_OF_MEMORY"; + case hipErrorNotInitialized: + return "CUDA_ERROR_NOT_INITIALIZED"; + case hipErrorDeinitialized: + return "CUDA_ERROR_DEINITIALIZED"; + case hipErrorProfilerDisabled: + return "CUDA_ERROR_PROFILER_DISABLED"; + case hipErrorProfilerNotInitialized: + return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; + case hipErrorProfilerAlreadyStarted: + return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; + case 
hipErrorProfilerAlreadyStopped: + return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; + case hipErrorInvalidConfiguration: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidSymbol: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidDevicePointer: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidMemcpyDirection: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInsufficientDriver: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorMissingConfiguration: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorPriorLaunchFailure: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidDeviceFunction: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorNoDevice: + return "CUDA_ERROR_NO_DEVICE"; + case hipErrorInvalidDevice: + return "CUDA_ERROR_INVALID_DEVICE"; + case hipErrorInvalidPitchValue: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidImage: + return "CUDA_ERROR_INVALID_IMAGE"; + case hipErrorInvalidContext: + return "CUDA_ERROR_INVALID_CONTEXT"; + case hipErrorContextAlreadyCurrent: + return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; + case hipErrorMapFailed: + return "CUDA_ERROR_MAP_FAILED"; + case hipErrorUnmapFailed: + return "CUDA_ERROR_UNMAP_FAILED"; + case hipErrorArrayIsMapped: + return "CUDA_ERROR_ARRAY_IS_MAPPED"; + case hipErrorAlreadyMapped: + return "CUDA_ERROR_ALREADY_MAPPED"; + case hipErrorNoBinaryForGpu: + return "CUDA_ERROR_NO_BINARY_FOR_GPU"; + case hipErrorAlreadyAcquired: + return "CUDA_ERROR_ALREADY_ACQUIRED"; + case hipErrorNotMapped: + return "CUDA_ERROR_NOT_MAPPED"; + case hipErrorNotMappedAsArray: + return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; + case hipErrorNotMappedAsPointer: + return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; + case hipErrorECCNotCorrectable: + return "CUDA_ERROR_ECC_UNCORRECTABLE"; + case hipErrorUnsupportedLimit: + return "CUDA_ERROR_UNSUPPORTED_LIMIT"; + case hipErrorContextAlreadyInUse: + return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; + case hipErrorPeerAccessUnsupported: + return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; + case hipErrorInvalidKernelFile: 
+ return "CUDA_ERROR_INVALID_PTX"; + case hipErrorInvalidGraphicsContext: + return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; + case hipErrorInvalidSource: + return "CUDA_ERROR_INVALID_SOURCE"; + case hipErrorFileNotFound: + return "CUDA_ERROR_FILE_NOT_FOUND"; + case hipErrorSharedObjectSymbolNotFound: + return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; + case hipErrorSharedObjectInitFailed: + return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; + case hipErrorOperatingSystem: + return "CUDA_ERROR_OPERATING_SYSTEM"; + case hipErrorInvalidHandle: + return "CUDA_ERROR_INVALID_HANDLE"; + case hipErrorIllegalState: + return "CUDA_ERROR_ILLEGAL_STATE"; + case hipErrorNotFound: + return "CUDA_ERROR_NOT_FOUND"; + case hipErrorNotReady: + return "CUDA_ERROR_NOT_READY"; + case hipErrorIllegalAddress: + return "CUDA_ERROR_ILLEGAL_ADDRESS"; + case hipErrorLaunchOutOfResources: + return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; + case hipErrorLaunchTimeOut: + return "CUDA_ERROR_LAUNCH_TIMEOUT"; + case hipErrorPeerAccessAlreadyEnabled: + return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; + case hipErrorPeerAccessNotEnabled: + return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; + case hipErrorSetOnActiveProcess: + return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; + case hipErrorContextIsDestroyed: + return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; + case hipErrorAssert: + return "CUDA_ERROR_ASSERT"; + case hipErrorHostMemoryAlreadyRegistered: + return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; + case hipErrorHostMemoryNotRegistered: + return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; + case hipErrorLaunchFailure: + return "CUDA_ERROR_LAUNCH_FAILED"; + case hipErrorNotSupported: + return "CUDA_ERROR_NOT_SUPPORTED"; + case hipErrorUnknown: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorRuntimeMemory: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorRuntimeOther: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorCooperativeLaunchTooLarge: + return "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE"; + case 
hipErrorStreamCaptureUnsupported: + return "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED"; + case hipErrorStreamCaptureInvalidated: + return "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED"; + case hipErrorStreamCaptureMerge: + return "CUDA_ERROR_STREAM_CAPTURE_MERGE"; + case hipErrorStreamCaptureUnmatched: + return "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED"; + case hipErrorStreamCaptureUnjoined: + return "CUDA_ERROR_STREAM_CAPTURE_UNJOINED"; + case hipErrorStreamCaptureIsolation: + return "CUDA_ERROR_STREAM_CAPTURE_ISOLATION"; + case hipErrorStreamCaptureImplicit: + return "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT"; + case hipErrorCapturedEvent: + return "CUDA_ERROR_CAPTURED_EVENT"; + case hipErrorStreamCaptureWrongThread: + return "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD"; + case hipErrorGraphExecUpdateFailure: + return "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE"; + default: + return "CUDA_ERROR_UNKNOWN"; +#endif + } +} + +const char* ErrorString(hipError_t enumerator) { + switch (enumerator) { + case hipSuccess: + return "no error"; + case hipErrorInvalidValue: + return "invalid argument"; + case hipErrorOutOfMemory: + return "out of memory"; + case hipErrorNotInitialized: + return "initialization error"; + case hipErrorDeinitialized: + return "driver shutting down"; + case hipErrorProfilerDisabled: + return "profiler disabled while using external profiling tool"; + case hipErrorProfilerNotInitialized: +#if HT_AMD + return "profiler is not initialized"; +#elif HT_NVIDIA + return "profiler not initialized: call cudaProfilerInitialize()"; +#endif + case hipErrorProfilerAlreadyStarted: + return "profiler already started"; + case hipErrorProfilerAlreadyStopped: + return "profiler already stopped"; +#if HT_AMD + case hipErrorInvalidConfiguration: + return "invalid configuration argument"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidPitchValue: + return "invalid pitch argument"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case 
hipErrorInvalidSymbol: + return "invalid device symbol"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidDevicePointer: + return "invalid device pointer"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidMemcpyDirection: + return "invalid copy direction for memcpy"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInsufficientDriver: + return "driver version is insufficient for runtime version"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorMissingConfiguration: + return "__global__ function call is not configured"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorPriorLaunchFailure: + return "unspecified launch failure in prior launch"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidDeviceFunction: + return "invalid device function"; +#elif HT_NVIDIA + return "unknown error"; +#endif + case hipErrorNoDevice: +#if HT_AMD + return "no ROCm-capable device is detected"; +#elif HT_NVIDIA + return "no CUDA-capable device is detected"; +#endif + case hipErrorInvalidDevice: + return "invalid device ordinal"; + case hipErrorInvalidImage: + return "device kernel image is invalid"; + case hipErrorInvalidContext: + return "invalid device context"; + case hipErrorContextAlreadyCurrent: +#if HT_AMD + return "context is already current context"; +#elif HT_NVIDIA + return "context already current"; +#endif + case hipErrorMapFailed: + return "mapping of buffer object failed"; + case hipErrorUnmapFailed: + return "unmapping of buffer object failed"; + case hipErrorArrayIsMapped: + return "array is mapped"; + case hipErrorAlreadyMapped: + return "resource already mapped"; + case hipErrorNoBinaryForGpu: + return "no kernel image is available for execution on the device"; + case hipErrorAlreadyAcquired: + return "resource already acquired"; + case hipErrorNotMapped: + return "resource 
not mapped"; + case hipErrorNotMappedAsArray: + return "resource not mapped as array"; + case hipErrorNotMappedAsPointer: + return "resource not mapped as pointer"; + case hipErrorECCNotCorrectable: + return "uncorrectable ECC error encountered"; + case hipErrorUnsupportedLimit: + return "limit is not supported on this architecture"; + case hipErrorContextAlreadyInUse: + return "exclusive-thread device already in use by a different thread"; + case hipErrorPeerAccessUnsupported: + return "peer access is not supported between these two devices"; + case hipErrorInvalidKernelFile: +#if HT_AMD + return "invalid kernel file"; +#elif HT_NVIDIA + return "a PTX JIT compilation failed"; +#endif + case hipErrorInvalidGraphicsContext: + return "invalid OpenGL or DirectX context"; + case hipErrorInvalidSource: + return "device kernel image is invalid"; + case hipErrorFileNotFound: + return "file not found"; + case hipErrorSharedObjectSymbolNotFound: + return "shared object symbol not found"; + case hipErrorSharedObjectInitFailed: + return "shared object initialization failed"; + case hipErrorOperatingSystem: + return "OS call failed or operation not supported on this OS"; + case hipErrorInvalidHandle: + return "invalid resource handle"; + case hipErrorIllegalState: + return "the operation cannot be performed in the present state"; + case hipErrorNotFound: + return "named symbol not found"; + case hipErrorNotReady: + return "device not ready"; + case hipErrorIllegalAddress: + return "an illegal memory access was encountered"; + case hipErrorLaunchOutOfResources: + return "too many resources requested for launch"; + case hipErrorLaunchTimeOut: + return "the launch timed out and was terminated"; + case hipErrorPeerAccessAlreadyEnabled: + return "peer access is already enabled"; + case hipErrorPeerAccessNotEnabled: + return "peer access has not been enabled"; + case hipErrorSetOnActiveProcess: + return "cannot set while device is active in this process"; + case 
hipErrorContextIsDestroyed: + return "context is destroyed"; + case hipErrorAssert: + return "device-side assert triggered"; + case hipErrorHostMemoryAlreadyRegistered: + return "part or all of the requested memory range is already mapped"; + case hipErrorHostMemoryNotRegistered: + return "pointer does not correspond to a registered memory region"; + case hipErrorLaunchFailure: + return "unspecified launch failure"; + case hipErrorCooperativeLaunchTooLarge: + return "too many blocks in cooperative launch"; + case hipErrorNotSupported: + return "operation not supported"; + case hipErrorStreamCaptureUnsupported: + return "operation not permitted when stream is capturing"; + case hipErrorStreamCaptureInvalidated: + return "operation failed due to a previous error during capture"; + case hipErrorStreamCaptureMerge: + return "operation would result in a merge of separate capture sequences"; + case hipErrorStreamCaptureUnmatched: + return "capture was not ended in the same stream as it began"; + case hipErrorStreamCaptureUnjoined: + return "capturing stream has unjoined work"; + case hipErrorStreamCaptureIsolation: + return "dependency created on uncaptured work in another stream"; + case hipErrorStreamCaptureImplicit: + return "operation would make the legacy stream depend on a capturing blocking stream"; // NOLINT + case hipErrorCapturedEvent: + return "operation not permitted on an event last recorded in a capturing stream"; // NOLINT + case hipErrorStreamCaptureWrongThread: + return "attempt to terminate a thread-local capture sequence from another thread"; // NOLINT + case hipErrorGraphExecUpdateFailure: + return "the graph update was not performed because it included changes which violated " + "constraints specific to instantiated graph update"; // NOLINT + case hipErrorRuntimeMemory: + return "runtime memory call returned error"; + case hipErrorRuntimeOther: + return "runtime call other than memory returned error"; + case hipErrorUnknown: + default: + return 
"unknown error"; + } +} \ No newline at end of file diff --git a/catch/unit/errorHandling/errorEnumerators.h b/catch/unit/errorHandling/error_handling_common.hh similarity index 95% rename from catch/unit/errorHandling/errorEnumerators.h rename to catch/unit/errorHandling/error_handling_common.hh index e671938c41..902735a1ed 100644 --- a/catch/unit/errorHandling/errorEnumerators.h +++ b/catch/unit/errorHandling/error_handling_common.hh @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -33,7 +33,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess, hipErrorProfilerNotInitialized, hipErrorProfilerAlreadyStarted, hipErrorProfilerAlreadyStopped, - #if HT_AMD +#if HT_AMD hipErrorInvalidConfiguration, hipErrorInvalidPitchValue, hipErrorInvalidSymbol, @@ -43,7 +43,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess, hipErrorMissingConfiguration, hipErrorPriorLaunchFailure, hipErrorInvalidDeviceFunction, - #endif +#endif hipErrorNoDevice, hipErrorInvalidDevice, hipErrorInvalidImage, @@ -97,8 +97,12 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess, hipErrorStreamCaptureWrongThread, hipErrorGraphExecUpdateFailure, hipErrorUnknown, - #if HT_AMD +#if HT_AMD hipErrorRuntimeMemory, hipErrorRuntimeOther - #endif - }; +#endif +}; + +const char* ErrorName(hipError_t enumerator); + +const char* ErrorString(hipError_t enumerator); \ No newline at end of file diff --git a/catch/unit/errorHandling/hipDrvGetErrorName.cc b/catch/unit/errorHandling/hipDrvGetErrorName.cc index 367d890be0..b3401cfc5d 100644 --- a/catch/unit/errorHandling/hipDrvGetErrorName.cc +++ b/catch/unit/errorHandling/hipDrvGetErrorName.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,347 +17,67 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include #include -#include "errorEnumerators.h" -// Local Function to return the error code in string +#include "error_handling_common.hh" -static const char *ErrorName(hipError_t enumerator) { - switch (enumerator) { - #if HT_AMD - case hipSuccess: - return "hipSuccess"; - case hipErrorInvalidValue: - return "hipErrorInvalidValue"; - case hipErrorOutOfMemory: - return "hipErrorOutOfMemory"; - case hipErrorNotInitialized: - return "hipErrorNotInitialized"; - case hipErrorDeinitialized: - return "hipErrorDeinitialized"; - case hipErrorProfilerDisabled: - return "hipErrorProfilerDisabled"; - case hipErrorProfilerNotInitialized: - return "hipErrorProfilerNotInitialized"; - case hipErrorProfilerAlreadyStarted: - return "hipErrorProfilerAlreadyStarted"; - case hipErrorProfilerAlreadyStopped: - return "hipErrorProfilerAlreadyStopped"; - case hipErrorInvalidConfiguration: - return "hipErrorInvalidConfiguration"; - case hipErrorInvalidSymbol: - return "hipErrorInvalidSymbol"; - case hipErrorInvalidDevicePointer: - return "hipErrorInvalidDevicePointer"; - case hipErrorInvalidMemcpyDirection: - return "hipErrorInvalidMemcpyDirection"; - case hipErrorInsufficientDriver: - return "hipErrorInsufficientDriver"; - case hipErrorMissingConfiguration: - return "hipErrorMissingConfiguration"; - case hipErrorPriorLaunchFailure: - return "hipErrorPriorLaunchFailure"; - case hipErrorInvalidDeviceFunction: - return "hipErrorInvalidDeviceFunction"; - case hipErrorNoDevice: - return "hipErrorNoDevice"; - case hipErrorInvalidDevice: - return "hipErrorInvalidDevice"; - 
case hipErrorInvalidPitchValue: - return "hipErrorInvalidPitchValue"; - case hipErrorInvalidImage: - return "hipErrorInvalidImage"; - case hipErrorInvalidContext: - return "hipErrorInvalidContext"; - case hipErrorContextAlreadyCurrent: - return "hipErrorContextAlreadyCurrent"; - case hipErrorMapFailed: - return "hipErrorMapFailed"; - case hipErrorUnmapFailed: - return "hipErrorUnmapFailed"; - case hipErrorArrayIsMapped: - return "hipErrorArrayIsMapped"; - case hipErrorAlreadyMapped: - return "hipErrorAlreadyMapped"; - case hipErrorNoBinaryForGpu: - return "hipErrorNoBinaryForGpu"; - case hipErrorAlreadyAcquired: - return "hipErrorAlreadyAcquired"; - case hipErrorNotMapped: - return "hipErrorNotMapped"; - case hipErrorNotMappedAsArray: - return "hipErrorNotMappedAsArray"; - case hipErrorNotMappedAsPointer: - return "hipErrorNotMappedAsPointer"; - case hipErrorECCNotCorrectable: - return "hipErrorECCNotCorrectable"; - case hipErrorUnsupportedLimit: - return "hipErrorUnsupportedLimit"; - case hipErrorContextAlreadyInUse: - return "hipErrorContextAlreadyInUse"; - case hipErrorPeerAccessUnsupported: - return "hipErrorPeerAccessUnsupported"; - case hipErrorInvalidKernelFile: - return "hipErrorInvalidKernelFile"; - case hipErrorInvalidGraphicsContext: - return "hipErrorInvalidGraphicsContext"; - case hipErrorInvalidSource: - return "hipErrorInvalidSource"; - case hipErrorFileNotFound: - return "hipErrorFileNotFound"; - case hipErrorSharedObjectSymbolNotFound: - return "hipErrorSharedObjectSymbolNotFound"; - case hipErrorSharedObjectInitFailed: - return "hipErrorSharedObjectInitFailed"; - case hipErrorOperatingSystem: - return "hipErrorOperatingSystem"; - case hipErrorInvalidHandle: - return "hipErrorInvalidHandle"; - case hipErrorIllegalState: - return "hipErrorIllegalState"; - case hipErrorNotFound: - return "hipErrorNotFound"; - case hipErrorNotReady: - return "hipErrorNotReady"; - case hipErrorIllegalAddress: - return "hipErrorIllegalAddress"; - case 
hipErrorLaunchOutOfResources: - return "hipErrorLaunchOutOfResources"; - case hipErrorLaunchTimeOut: - return "hipErrorLaunchTimeOut"; - case hipErrorPeerAccessAlreadyEnabled: - return "hipErrorPeerAccessAlreadyEnabled"; - case hipErrorPeerAccessNotEnabled: - return "hipErrorPeerAccessNotEnabled"; - case hipErrorSetOnActiveProcess: - return "hipErrorSetOnActiveProcess"; - case hipErrorContextIsDestroyed: - return "hipErrorContextIsDestroyed"; - case hipErrorAssert: - return "hipErrorAssert"; - case hipErrorHostMemoryAlreadyRegistered: - return "hipErrorHostMemoryAlreadyRegistered"; - case hipErrorHostMemoryNotRegistered: - return "hipErrorHostMemoryNotRegistered"; - case hipErrorLaunchFailure: - return "hipErrorLaunchFailure"; - case hipErrorNotSupported: - return "hipErrorNotSupported"; - case hipErrorUnknown: - return "hipErrorUnknown"; - case hipErrorRuntimeMemory: - return "hipErrorRuntimeMemory"; - case hipErrorRuntimeOther: - return "hipErrorRuntimeOther"; - case hipErrorCooperativeLaunchTooLarge: - return "hipErrorCooperativeLaunchTooLarge"; - case hipErrorStreamCaptureUnsupported: - return "hipErrorStreamCaptureUnsupported"; - case hipErrorStreamCaptureInvalidated: - return "hipErrorStreamCaptureInvalidated"; - case hipErrorStreamCaptureMerge: - return "hipErrorStreamCaptureMerge"; - case hipErrorStreamCaptureUnmatched: - return "hipErrorStreamCaptureUnmatched"; - case hipErrorStreamCaptureUnjoined: - return "hipErrorStreamCaptureUnjoined"; - case hipErrorStreamCaptureIsolation: - return "hipErrorStreamCaptureIsolation"; - case hipErrorStreamCaptureImplicit: - return "hipErrorStreamCaptureImplicit"; - case hipErrorCapturedEvent: - return "hipErrorCapturedEvent"; - case hipErrorStreamCaptureWrongThread: - return "hipErrorStreamCaptureWrongThread"; - case hipErrorGraphExecUpdateFailure: - return "hipErrorGraphExecUpdateFailure"; - case hipErrorTbd: - return "hipErrorTbd"; - default: - return "hipErrorUnknown"; - #endif - #if HT_NVIDIA - case hipSuccess: - 
return "CUDA_SUCCESS"; - case hipErrorInvalidValue: - return "CUDA_ERROR_INVALID_VALUE"; - case hipErrorOutOfMemory: - return "CUDA_ERROR_OUT_OF_MEMORY"; - case hipErrorNotInitialized: - return "CUDA_ERROR_NOT_INITIALIZED"; - case hipErrorDeinitialized: - return "CUDA_ERROR_DEINITIALIZED"; - case hipErrorProfilerDisabled: - return "CUDA_ERROR_PROFILER_DISABLED"; - case hipErrorProfilerNotInitialized: - return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; - case hipErrorProfilerAlreadyStarted: - return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; - case hipErrorProfilerAlreadyStopped: - return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; - case hipErrorInvalidConfiguration: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidSymbol: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidDevicePointer: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidMemcpyDirection: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInsufficientDriver: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorMissingConfiguration: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorPriorLaunchFailure: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidDeviceFunction: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorNoDevice: - return "CUDA_ERROR_NO_DEVICE"; - case hipErrorInvalidDevice: - return "CUDA_ERROR_INVALID_DEVICE"; - case hipErrorInvalidPitchValue: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidImage: - return "CUDA_ERROR_INVALID_IMAGE"; - case hipErrorInvalidContext: - return "CUDA_ERROR_INVALID_CONTEXT"; - case hipErrorContextAlreadyCurrent: - return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; - case hipErrorMapFailed: - return "CUDA_ERROR_MAP_FAILED"; - case hipErrorUnmapFailed: - return "CUDA_ERROR_UNMAP_FAILED"; - case hipErrorArrayIsMapped: - return "CUDA_ERROR_ARRAY_IS_MAPPED"; - case hipErrorAlreadyMapped: - return "CUDA_ERROR_ALREADY_MAPPED"; - case hipErrorNoBinaryForGpu: - return "CUDA_ERROR_NO_BINARY_FOR_GPU"; - case hipErrorAlreadyAcquired: - return "CUDA_ERROR_ALREADY_ACQUIRED"; - case 
hipErrorNotMapped: - return "CUDA_ERROR_NOT_MAPPED"; - case hipErrorNotMappedAsArray: - return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; - case hipErrorNotMappedAsPointer: - return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; - case hipErrorECCNotCorrectable: - return "CUDA_ERROR_ECC_UNCORRECTABLE"; - case hipErrorUnsupportedLimit: - return "CUDA_ERROR_UNSUPPORTED_LIMIT"; - case hipErrorContextAlreadyInUse: - return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; - case hipErrorPeerAccessUnsupported: - return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; - case hipErrorInvalidKernelFile: - return "CUDA_ERROR_INVALID_PTX"; - case hipErrorInvalidGraphicsContext: - return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; - case hipErrorInvalidSource: - return "CUDA_ERROR_INVALID_SOURCE"; - case hipErrorFileNotFound: - return "CUDA_ERROR_FILE_NOT_FOUND"; - case hipErrorSharedObjectSymbolNotFound: - return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; - case hipErrorSharedObjectInitFailed: - return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; - case hipErrorOperatingSystem: - return "CUDA_ERROR_OPERATING_SYSTEM"; - case hipErrorInvalidHandle: - return "CUDA_ERROR_INVALID_HANDLE"; - case hipErrorIllegalState: - return "CUDA_ERROR_ILLEGAL_STATE"; - case hipErrorNotFound: - return "CUDA_ERROR_NOT_FOUND"; - case hipErrorNotReady: - return "CUDA_ERROR_NOT_READY"; - case hipErrorIllegalAddress: - return "CUDA_ERROR_ILLEGAL_ADDRESS"; - case hipErrorLaunchOutOfResources: - return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; - case hipErrorLaunchTimeOut: - return "CUDA_ERROR_LAUNCH_TIMEOUT"; - case hipErrorPeerAccessAlreadyEnabled: - return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; - case hipErrorPeerAccessNotEnabled: - return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; - case hipErrorSetOnActiveProcess: - return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; - case hipErrorContextIsDestroyed: - return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; - case hipErrorAssert: - return "CUDA_ERROR_ASSERT"; - case hipErrorHostMemoryAlreadyRegistered: - return 
"CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; - case hipErrorHostMemoryNotRegistered: - return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; - case hipErrorLaunchFailure: - return "CUDA_ERROR_LAUNCH_FAILED"; - case hipErrorNotSupported: - return "CUDA_ERROR_NOT_SUPPORTED"; - case hipErrorUnknown: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorRuntimeMemory: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorRuntimeOther: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorCooperativeLaunchTooLarge: - return "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE"; - case hipErrorStreamCaptureUnsupported: - return "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED"; - case hipErrorStreamCaptureInvalidated: - return "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED"; - case hipErrorStreamCaptureMerge: - return "CUDA_ERROR_STREAM_CAPTURE_MERGE"; - case hipErrorStreamCaptureUnmatched: - return "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED"; - case hipErrorStreamCaptureUnjoined: - return "CUDA_ERROR_STREAM_CAPTURE_UNJOINED"; - case hipErrorStreamCaptureIsolation: - return "CUDA_ERROR_STREAM_CAPTURE_ISOLATION"; - case hipErrorStreamCaptureImplicit: - return "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT"; - case hipErrorCapturedEvent: - return "CUDA_ERROR_CAPTURED_EVENT"; - case hipErrorStreamCaptureWrongThread: - return "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD"; - case hipErrorGraphExecUpdateFailure: - return "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE"; - default: - return "CUDA_ERROR_UNKNOWN"; - #endif - } -} +/** + * @addtogroup hipDrvGetErrorName hipDrvGetErrorName + * @{ + * @ingroup ErrorTest + * `hipDrvGetErrorName(hipError_t hip_error)` - + * Return hip error as text string form. + */ -// Functional test case -// Test case to verify the returned error name is same as generated error name. - -TEST_CASE("Unit_hipDrvGetErrorName_Functional") { +/** + * Test Description + * ------------------------ + * - Validate that the correct string is returned for each supported + * device error enumeration. 
+ * Test source
+ * ------------------------
+ *  - unit/errorHandling/hipDrvGetErrorName.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
   const char* error_string = nullptr;
-  hipError_t error_ret;
   const auto enumerator =
-  GENERATE(from_range(std::begin(kErrorEnumerators),
-                      std::end(kErrorEnumerators)));
-  error_ret = hipDrvGetErrorName(enumerator, &error_string);
+      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
+  INFO("Error: " << enumerator);
+
+  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));
+  REQUIRE(error_string != nullptr);
   REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
-  REQUIRE(error_ret == hipSuccess);
 }
 
-// Negative test cases.
-
-TEST_CASE("Unit_hipDrvGetErrorName_Negative") {
+/**
+ * Test Description
+ * ------------------------
+ *  - Validate handling of invalid arguments:
+ *    -# When error enumerator is invalid (-1)
+ *      - Expected output: return "hipErrorInvalidValue"
+ *        (this is the status the test asserts on every platform)
+ *    -# When nullptr is passed as store location (AMD only, segfaults on NVIDIA)
+ *      - Expected output: return "hipErrorInvalidValue"
+ * Test source
+ * ------------------------
+ *  - unit/errorHandling/hipDrvGetErrorName.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
   const char* error_string = nullptr;
 
   SECTION("pass unknown value to hipError") {
-    REQUIRE((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string))
-                                == hipErrorInvalidValue);
+    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
+                    hipErrorInvalidValue);
   }
-  #if HT_AMD
+#if HT_AMD  // segfaults on NVIDIA
   SECTION("pass nullptr to error string") {
-    REQUIRE((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr))
-                                   == hipErrorInvalidValue);
+    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
   }
-  #endif
+#endif
 }
diff --git a/catch/unit/errorHandling/hipDrvGetErrorString.cc b/catch/unit/errorHandling/hipDrvGetErrorString.cc index 2b51a82422..5f35c344fe 100644 --- a/catch/unit/errorHandling/hipDrvGetErrorString.cc +++ b/catch/unit/errorHandling/hipDrvGetErrorString.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,247 +17,67 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include #include -#include "errorEnumerators.h" -// Local Function to return the error string. +#include "error_handling_common.hh" -static const char *ErrorString(hipError_t enumerator) { - switch (enumerator) { - case hipSuccess: - return "no error"; - case hipErrorInvalidValue: - return "invalid argument"; - case hipErrorOutOfMemory: - return "out of memory"; - case hipErrorNotInitialized: - return "initialization error"; - case hipErrorDeinitialized: - return "driver shutting down"; - case hipErrorProfilerDisabled: - return "profiler disabled while using external profiling tool"; - case hipErrorProfilerNotInitialized: - #if HT_AMD - return "profiler is not initialized"; - #elif HT_NVIDIA - return "profiler not initialized: call cudaProfilerInitialize()"; - #endif - case hipErrorProfilerAlreadyStarted: - return "profiler already started"; - case hipErrorProfilerAlreadyStopped: - return "profiler already stopped"; - #if HT_AMD - case hipErrorInvalidConfiguration: - return "invalid configuration argument"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidPitchValue: - return "invalid pitch argument"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD 
- case hipErrorInvalidSymbol: - return "invalid device symbol"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidDevicePointer: - return "invalid device pointer"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidMemcpyDirection: - return "invalid copy direction for memcpy"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInsufficientDriver: - return "driver version is insufficient for runtime version"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorMissingConfiguration: - return "__global__ function call is not configured"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorPriorLaunchFailure: - return "unspecified launch failure in prior launch"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidDeviceFunction: - return "invalid device function"; - #elif HT_NVIDIA - return "unknown error"; - #endif - case hipErrorNoDevice: - #if HT_AMD - return "no ROCm-capable device is detected"; - #elif HT_NVIDIA - return "no CUDA-capable device is detected"; - #endif - case hipErrorInvalidDevice: - return "invalid device ordinal"; - case hipErrorInvalidImage: - return "device kernel image is invalid"; - case hipErrorInvalidContext: - return "invalid device context"; - case hipErrorContextAlreadyCurrent: - #if HT_AMD - return "context is already current context"; - #elif HT_NVIDIA - return "context already current"; - #endif - case hipErrorMapFailed: - return "mapping of buffer object failed"; - case hipErrorUnmapFailed: - return "unmapping of buffer object failed"; - case hipErrorArrayIsMapped: - return "array is mapped"; - case hipErrorAlreadyMapped: - return "resource already mapped"; - case hipErrorNoBinaryForGpu: - return "no kernel image is available for execution on the device"; - case hipErrorAlreadyAcquired: - return "resource already acquired"; - case 
hipErrorNotMapped: - return "resource not mapped"; - case hipErrorNotMappedAsArray: - return "resource not mapped as array"; - case hipErrorNotMappedAsPointer: - return "resource not mapped as pointer"; - case hipErrorECCNotCorrectable: - return "uncorrectable ECC error encountered"; - case hipErrorUnsupportedLimit: - return "limit is not supported on this architecture"; - case hipErrorContextAlreadyInUse: - return "exclusive-thread device already in use by a different thread"; - case hipErrorPeerAccessUnsupported: - return "peer access is not supported between these two devices"; - case hipErrorInvalidKernelFile: - #if HT_AMD - return "invalid kernel file"; - #elif HT_NVIDIA - return "a PTX JIT compilation failed"; - #endif - case hipErrorInvalidGraphicsContext: - return "invalid OpenGL or DirectX context"; - case hipErrorInvalidSource: - return "device kernel image is invalid"; - case hipErrorFileNotFound: - return "file not found"; - case hipErrorSharedObjectSymbolNotFound: - return "shared object symbol not found"; - case hipErrorSharedObjectInitFailed: - return "shared object initialization failed"; - case hipErrorOperatingSystem: - return "OS call failed or operation not supported on this OS"; - case hipErrorInvalidHandle: - return "invalid resource handle"; - case hipErrorIllegalState: - return "the operation cannot be performed in the present state"; - case hipErrorNotFound: - return "named symbol not found"; - case hipErrorNotReady: - return "device not ready"; - case hipErrorIllegalAddress: - return "an illegal memory access was encountered"; - case hipErrorLaunchOutOfResources: - return "too many resources requested for launch"; - case hipErrorLaunchTimeOut: - return "the launch timed out and was terminated"; - case hipErrorPeerAccessAlreadyEnabled: - return "peer access is already enabled"; - case hipErrorPeerAccessNotEnabled: - return "peer access has not been enabled"; - case hipErrorSetOnActiveProcess: - return "cannot set while device is active in 
this process"; - case hipErrorContextIsDestroyed: - return "context is destroyed"; - case hipErrorAssert: - return "device-side assert triggered"; - case hipErrorHostMemoryAlreadyRegistered: - return "part or all of the requested memory range is already mapped"; - case hipErrorHostMemoryNotRegistered: - return "pointer does not correspond to a registered memory region"; - case hipErrorLaunchFailure: - return "unspecified launch failure"; - case hipErrorCooperativeLaunchTooLarge: - return "too many blocks in cooperative launch"; - case hipErrorNotSupported: - return "operation not supported"; - case hipErrorStreamCaptureUnsupported: - return "operation not permitted when stream is capturing"; - case hipErrorStreamCaptureInvalidated: - return "operation failed due to a previous error during capture"; - case hipErrorStreamCaptureMerge: - return "operation would result in a merge of separate capture sequences"; - case hipErrorStreamCaptureUnmatched: - return "capture was not ended in the same stream as it began"; - case hipErrorStreamCaptureUnjoined: - return "capturing stream has unjoined work"; - case hipErrorStreamCaptureIsolation: - return "dependency created on uncaptured work in another stream"; - case hipErrorStreamCaptureImplicit: - return "operation would make the legacy stream depend on a capturing blocking stream"; //NOLINT - case hipErrorCapturedEvent: - return "operation not permitted on an event last recorded in a capturing stream"; //NOLINT - case hipErrorStreamCaptureWrongThread: - return "attempt to terminate a thread-local capture sequence from another thread"; //NOLINT - case hipErrorGraphExecUpdateFailure: - return "the graph update was not performed because it included changes which violated constraints specific to instantiated graph update"; //NOLINT - case hipErrorRuntimeMemory: - return "runtime memory call returned error"; - case hipErrorRuntimeOther: - return "runtime call other than memory returned error"; - case hipErrorUnknown: - default: - 
#if HT_AMD - return "unknown error"; - #elif HT_NVIDIA - return "unknown error"; - #endif - } -} +/** + * @addtogroup hipDrvGetErrorString hipDrvGetErrorString + * @{ + * @ingroup ErrorTest + * `hipDrvGetErrorString(hipError_t hipError)` - + * Return handy text string message to explain the error which occurred. + */ -// Test case to verify the returned error string is -// same as generated error string. - -TEST_CASE("Unit_hipDrvGetErrorString_Functional") { +/** + * Test Description + * ------------------------ + * - Validate that the correct string is returned for each supported + * device error enumeration. + * Test source + * ------------------------ + * - unit/errorHandling/hipDrvGetErrorString.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.4 + */ +TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") { const char* error_string = nullptr; const auto enumerator = - GENERATE(from_range(std::begin(kErrorEnumerators), - std::end(kErrorEnumerators))); - hipError_t error_ret = hipDrvGetErrorString(enumerator, &error_string); + GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); + + HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string)); + REQUIRE(error_string != nullptr); REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0); - REQUIRE(error_ret == hipSuccess); } -// Negative test cases. 
-
-TEST_CASE("Unit_hipDrvGetErrorString_Negative") {
+/**
+ * Test Description
+ * ------------------------
+ *  - Validate handling of invalid arguments:
+ *    -# When error enumerator is invalid (-1)
+ *      - Expected output: return "hipErrorInvalidValue"
+ *    -# When nullptr is passed as store location (AMD only, segfaults on NVIDIA)
+ *      - Expected output: return "hipErrorInvalidValue"
+ * Test source
+ * ------------------------
+ *  - unit/errorHandling/hipDrvGetErrorString.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
   const char* error_string = nullptr;
 
   SECTION("pass unknown value to hipError") {
-    REQUIRE((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string))
-                                   == hipErrorInvalidValue);
+    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
+                    hipErrorInvalidValue);
   }
-  #if HT_AMD
+#if HT_AMD  // segfaults on NVIDIA
   SECTION("pass nullptr to error string") {
-    REQUIRE((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr))
-                                   == hipErrorInvalidValue);
+    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
+                    hipErrorInvalidValue);
   }
-  #endif
+#endif
 }
diff --git a/catch/unit/errorHandling/hipGetErrorName.cc b/catch/unit/errorHandling/hipGetErrorName.cc
index a498e62387..75d9f4a549 100644
--- a/catch/unit/errorHandling/hipGetErrorName.cc
+++ b/catch/unit/errorHandling/hipGetErrorName.cc
@@ -20,10 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
*/ - -#include "errorEnumerators.h" #include -#include + +#include "error_handling_common.hh" /** * @addtogroup hipGetErrorName hipGetErrorName @@ -49,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorName_Positive_Basic") { const char* error_string = nullptr; const auto enumerator = GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); error_string = hipGetErrorName(enumerator); diff --git a/catch/unit/errorHandling/hipGetErrorString.cc b/catch/unit/errorHandling/hipGetErrorString.cc index e38f0dc54e..6becd9fdb6 100644 --- a/catch/unit/errorHandling/hipGetErrorString.cc +++ b/catch/unit/errorHandling/hipGetErrorString.cc @@ -20,9 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "errorEnumerators.h" #include -#include + +#include "error_handling_common.hh" /** * @addtogroup hipGetErrorString hipGetErrorString @@ -48,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorString_Positive_Basic") { const char* error_string = nullptr; const auto enumerator = GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); error_string = hipGetErrorString(enumerator); diff --git a/catch/unit/errorHandling/hipPeekAtLastError.cc b/catch/unit/errorHandling/hipPeekAtLastError.cc index ae22a3067a..aac75e41e1 100644 --- a/catch/unit/errorHandling/hipPeekAtLastError.cc +++ b/catch/unit/errorHandling/hipPeekAtLastError.cc @@ -21,7 +21,6 @@ THE SOFTWARE. */ #include -#include #include /** @@ -56,7 +55,8 @@ TEST_CASE("Unit_hipPeekAtLastError_Positive_Basic") { * Test Description * ------------------------ * - Validate that appropriate error is returned when working with multiple threads. - * - Validate that appropriate error is returned for getting the last erro when working with multiple threads. + * - Validate that appropriate error is returned for getting the last error when working with + * multiple threads. 
* - Cause error on purpose within one of the threads. * Test source * ------------------------ diff --git a/catch/unit/event/hipEventCreateWithFlags.cc b/catch/unit/event/hipEventCreateWithFlags.cc index 875d7f4295..cb3e0d4ed5 100644 --- a/catch/unit/event/hipEventCreateWithFlags.cc +++ b/catch/unit/event/hipEventCreateWithFlags.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include -#include + #include constexpr size_t buffer_size = (1024*1024); diff --git a/catch/unit/executionControl/CMakeLists.txt b/catch/unit/executionControl/CMakeLists.txt index a27f9dc4f1..877addd79b 100644 --- a/catch/unit/executionControl/CMakeLists.txt +++ b/catch/unit/executionControl/CMakeLists.txt @@ -4,6 +4,7 @@ set(TEST_SRC hipFuncSetSharedMemConfig.cc hipFuncSetAttribute.cc hipFuncGetAttributes.cc + hipLaunchKernel.cc hipLaunchCooperativeKernel.cc hipLaunchCooperativeKernelMultiDevice.cc ) @@ -12,6 +13,7 @@ if(HIP_PLATFORM MATCHES "amd") set(TEST_SRC ${TEST_SRC} hipExtLaunchKernel.cc hipExtLaunchMultiKernelMultiDevice.cc + launch_api.cc ) endif() diff --git a/catch/unit/executionControl/hipExtLaunchKernel.cc b/catch/unit/executionControl/hipExtLaunchKernel.cc index 8b85507de5..1b336b4d74 100644 --- a/catch/unit/executionControl/hipExtLaunchKernel.cc +++ b/catch/unit/executionControl/hipExtLaunchKernel.cc @@ -49,19 +49,19 @@ TEST_CASE("Unit_hipExtLaunchKernel_Positive_Basic") { TEST_CASE("Unit_hipExtLaunchKernel_Positive_Parameters") { SECTION("blockDim.x == maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u)); } SECTION("blockDim.y == maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); 
HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{y, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u)); } SECTION("blockDim.z == maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{z, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u)); } @@ -111,28 +111,28 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") { } SECTION("blockDim.x > maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), hipErrorInvalidConfiguration); } SECTION("blockDim.y > maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), hipErrorInvalidConfiguration); } SECTION("blockDim.z > maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, nullptr, 0, nullptr, nullptr, nullptr, 0u), hipErrorInvalidConfiguration); } SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); const unsigned int dim = std::ceil(std::cbrt(max)); HIP_CHECK_ERROR( 
hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim}, @@ -141,7 +141,7 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") { } SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, max, nullptr, nullptr, nullptr, 0u), hipErrorOutOfMemory); diff --git a/catch/unit/executionControl/hipFuncGetAttributes.cc b/catch/unit/executionControl/hipFuncGetAttributes.cc index e97f44300e..c3ce1c835e 100644 --- a/catch/unit/executionControl/hipFuncGetAttributes.cc +++ b/catch/unit/executionControl/hipFuncGetAttributes.cc @@ -35,8 +35,8 @@ TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") { SECTION("binaryVersion") { #if HT_NVIDIA - const auto major = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMajor); - const auto minor = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMinor); + const auto major = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMajor, 0); + const auto minor = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMinor, 0); REQUIRE(attr.binaryVersion == major * 10 + minor); #elif HT_AMD REQUIRE(attr.binaryVersion > 0); @@ -48,7 +48,7 @@ TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") { SECTION("constSizeBytes") { REQUIRE(attr.constSizeBytes == kConstSizeBytes); } SECTION("maxThreadsPerBlock") { - REQUIRE(attr.maxThreadsPerBlock == GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock)); + REQUIRE(attr.maxThreadsPerBlock == GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0)); } SECTION("numRegs") { REQUIRE(attr.numRegs >= 0); } @@ -57,7 +57,7 @@ TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") { SECTION("sharedSizeBytes") { REQUIRE(attr.sharedSizeBytes <= - GetDeviceAttribute(0, 
hipDeviceAttributeMaxSharedMemoryPerBlock)); + GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); } } diff --git a/catch/unit/executionControl/hipLaunchCooperativeKernel.cc b/catch/unit/executionControl/hipLaunchCooperativeKernel.cc index eb7eb2293f..5beeed4621 100644 --- a/catch/unit/executionControl/hipLaunchCooperativeKernel.cc +++ b/catch/unit/executionControl/hipLaunchCooperativeKernel.cc @@ -61,19 +61,19 @@ TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Parameters") { } SECTION("blockDim.x == maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, nullptr)); } SECTION("blockDim.y == maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{y, 1, 1}, nullptr, 0, nullptr)); } SECTION("blockDim.z == maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{z, 1, 1}, nullptr, 0, nullptr)); } @@ -128,28 +128,28 @@ TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") { } SECTION("blockDim.x > maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, nullptr), hipErrorInvalidConfiguration); } SECTION("blockDim.y > maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, 
hipDeviceAttributeMaxBlockDimY) + 1u; + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, nullptr, 0, nullptr), hipErrorInvalidConfiguration); } SECTION("blockDim.z > maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, nullptr, 0, nullptr), hipErrorInvalidConfiguration); } SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); const unsigned int dim = std::ceil(std::cbrt(max)); HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim}, nullptr, 0, nullptr), @@ -163,7 +163,7 @@ TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") { HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, reinterpret_cast(kernel), 1, 0)); const unsigned int multiproc_count = - GetDeviceAttribute(0, hipDeviceAttributeMultiprocessorCount); + GetDeviceAttribute(hipDeviceAttributeMultiprocessorCount, 0); const unsigned int dim = std::ceil(std::cbrt(max_blocks * multiproc_count)); HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{dim, dim, dim}, dim3{1, 1, 1}, nullptr, 0, nullptr), @@ -171,7 +171,7 @@ TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") { } SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; 
HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, max, nullptr), hipErrorCooperativeLaunchTooLarge); diff --git a/catch/unit/executionControl/hipLaunchKernel.cc b/catch/unit/executionControl/hipLaunchKernel.cc new file mode 100644 index 0000000000..d9272107eb --- /dev/null +++ b/catch/unit/executionControl/hipLaunchKernel.cc @@ -0,0 +1,156 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "execution_control_common.hh" + +#include +#include +#include +#include + +TEST_CASE("Unit_hipLaunchKernel_Positive_Basic") { + SECTION("Kernel with no arguments") { + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, + nullptr, 0, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + } + + SECTION("Kernel with arguments using kernelParams") { + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel_42), dim3{1, 1, 1}, dim3{1, 1, 1}, + kernel_args, 0, nullptr)); + int result = 0; + HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} + +TEST_CASE("Unit_hipLaunchKernel_Positive_Parameters") { + SECTION("blockDim.x == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, + nullptr, 0, nullptr)); + } + + SECTION("blockDim.y == maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, + nullptr, 0, nullptr)); + } + + SECTION("blockDim.z == maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, + nullptr, 0, nullptr)); + } +} + +TEST_CASE("Unit_hipLaunchKernel_Negative_Parameters") { + SECTION("f == nullptr") { + HIP_CHECK_ERROR(hipLaunchKernel(nullptr, dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidDeviceFunction); + } + + SECTION("gridDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{0, 1, 1}, dim3{1, 1, 1}, + nullptr, 0, nullptr), +
hipErrorInvalidValue); + } + + SECTION("gridDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 0, 1}, dim3{1, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("gridDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 0}, dim3{1, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{0, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 0, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 0}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.x > maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.y > maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, + nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.z > maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, + nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); + const unsigned int dim = std::ceil(std::cbrt(max)); + 
HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{dim, dim, dim}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, + nullptr, max, nullptr), + hipErrorOutOfMemory); + } + + SECTION("Invalid stream") { + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, + nullptr, 0, stream), + hipErrorInvalidValue); + } +} \ No newline at end of file diff --git a/catch/unit/executionControl/launch_api.cc b/catch/unit/executionControl/launch_api.cc new file mode 100644 index 0000000000..64cdcf8266 --- /dev/null +++ b/catch/unit/executionControl/launch_api.cc @@ -0,0 +1,69 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "execution_control_common.hh" + +#include +#include + +TEST_CASE("Unit_hipLaunchByPtr_Positive_Basic") { + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, 4); + + SECTION("hipConfigureCall") { HIP_CHECK(hipConfigureCall(dim3{1}, dim3{1}, 0, nullptr)); } + + SECTION("__hipPushCallConfiguration") { + HIP_CHECK(__hipPushCallConfiguration(dim3{1}, dim3{1}, 0, nullptr)); + } + + int* arg = alloc.ptr(); + HIP_CHECK(hipSetupArgument(&arg, sizeof(int*), 0)); + + HIP_CHECK(hipLaunchByPtr(reinterpret_cast(kernel_42))); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(alloc.ptr()[0] == 42); +} + +TEST_CASE("Unit_hipLaunchByPtr_Negative_Parameters") { + HIP_CHECK(hipConfigureCall(dim3{1}, dim3{1}, 0, nullptr)); + HIP_CHECK_ERROR(hipLaunchByPtr(nullptr), hipErrorInvalidDeviceFunction); +} + +TEST_CASE("Unit___hipPushCallConfiguration_Positive_Basic") { + StreamGuard stream_guard(Streams::created); + HIP_CHECK(__hipPushCallConfiguration(dim3{1, 2, 3}, dim3{3, 2, 1}, 1024, stream_guard.stream())); + + dim3 grid; + dim3 block; + size_t shmem; + hipStream_t stream; + HIP_CHECK(__hipPopCallConfiguration(&grid, &block, &shmem, &stream)); + + REQUIRE(grid.x == 1); + REQUIRE(grid.y == 2); + REQUIRE(grid.z == 3); + REQUIRE(block.x == 3); + REQUIRE(block.y == 2); + REQUIRE(block.z == 1); + REQUIRE(shmem == 1024); + REQUIRE(stream == stream_guard.stream()); +} \ No newline at end of file diff --git a/catch/unit/g++/hipMalloc.cc b/catch/unit/g++/hipMalloc.cc index 22e3141c0e..c0ee9d0892 100644 --- a/catch/unit/g++/hipMalloc.cc +++ b/catch/unit/g++/hipMalloc.cc @@ -18,7 +18,7 @@ * */ #include -#include + #include "hipMalloc.h" /** * @addtogroup hipMalloc hipMalloc diff --git a/catch/unit/gcc/gccTest.cc 
b/catch/unit/gcc/gccTest.cc index 6c64553558..6332540682 100644 --- a/catch/unit/gcc/gccTest.cc +++ b/catch/unit/gcc/gccTest.cc @@ -18,7 +18,7 @@ * */ #include -#include + extern "C" { #include "LaunchKernel.h" } diff --git a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index cef5d2f5b7..93de37d12e 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -148,11 +148,27 @@ set(TEST_SRC hipGraphKernelNodeGetAttribute.cc hipGraphKernelNodeSetAttribute.cc hipGraphMemAllocNodeGetParams.cc - hipDrvGraphAddMemcpyNode.cc hipGraphAddMemAllocNode.cc hipGraphAddMemFreeNode.cc + hipDrvGraphMemcpyNodeGetParams.cc + hipDrvGraphMemcpyNodeSetParams.cc + hipDeviceSetGraphMemAttribute.cc + hipDeviceGetGraphMemAttribute.cc + hipDeviceGraphMemTrim.cc ) +if(HIP_PLATFORM MATCHES "amd") + set(AMD_SRC + # hipGraphAddNode, hipGraphNodeParams, hipMemcpyNodeParams are not mapped to Nvidia + hipGraphAddNode.cc + # hipDrvGraphAddMemsetNode, HIP_MEMSET_NODE_PARAMS are not mapped to Nvidia + hipDrvGraphAddMemsetNode.cc + # hipDrvGraphAddMemcpyNode not mapped to Nvidia + hipDrvGraphAddMemcpyNode.cc + ) + set(TEST_SRC ${TEST_SRC} ${AMD_SRC}) +endif() + add_custom_target(add_Kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/add_Kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../graph/add_Kernel.code -I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) hip_add_exe_to_target(NAME GraphsTest2 diff --git a/catch/unit/graph/graph_memset_node_test_common.hh b/catch/unit/graph/graph_memset_node_test_common.hh index f4b957283e..b23a169339 100644 --- a/catch/unit/graph/graph_memset_node_test_common.hh +++ b/catch/unit/graph/graph_memset_node_test_common.hh @@ -26,14 +26,14 @@ THE SOFTWARE. 
#include #include -template void GraphMemsetNodeCommonPositive(F f) { +template void GraphMemsetNodeCommonPositive(F f) { const size_t width = GENERATE(1, 64, kPageSize / sizeof(T) + 1); const size_t height = GENERATE(1, 2, 1024); DYNAMIC_SECTION("Width: " << width << " Height: " << height) { LinearAllocGuard2D alloc(width, height); constexpr T set_value = 42; - hipMemsetParams params = {}; + Tp params = {}; params.dst = alloc.ptr(); params.elementSize = sizeof(T); params.width = width; @@ -50,7 +50,7 @@ template void GraphMemsetNodeCommonPositive(F f) { } } -template void MemsetCommonNegative(F f, hipMemsetParams params) { +template void MemsetCommonNegative(F f, T params) { SECTION("pMemsetParams == nullptr") { HIP_CHECK_ERROR(f(nullptr), hipErrorInvalidValue); } SECTION("pMemsetParams.dst == nullptr") { diff --git a/catch/unit/graph/hipDeviceGetGraphMemAttribute.cc b/catch/unit/graph/hipDeviceGetGraphMemAttribute.cc new file mode 100644 index 0000000000..7aa10fe61c --- /dev/null +++ b/catch/unit/graph/hipDeviceGetGraphMemAttribute.cc @@ -0,0 +1,205 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +/** + * @addtogroup hipDeviceGetGraphMemAttribute hipDeviceGetGraphMemAttribute + * @{ + * @ingroup GraphTest + * `hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value)` - + * Get the mem attribute for graphs. + */ + +static constexpr auto element_count{64 * 1024 * 1024}; + + +/* Create graph with memory node */ +static void createGraph(hipGraphExec_t* graph_exec, int** device_alloc = nullptr) { + constexpr size_t num_bytes = element_count * sizeof(int); + + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipGraphNode_t alloc_node; + hipMemAllocNodeParams alloc_param; + memset(&alloc_param, 0, sizeof(alloc_param)); + alloc_param.bytesize = num_bytes; + alloc_param.poolProps.allocType = hipMemAllocationTypePinned; + alloc_param.poolProps.location.id = 0; + alloc_param.poolProps.location.type = hipMemLocationTypeDevice; + + HIP_CHECK(hipGraphAddMemAllocNode(&alloc_node, graph, nullptr, 0, &alloc_param)); + REQUIRE(alloc_param.dptr != nullptr); + int* A_d = reinterpret_cast(alloc_param.dptr); + + if (device_alloc == nullptr) { + hipGraphNode_t free_node; + HIP_CHECK(hipGraphAddMemFreeNode(&free_node, graph, &alloc_node, 1, (void*)A_d)); + } else { + *device_alloc = A_d; + } + + // Instantiate graph + HIP_CHECK(hipGraphInstantiate(graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphDestroy(graph)); +} + +/* check if memory attributes for graphs contain expected values */ +static void checkGraphMemAttribute(size_t used_mem, size_t high_mem) { + size_t read_mem; + hipGraphMemAttributeType attr = hipGraphMemAttrUsedMemCurrent; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + 
REQUIRE(read_mem == used_mem); + + attr = hipGraphMemAttrReservedMemCurrent; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + REQUIRE(read_mem == used_mem); + + attr = hipGraphMemAttrUsedMemHigh; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + REQUIRE(read_mem == high_mem); + + attr = hipGraphMemAttrReservedMemHigh; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + REQUIRE(read_mem == high_mem); +} + +/** + * Test Description + * ------------------------ + * - Basic test to verify that hipDeviceGetGraphMemAttribute returns correct memory attribute values + * when graphs with allocation nodes are launched, and after memory is freed to OS. + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Positive_DoubleMemory") { + hipGraphExec_t graph_exec1, graph_exec2; + int *dev_p1, *dev_p2; + + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + + createGraph(&graph_exec1, &dev_p1); + HIP_CHECK(hipGraphLaunch(graph_exec1, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(element_count * sizeof(int), element_count * sizeof(int)); + + createGraph(&graph_exec2, &dev_p2); + HIP_CHECK(hipGraphLaunch(graph_exec2, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(2 * element_count * sizeof(int), 2 * element_count * sizeof(int)); + + HIP_CHECK(hipFree(dev_p1)); + HIP_CHECK(hipFree(dev_p2)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec1)); + HIP_CHECK(hipGraphExecDestroy(graph_exec2)); + HIP_CHECK(hipDeviceGraphMemTrim(0)); + checkGraphMemAttribute(0, 2 * element_count * sizeof(int)); +} + +/** + * Test Description + * ------------------------ + * - Basic test to verify that hipDeviceGetGraphMemAttribute returns
correct memory attribute values + * when graphs with allocation and free nodes are launched, and after memory is freed to OS. + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Positive_ReuseMemory") { + hipGraphExec_t graph_exec1, graph_exec2; + + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + + createGraph(&graph_exec1); + HIP_CHECK(hipGraphLaunch(graph_exec1, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(element_count * sizeof(int), element_count * sizeof(int)); + + createGraph(&graph_exec2); + HIP_CHECK(hipGraphLaunch(graph_exec2, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(element_count * sizeof(int), element_count * sizeof(int)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec1)); + HIP_CHECK(hipGraphExecDestroy(graph_exec2)); + HIP_CHECK(hipDeviceGraphMemTrim(0)); + checkGraphMemAttribute(0, element_count * sizeof(int)); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipDeviceGetGraphMemAttribute behavior with invalid arguments: + * -# Device is not valid + * -# Attribute value is not valid + * -# Get value is nullptr + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Negative_Parameters") { + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int num_dev = 0; + HIP_CHECK(hipGetDeviceCount(&num_dev)); + + hipGraphMemAttributeType attr = hipGraphMemAttrUsedMemHigh; + size_t get_value = 0; + + SECTION("Device is not valid") { + HIP_CHECK_ERROR( + hipDeviceGetGraphMemAttribute(num_dev, attr, reinterpret_cast(&get_value)), + hipErrorInvalidDevice); + } + + 
SECTION("Attribute value is not valid") { + HIP_CHECK_ERROR(hipDeviceGetGraphMemAttribute(0, static_cast(0x7), + reinterpret_cast(&get_value)), + hipErrorInvalidValue); + } + + SECTION("Get value is nullptr") { + HIP_CHECK_ERROR(hipDeviceGetGraphMemAttribute(0, attr, nullptr), hipErrorInvalidValue); + } +} diff --git a/catch/unit/graph/hipDeviceGraphMemTrim.cc b/catch/unit/graph/hipDeviceGraphMemTrim.cc new file mode 100644 index 0000000000..5d730a3f88 --- /dev/null +++ b/catch/unit/graph/hipDeviceGraphMemTrim.cc @@ -0,0 +1,73 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +/** + * @addtogroup hipDeviceGraphMemTrim hipDeviceGraphMemTrim + * @{ + * @ingroup GraphTest + * `hipDeviceGraphMemTrim(int device)` - Free unused memory on specific device used for graph back + * to OS. 
+ */ + +/** + * Test Description + * ------------------------ + * - Basic test to verify that unused memory used for graph can be freed on each device. + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGraphMemTrim.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGraphMemTrim_Positive_Default") { + const auto device = GENERATE(range(0, HipTest::getDeviceCount())); + + // Check for each device + HIP_CHECK(hipDeviceGraphMemTrim(device)); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipDeviceGraphMemTrim behavior with invalid arguments: + * -# Device is not valid + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGraphMemTrim.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGraphMemTrim_Negative_Parameters") { + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int num_dev = 0; + HIP_CHECK(hipGetDeviceCount(&num_dev)); + + SECTION("Device is not valid") { + HIP_CHECK_ERROR(hipDeviceGraphMemTrim(num_dev), hipErrorInvalidDevice); + } +} diff --git a/catch/unit/graph/hipDeviceSetGraphMemAttribute.cc b/catch/unit/graph/hipDeviceSetGraphMemAttribute.cc new file mode 100644 index 0000000000..a103b12fee --- /dev/null +++ b/catch/unit/graph/hipDeviceSetGraphMemAttribute.cc @@ -0,0 +1,117 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +/** + * @addtogroup hipDeviceSetGraphMemAttribute hipDeviceSetGraphMemAttribute + * @{ + * @ingroup GraphTest + * `hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value)` - + * Set the mem attribute for graphs. + */ + +static void GraphSetGetAttribute(int device, hipGraphMemAttributeType attr, size_t set_value) { + size_t get_value = 100; + HIP_CHECK(hipDeviceSetGraphMemAttribute(device, attr, &set_value)); + HIP_CHECK(hipDeviceGetGraphMemAttribute(device, attr, &get_value)); + REQUIRE(get_value == set_value); +} + +/** + * Test Description + * ------------------------ + * - Basic test to verify that valid attributes can be reset to zero. 
+ * Test source + * ------------------------ + * - /unit/graph/hipDeviceSetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceSetGraphMemAttribute_Positive_Default") { + const auto device = GENERATE(range(0, HipTest::getDeviceCount())); + const auto attr_type = GENERATE(hipGraphMemAttrUsedMemHigh, hipGraphMemAttrReservedMemHigh); + + // Check if attributes can be reset + size_t set_value = 0; + GraphSetGetAttribute(device, attr_type, set_value); +} + + +/** + * Test Description + * ------------------------ + * - Test to verify hipDeviceSetGraphMemAttribute behavior with invalid arguments: + * -# Device is not valid + * -# Attribute value is not supported + * -# Attribute value is not valid + * -# Set hipGraphMemAttrUsedMemHigh to non-zero + * -# Set hipGraphMemAttrReservedMemHigh to non-zero + * Test source + * ------------------------ + * - /unit/graph/hipDeviceSetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceSetGraphMemAttribute_Negative_Parameters") { + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int num_dev = 0; + HIP_CHECK(hipGetDeviceCount(&num_dev)); + + hipGraphMemAttributeType attr = hipGraphMemAttrUsedMemHigh; + size_t set_value = 0; + + SECTION("device is not valid") { + HIP_CHECK_ERROR( + hipDeviceSetGraphMemAttribute(num_dev, attr, reinterpret_cast(&set_value)), + hipErrorInvalidDevice); + } + + SECTION("Attribute value is not supported") { + HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, hipGraphMemAttrUsedMemCurrent, + reinterpret_cast(&set_value)), + hipErrorInvalidValue); + } + + SECTION("Attribute value is not valid") { + HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, static_cast(0x7), + reinterpret_cast(&set_value)), + hipErrorInvalidValue); + } + + SECTION("Set hipGraphMemAttrUsedMemHigh to non-zero") { + size_t invalid_value = 1; + 
HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, attr, reinterpret_cast(&invalid_value)), + hipErrorInvalidValue); + } + + SECTION("Set hipGraphMemAttrReservedMemHigh to non-zero") { + attr = hipGraphMemAttrReservedMemHigh; + size_t invalid_value = 1; + HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, attr, reinterpret_cast(&invalid_value)), + hipErrorInvalidValue); + } +} diff --git a/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc b/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc index c3bbf553db..53c9ec079f 100644 --- a/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc +++ b/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc @@ -17,11 +17,30 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include +#include #include +#include + #include "numeric" +#include "graph_tests_common.hh" + #define XSIZE 32 +/** + * @addtogroup hipDrvGraphAddMemcpyNode hipDrvGraphAddMemcpyNode + * @{ + * @ingroup GraphTest + * `hipDrvGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const + * hipGraphNode_t *pDependencies, size_t numDependencies, const HIP_MEMCPY3D* copyParams, hipCtx_t + ctx)` + - Creates a memcpy node and adds it to a graph + */ + +// APIs hipDrvGraphMemcpyNodeGetParams, hipDrvGraphMemcpyNodeSetParams are yet to be implemented in HIP runtime. +#if 0 /** * Test Description * ------------------------ @@ -362,3 +381,282 @@ TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_MulitDevice") { } } #endif + +/** + * Test Description + * ------------------------ + * - Verify basic API behavior. A Memcpy node is created with parameters set according to the + * test run, after which the graph is run and the memcpy results are verified. + * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. 
+ * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemcpyNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_Positive_Basic") { + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Device to host") { + Memcpy3DDeviceToHostShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to device") { + Memcpy3DHostToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to host") { + Memcpy3DHostToHostShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") { + Memcpy3DDeviceToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_Positive_Array") { + CHECK_IMAGE_SUPPORT + + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Array from/to Host") { + DrvMemcpy3DArrayHostShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Array from/to Device") { + DrvMemcpy3DArrayDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} 
+#endif // if 0 + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# dst is nullptr + * -# src is nullptr + * -# dstPitch < width + * -# srcPitch < width + * -# dstPitch > max pitch + * -# srcPitch > max pitch + * -# WidthInBytes + dstXInBytes > dstPitch + * -# WidthInBytes + srcXInBytes > srcPitch + * -# dstY out of bounds + * -# srcY out of bounds + * -# dstZ out of bounds + * -# srcZ out of bounds + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemcpyNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_Negative_Parameters") { + using namespace std::placeholders; + + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + hipCtx_t context) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + GraphAddNodeCommonNegativeTests( + std::bind(hipDrvGraphAddMemcpyNode, _1, _2, _3, _4, ¶ms, context), graph); + + SECTION("dst_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.ptr = nullptr; + auto params = GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + 
hipErrorInvalidValue); + } + + SECTION("src_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.ptr = nullptr; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("dstPitch < width") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidPitchValue); + } + + SECTION("srcPitch < width") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidPitchValue); + } + + SECTION("dstPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = attr; + auto params = GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("srcPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = attr; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("WidthInBytes + dstXInBytes > dstPitch") { + hipPos invalid_pos = dst_pos; + invalid_pos.x = dst_ptr.pitch - extent.width + 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, 
extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("WidthInBytes + srcXInBytes > srcPitch") { + hipPos invalid_pos = src_pos; + invalid_pos.x = src_ptr.pitch - extent.width + 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("dstY out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.y = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("srcY out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.y = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("dstZ out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.z = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + SECTION("srcZ out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.z = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms, context), + hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphDestroy(graph)); + }; + + SECTION("Host to Device") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), + 
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, context); + } + + SECTION("Device to Host") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent, + hipMemcpyDeviceToHost, context); + } + + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), + make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost, context); + } + + SECTION("Device to Device") { + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, context); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} \ No newline at end of file diff --git a/catch/unit/graph/hipDrvGraphAddMemsetNode.cc b/catch/unit/graph/hipDrvGraphAddMemsetNode.cc new file mode 100644 index 0000000000..cbdd461c5a --- /dev/null +++ b/catch/unit/graph/hipDrvGraphAddMemsetNode.cc @@ -0,0 +1,672 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include +#include +#include +#include +#include + +#include "graph_memset_node_test_common.hh" +#include "graph_tests_common.hh" + +#define SIZE 1024 +static char memSetVal = 'a'; + +/** + * @addtogroup hipDrvGraphAddMemsetNode hipDrvGraphAddMemsetNode + * @{ + * @ingroup GraphTest + * `hipDrvGraphAddMemsetNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, const hipGraphNode_t* + * dependencies, size_t numDependencies, const HIP_MEMSET_NODE_PARAMS* memsetParams, hipCtx_t ctx)` + * - Creates a memset node and adds it to a graph + */ + +/** + * Test Description + * ------------------------ + * - Verify that all elements of destination memory are set to the correct value. + * The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different + * height and width, both on host and device. 
+ * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEMPLATE_TEST_CASE("Unit_hipDrvGraphAddMemsetNode_Positive_Basic", "", uint8_t, uint16_t, + uint32_t) { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + const auto f = [&context](HIP_MEMSET_NODE_PARAMS* params) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipDrvGraphAddMemsetNode(&node, graph, nullptr, 0, params, context)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; + }; + + GraphMemsetNodeCommonPositive(f); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# pGraphNode is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# pMemsetParams is nullptr + * -# pMemsetParams::dst is nullptr + * -# pMemsetParams::elementSize is different from 1, 2, and 4 + * -# pMemsetParams::width is zero + * -# pMemsetParams::width is larger than the allocated memory region + * -# pMemsetParams::height is zero + * -# pMemsetParams::pitch is less than width when height is more than 1 + * -# pMemsetParams::pitch * pMemsetParams::height is larger than the allocated memory region + * Test 
source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_Negative_Parameters") { + using namespace std::placeholders; + + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + LinearAllocGuard alloc(LinearAllocs::hipMalloc, 4 * sizeof(int)); + HIP_MEMSET_NODE_PARAMS params = {}; + params.dst = alloc.ptr(); + params.elementSize = sizeof(*alloc.ptr()); + params.width = 1; + params.height = 1; + params.value = 42; + + GraphAddNodeCommonNegativeTests( + std::bind(hipDrvGraphAddMemsetNode, _1, _2, _3, _4, ¶ms, context), graph); + + hipGraphNode_t node = nullptr; + MemsetCommonNegative(std::bind(hipDrvGraphAddMemsetNode, &node, graph, nullptr, 0, _1, context), + params); + + HIP_CHECK(hipGraphDestroy(graph)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 2D array using hipMallocPitch. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char), numW{SIZE}, numH{SIZE}, pitch_A; + char* A_d; + + hipGraph_t graph; + std::vector nodeDependencies; + // Host memory. 
+ char* A_h = new char[numW * numH]; + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + *(A_h + i * numH + j) = ' '; + } + } + // 2D Memory allocation hipMallocPitch + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, numH)); + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = pitch_A; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = numH; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = make_hipPitchedPtr(A_d, pitch_A, numW, numH); + auto dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); + auto extent = make_hipExtent(width, numH, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verfication + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + REQUIRE(*(A_h + i * numH + j) == memSetVal); + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + 
delete[] A_h; + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A; + char* A_d; + + // Initialize the host memory + std::vector A_h(numW, ' '); + + hipGraph_t graph; + std::vector nodeDependencies; + // 1D Memory allocation hipMallocPitch + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, 1)); + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = pitch_A; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = make_hipPitchedPtr(A_d, pitch_A, numW, 1); + auto dstPtr = make_hipPitchedPtr(A_h.data(), width, numW, 1); + auto extent = make_hipExtent(width, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D 
myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verfication + for (size_t i = 0; i < numW; i++) { + REQUIRE(A_h[i] == memSetVal); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char); + size_t numW = SIZE, numH = SIZE; + + // Host Memory + char* A_h = new char[numW * numH]; + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + *(A_h + i * numH + j) = ' '; + } + } + hipGraph_t graph; + std::vector nodeDependencies; + + hipPitchedPtr A_d; + hipExtent extent3D = make_hipExtent(width, numH, 1); + + // Allocate 3D memory. 
+ HIPCHECK(hipMalloc3D(&A_d, extent3D)); + + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d.ptr); + memsetParams.value = memSetVal; + memsetParams.pitch = A_d.pitch; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = numH; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = A_d; + auto dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); + auto extent = make_hipExtent(width, numH, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verfication + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + REQUIRE(*(A_h + i * numH + j) == memSetVal); + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + delete[] A_h; + HIP_CHECK(hipFree(A_d.ptr)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMalloc3D. 
Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char); + size_t numW = SIZE; + + // Initialize the host memory + std::vector A_h(numW, ' '); + + hipGraph_t graph; + std::vector nodeDependencies; + + hipPitchedPtr A_d; + hipExtent extent1D = make_hipExtent(width, 1, 1); + + // Allocate 3D memory. + HIPCHECK(hipMalloc3D(&A_d, extent1D)); + + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d.ptr); + memsetParams.value = memSetVal; + memsetParams.pitch = A_d.pitch; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = A_d; + auto dstPtr = make_hipPitchedPtr(A_h.data(), width, numW, 1); + auto extent = make_hipExtent(width, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // 
Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verfication + for (size_t i = 0; i < numW; i++) { + REQUIRE(A_h[i] == memSetVal); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)) + HIP_CHECK(hipFree(A_d.ptr)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMalloc. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + char* A_d; + size_t NumW = SIZE; + size_t Nbytes1D = SIZE * sizeof(char); + + // Initialize the host memory + std::vector A_h(NumW, ' '); + + // Allocate memory to Device pointer + HIP_CHECK(hipMalloc(reinterpret_cast(&A_d), Nbytes1D)); + + // Create the graph + hipGraph_t graph; + std::vector nodeDependencies; + hipGraphNode_t memsetNode, memcpyNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // Add Memset node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = Nbytes1D; + memsetParams.elementSize = sizeof(char); + memsetParams.width = NumW; + 
memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0}; + hipPitchedPtr hostPitchedPtr{A_h.data(), Nbytes1D, NumW, 0}; + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = devPitchedPtr; + auto dstPtr = hostPitchedPtr; + auto extent = make_hipExtent(Nbytes1D, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verfication + for (size_t i = 0; i < NumW; i++) { + REQUIRE(A_h[i] == memSetVal); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMallocManaged. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. 
+ * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMallocManaged") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + int managed = 0; + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); + INFO("hipDeviceAttributeManagedMemory: " << managed); + if (managed != 1) { + WARN( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute" + "so defaulting to system memory."); + } + size_t Nbytes1D = SIZE * sizeof(char); + char* A_d; + // Initialize the host memory + std::vector A_h(SIZE, ' '); + // Device Memory + HIP_CHECK(hipMallocManaged(&A_d, SIZE * sizeof(char))); + // Create the graph + hipGraph_t graph; + std::vector nodeDependencies; + hipGraphNode_t memsetNode, memcpyNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // Add Memset node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = Nbytes1D; + memsetParams.elementSize = sizeof(char); + memsetParams.width = SIZE; + memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, SIZE, 1}; + hipPitchedPtr hostPitchedPtr{A_h.data(), Nbytes1D, SIZE, 1}; + + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = devPitchedPtr; + auto dstPtr = hostPitchedPtr; + auto extent = make_hipExtent(Nbytes1D, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + 
HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verfication + for (size_t i = 0; i < SIZE; i++) { + REQUIRE(A_h[i] == memSetVal); + } + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} diff --git a/catch/unit/graph/hipDrvGraphMemcpyNodeGetParams.cc b/catch/unit/graph/hipDrvGraphMemcpyNodeGetParams.cc new file mode 100644 index 0000000000..544c983bd1 --- /dev/null +++ b/catch/unit/graph/hipDrvGraphMemcpyNodeGetParams.cc @@ -0,0 +1,94 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +// hipDrvGraphAddMemcpyNode API is yet to be implemented in HIP runtime. +#if 0 +/** + * @addtogroup hipDrvGraphMemcpyNodeGetParams hipDrvGraphMemcpyNodeGetParams + * @{ + * @ingroup GraphTest + * `hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t hNode, HIP_MEMCPY3D* nodeParams)` - + * Gets a memcpy node's parameters + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipDrvGraphMemcpyNodeSetParams_Positive_Basic + */ + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# pNodeParams is nullptr + * -# node is destroyed + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphMemcpyNodeGetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphMemcpyNodeGetParams_Negative_Parameters") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + + auto params = + GetDrvMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice); + + hipGraph_t graph = nullptr; + hipGraphNode_t node = nullptr; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeGetParams(nullptr, &params), hipErrorInvalidValue); + } + + SECTION("pNodeParams == nullptr") { + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params,
context)); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeGetParams(node, nullptr), hipErrorInvalidValue); + HIP_CHECK(hipGraphDestroy(graph)); + } + + SECTION("Node is destroyed") { + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeGetParams(node, &params), hipErrorInvalidValue); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} +#endif // if 0 \ No newline at end of file diff --git a/catch/unit/graph/hipDrvGraphMemcpyNodeSetParams.cc b/catch/unit/graph/hipDrvGraphMemcpyNodeSetParams.cc new file mode 100644 index 0000000000..eaa3469924 --- /dev/null +++ b/catch/unit/graph/hipDrvGraphMemcpyNodeSetParams.cc @@ -0,0 +1,317 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#include + +#include +#include +#include + +// hipDrvGraphMemcpyNodeSetParams API is yet to be implemented in HIP runtime. +#if 0 +/** + * @addtogroup hipDrvGraphMemcpyNodeSetParams hipDrvGraphMemcpyNodeSetParams + * @{ + * @ingroup GraphTest + * `hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams)` - Sets a + * memcpy node's parameters + */ + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values after which the graph is + * executed and the results of the memcpy verified. + * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphMemcpyNodeSetParams_Positive_Basic") { + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Device to host") { + Memcpy3DDeviceToHostShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to device") { + Memcpy3DHostToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to host") { + Memcpy3DHostToHostShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") { + Memcpy3DDeviceToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell( +
std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +TEST_CASE("Unit_hipDrvGraphMemcpyNodeSetParams_Positive_Array") { + CHECK_IMAGE_SUPPORT + + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Array from/to Host") { + DrvMemcpy3DArrayHostShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Array from/to Device") { + DrvMemcpy3DArrayDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# dst is nullptr + * -# src is nullptr + * -# dstPitch < width + * -# srcPitch < width + * -# dstPitch > max pitch + * -# srcPitch > max pitch + * -# WidthInBytes + dstXInBytes > dstPitch + * -# WidthInBytes + srcXInBytes > srcPitch + * -# dstY out of bounds + * -# srcY out of bounds + * -# dstZ out of bounds + * -# srcZ out of bounds + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphMemcpyNodeSetParams_Negative_Parameters") { + using namespace std::placeholders; + + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + 
hipCtx_t context) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context)); + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(nullptr, &params), hipErrorInvalidValue); + } + + SECTION("dst_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.ptr = nullptr; + auto invalid_params = + GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("src_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.ptr = nullptr; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("dstPitch < width") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = extent.width - 1; + auto invalid_params = + GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), + hipErrorInvalidPitchValue); + } + + SECTION("srcPitch < width") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = extent.width - 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), + hipErrorInvalidPitchValue); + } + + SECTION("dstPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = attr; + auto invalid_params = + GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); +
HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("srcPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = attr; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("WidthInBytes + dstXInBytes > dstPitch") { + hipPos invalid_pos = dst_pos; + invalid_pos.x = dst_ptr.pitch - extent.width + 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("WidthInBytes + srcXInBytes > srcPitch") { + hipPos invalid_pos = src_pos; + invalid_pos.x = src_ptr.pitch - extent.width + 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("dstY out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.y = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("srcY out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.y = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("dstZ out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.z = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, 
&invalid_params), hipErrorInvalidValue); + } + + SECTION("srcZ out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.z = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphDestroy(graph)); + }; + + SECTION("Host to Device") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), + make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, context); + } + + SECTION("Device to Host") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent, + hipMemcpyDeviceToHost, context); + } + + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), + make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost, context); + } + + SECTION("Device to Device") { + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + 
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, context); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} +#endif // if 0 \ No newline at end of file diff --git a/catch/unit/graph/hipGraphAddDependencies.cc b/catch/unit/graph/hipGraphAddDependencies.cc index 0102d90ca9..1281581f89 100644 --- a/catch/unit/graph/hipGraphAddDependencies.cc +++ b/catch/unit/graph/hipGraphAddDependencies.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphAddKernelNode.cc b/catch/unit/graph/hipGraphAddKernelNode.cc index 9f030c9807..dd6d9f6960 100644 --- a/catch/unit/graph/hipGraphAddKernelNode.cc +++ b/catch/unit/graph/hipGraphAddKernelNode.cc @@ -21,7 +21,7 @@ THE SOFTWARE. #include #include #include -#include + #define CODEOBJ_FILE "add_Kernel.code" #define KERNEL_NAME "Add" diff --git a/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc b/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc index 4bceaa41b7..effb4f68e6 100644 --- a/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc +++ b/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -75,7 +74,7 @@ void GraphMemcpyFromSymbolShell(void* symbol, size_t offset, const std::vector= 5.2 - */ + */ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Negative_Parameters") { using namespace std::placeholders; hipGraph_t graph = nullptr; diff --git a/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc b/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc index 1c8c047f9e..3163443944 100644 --- a/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc +++ b/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc @@ -23,7 +23,6 @@ THE SOFTWARE. 
#include #include -#include #include #include @@ -78,7 +77,7 @@ void GraphMemcpyToSymbolShell(const void* symbol, size_t offset, const std::vect * - Verify that data is correctly copied to a symbol. A graph is constructed to which a * MemcpyToSymbol node is added. After graph execution, a MemcpyFromSymbol is performed and * the copied values are compared against values known to have been copied to symbol memory - * previously. + * previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for source memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphAddMemsetNode.cc b/catch/unit/graph/hipGraphAddMemsetNode.cc index 4d4359b2b1..e11d08a4b3 100644 --- a/catch/unit/graph/hipGraphAddMemsetNode.cc +++ b/catch/unit/graph/hipGraphAddMemsetNode.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include #include @@ -77,7 +76,7 @@ TEMPLATE_TEST_CASE("Unit_hipGraphAddMemsetNode_Positive_Basic", "", uint8_t, uin return hipSuccess; }; - GraphMemsetNodeCommonPositive(f); + GraphMemsetNodeCommonPositive(f); } /** @@ -129,7 +128,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_Negative_Parameters") { * Allocate a 2D array using hipMallocPitch. Initialize the allocated memory * using hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. 
-*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { CHECK_IMAGE_SUPPORT @@ -147,22 +146,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { } } // 2D Memory allocation hipMallocPitch - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, - numH)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, numH)); // Create Graph HIP_CHECK(hipGraphCreate(&graph, 0)); hipGraphNode_t memsetNode, memcpyNode; // Add MemSet Node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = pitch_A; memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = numH; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipMemcpy3DParms myparms{}; @@ -173,21 +170,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { myparms.extent = make_hipExtent(width, numH, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { for (size_t j = 0; j < numH; j++) { - REQUIRE(*(A_h + i*numH + j) == memSetVal); + REQUIRE(*(A_h + i * numH + j) == memSetVal); } } 
HIP_CHECK(hipGraphExecDestroy(graphExec)); @@ -200,12 +196,12 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { * Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { CHECK_IMAGE_SUPPORT size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A; - char *A_d; + char* A_d; // Initialize the host memory std::vector A_h(numW, ' '); @@ -213,22 +209,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { hipGraph_t graph; std::vector nodeDependencies; // 1D Memory allocation hipMallocPitch - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, - 1)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, 1)); // Create Graph HIP_CHECK(hipGraphCreate(&graph, 0)); hipGraphNode_t memsetNode, memcpyNode; // Add MemSet Node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = pitch_A; memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipMemcpy3DParms myparms{}; @@ -239,15 +233,14 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { myparms.extent = make_hipExtent(width, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; 
HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); @@ -264,7 +257,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { * Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { CHECK_IMAGE_SUPPORT @@ -300,8 +293,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = numH; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // MemCpy params @@ -315,22 +307,21 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { // Add MemCpy Node HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { for (size_t j = 0; j < numH; j++) { - REQUIRE(*(A_h + i*numH + j) == memSetVal); + REQUIRE(*(A_h + i * numH + j) == memSetVal); } } HIP_CHECK(hipGraphExecDestroy(graphExec)); @@ -343,7 +334,7 @@ 
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { * Allocate a 1D array using hipMalloc3D. Initialize the allocated * memory using hipGraphAddMemsetNode. Copy the values in device * memory to host using hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { CHECK_IMAGE_SUPPORT @@ -375,8 +366,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // MemCpy params @@ -390,21 +380,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { // Add MemCpy Node HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] == memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); @@ -415,9 +404,9 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { * Allocate a 1D array using hipMalloc. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. 
-*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { - char *A_d; + char* A_d; size_t NumW = SIZE; size_t Nbytes1D = SIZE * sizeof(char); @@ -436,14 +425,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { // Add Memset node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = Nbytes1D; memsetParams.elementSize = sizeof(char); memsetParams.width = NumW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0}; @@ -456,20 +444,19 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { myparms.extent = make_hipExtent(Nbytes1D, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < NumW; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] == memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); @@ -479,16 +466,15 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { int managed = 0; - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeManagedMemory, 
0)); + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); INFO("hipDeviceAttributeManagedMemory: " << managed); if (managed != 1) { WARN( - "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute" - "so defaulting to system memory."); + "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute" + "so defaulting to system memory."); } size_t Nbytes1D = SIZE * sizeof(char); - char *A_d; + char* A_d; // Initialize the host memory std::vector A_h(SIZE, ' '); // Device Memory @@ -502,14 +488,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { // Add Memset node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = Nbytes1D; memsetParams.elementSize = sizeof(char); memsetParams.width = SIZE; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node @@ -524,21 +509,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { myparms.extent = make_hipExtent(Nbytes1D, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < SIZE; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] 
== memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); diff --git a/catch/unit/graph/hipGraphAddNode.cc b/catch/unit/graph/hipGraphAddNode.cc new file mode 100644 index 0000000000..b321082d95 --- /dev/null +++ b/catch/unit/graph/hipGraphAddNode.cc @@ -0,0 +1,552 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "graph_memset_node_test_common.hh" +#include "graph_tests_common.hh" + +#pragma clang diagnostic ignored "-Wunused-parameter" + +/** + * @addtogroup hipGraphAddNode hipGraphAddNode + * @{ + * @ingroup GraphTest + * `hipGraphAddNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t + * *pDependencies, size_t numDependencies, hipGraphNodeParams *nodeParams)` - Creates a node and + * adds it to a graph + */ + +static constexpr size_t N = 1024; + +static void callbackfunc(void* A_h) { + int* A = reinterpret_cast(A_h); + for (int i = 0; i < N; i++) { + A[i] = i; + } +} + +static void __global__ vector_square(int* A_d) { + for (int i = 0; i < N; i++) { + A_d[i] = A_d[i] * A_d[i]; + } +} + +/** + * Test Description + * ------------------------ + * - Verify that all elements of destination memory are set to the correct value. + * The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different + * height and width, both on host and device. 
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEMPLATE_TEST_CASE("Unit_hipGraphAddNodeTypeMemset_Positive_Basic", "", uint8_t, uint16_t,
+                   uint32_t) {
+  const auto f = [](hipMemsetParams* params) {  // runs one memset node described by *params
+    hipGraph_t graph = nullptr;
+    HIP_CHECK(hipGraphCreate(&graph, 0));
+
+    hipGraphNode_t node = nullptr;
+    hipGraphNodeParams node_params = {};  // generic node params; the memset member is filled below
+    node_params.type = hipGraphNodeTypeMemset;
+    node_params.memset.dst = params->dst;
+    node_params.memset.elementSize = params->elementSize;
+    node_params.memset.width = params->width;
+    node_params.memset.height = params->height;
+    node_params.memset.pitch = params->pitch;
+    node_params.memset.value = params->value;
+    HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+    hipGraphExec_t graph_exec = nullptr;
+    HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
+
+    HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
+    HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));  // finish before the graph is destroyed
+
+    HIP_CHECK(hipGraphExecDestroy(graph_exec));
+    HIP_CHECK(hipGraphDestroy(graph));
+
+    return hipSuccess;  // the only value returned; every HIP call above is wrapped in HIP_CHECK
+  };
+
+  GraphMemsetNodeCommonPositive(f);  // shared driver; presumably supplies params and verifies
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify that a kernel node added with hipGraphAddNode executes correctly and squares the
+ * values in the device array. The result is copied to host and verified.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeKernel_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph;
+  hipGraphExec_t graphExec;
+
+  int* A_d{nullptr};
+  int *A_h{nullptr}, *B_h{nullptr};
+  HipTest::initArrays(&A_d, nullptr, nullptr, &A_h, &B_h, nullptr, N, false);
+
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  hipGraphNode_t memcpyH2D_A, memcpyD2H_B;
+  hipStream_t streamForGraph;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, allocation_size,
+                                    hipMemcpyHostToDevice));
+
+  hipGraphNode_t node;
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeKernel;
+  void* kernel_args[] = {&A_d};  // vector_square takes a single int* argument
+  node_params.kernel.func = reinterpret_cast(vector_square);
+  node_params.kernel.gridDim = dim3(1);   // one block ...
+  node_params.kernel.blockDim = dim3(1);  // ... of one thread; the kernel loops over all N itself
+  node_params.kernel.sharedMemBytes = 0;
+  node_params.kernel.kernelParams = reinterpret_cast(kernel_args);
+  node_params.kernel.extra = nullptr;
+  HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_B, graph, nullptr, 0, B_h, A_d, allocation_size,
+                                    hipMemcpyDeviceToHost));
+
+  HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &node, 1));  // H2D copy -> kernel
+  HIP_CHECK(hipGraphAddDependencies(graph, &node, &memcpyD2H_B, 1));  // kernel -> D2H copy
+
+  // Instantiate and launch the graph
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  // Verify execution result: B_h must hold the square of every A_h element
+  for (size_t i = 0; i < N; i++) {
+    if (B_h[i] != (A_h[i] * A_h[i])) {
+      REQUIRE(false);
+    }
+  }
+
+  HipTest::freeArrays(A_d, nullptr, nullptr, A_h, B_h, nullptr, false);
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify that a host node added with hipGraphAddNode executes correctly and sets the values
+ * of a host array. The result is verified.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeHost_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph;
+  hipGraphExec_t graphExec;
+  int* A_h = (int*)malloc(allocation_size);  // NOTE(review): result is not null-checked before use
+  std::fill_n(A_h, N, 0);  // start from zeros so the callback's writes are observable
+
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+  hipStream_t streamForGraph;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+
+  hipGraphNode_t node;
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeHost;
+  node_params.host.fn = callbackfunc;
+  node_params.host.userData = A_h;  // handed to callbackfunc as its void* argument
+  HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+  // Instantiate and launch the graph
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  // Verify execution result: callbackfunc must have written A_h[i] == i
+  for (size_t i = 0; i < N; i++) {
+    if (A_h[i] != static_cast(i)) {
+      REQUIRE(false);
+    }
+  }
+
+  free(A_h);
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify that when graph is created and childgraph node is added with hipGraphAddNode, the
+ * childgraph executes correctly.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeChildGraph_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph, childgraph;
+  hipGraphExec_t graphExec;
+
+  int *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
+  int *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr};
+  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
+
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  for (size_t i = 0; i < N; i++) {
+    B_h[i] = i;  // known pattern that the child graph copies B_h -> B_d -> A_h
+  }
+
+  hipGraphNode_t memcpyH2D_A, memcpyH2D_B, childGraphNode1, memcpyH2D_C;
+  hipStream_t streamForGraph;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+  HIP_CHECK(hipGraphCreate(&childgraph, 0));
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, childgraph, nullptr, 0, B_d, B_h, allocation_size,
+                                    hipMemcpyHostToDevice));
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, childgraph, nullptr, 0, A_h, B_d, allocation_size,
+                                    hipMemcpyDeviceToHost));  // B_d -> A_h, despite the H2D name
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, graph, nullptr, 0, C_d, C_h, allocation_size,
+                                    hipMemcpyHostToDevice));
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, graph, nullptr, 0, A_h, C_d, allocation_size,
+                                    hipMemcpyDeviceToHost));  // reuses the memcpyH2D_C handle
+
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeGraph;
+  node_params.graph.graph = childgraph;
+  HIP_CHECK(hipGraphAddNode(&childGraphNode1, graph, nullptr, 0, &node_params));
+
+  HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_B, &memcpyH2D_A, 1));
+
+  // Instantiate and launch the childgraph; NOTE(review): the parent `graph` is never launched
+  HIP_CHECK(hipGraphInstantiate(&graphExec, childgraph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  // Verify execution result: A_h must equal B_h after the B_h -> B_d -> A_h round trip
+  for (size_t i = 0; i < N; i++) {
+    if (B_h[i] != A_h[i]) {
+      REQUIRE(false);
+    }
+  }
+
+  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(childgraph));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+
+static hipError_t MemcpyType3DWrapper(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr,
+                                      hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
+                                      hipStream_t stream = nullptr) {
+  auto parms = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
+
+  hipGraph_t graph = nullptr;
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+  hipGraphNode_t node = nullptr;
+
+  hipGraphNodeParams node_params = {};  // a single memcpy node carrying the 3D copy in `parms`
+  node_params.type = hipGraphNodeTypeMemcpy;
+  memset(&node_params.memcpy, 0, sizeof(hipMemcpyNodeParams));  // zero the whole member first
+  node_params.memcpy.copyParams = parms;
+  HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+  hipGraphExec_t graph_exec = nullptr;
+  HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));  // the `stream` parameter is unused
+  HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
+
+  HIP_CHECK(hipGraphExecDestroy(graph_exec));
+  HIP_CHECK(hipGraphDestroy(graph));
+
+  return hipSuccess;  // the only value returned; every HIP call above is wrapped in HIP_CHECK
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify basic API behavior. A Memcpy node is created using hipGraphAddNode with parameters
+ * set according to the test run, after which the graph is run and the memcpy results are verified.
+ * The test is run for all possible memcpy directions, with both the corresponding memcpy
+ * kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeMemcpy_Positive_Basic") {
+  constexpr bool async = false;  // NOTE(review): no visible use below; presumably a template arg
+
+  SECTION("Device to host") { Memcpy3DDeviceToHostShell(MemcpyType3DWrapper); }
+
+  SECTION("Device to host with default kind") {
+    Memcpy3DDeviceToHostShell(MemcpyType3DWrapper);
+  }
+
+  SECTION("Host to device") { Memcpy3DHostToDeviceShell(MemcpyType3DWrapper); }
+
+  SECTION("Host to device with default kind") {
+    Memcpy3DHostToDeviceShell(MemcpyType3DWrapper);
+  }
+
+  SECTION("Host to host") { Memcpy3DHostToHostShell(MemcpyType3DWrapper); }
+
+  SECTION("Host to host with default kind") { Memcpy3DHostToHostShell(MemcpyType3DWrapper); }
+
+  SECTION("Device to device") {
+    SECTION("Peer access enabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+    SECTION("Peer access disabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+  }
+
+  SECTION("Device to device with default kind") {
+    SECTION("Peer access enabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+    SECTION("Peer access disabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+  }
+
+  SECTION("Array from/to Host") { Memcpy3DArrayHostShell(MemcpyType3DWrapper); }
+
+#if HT_NVIDIA  // Array from/to Device is disabled on AMD due to defect - EXSWHTEC-220
+  SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell(MemcpyType3DWrapper); }
+#endif
+}
+
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify basic API functionality where one event record node is added to a graph with
+ * hipGraphAddNode and its correct behavior is verified.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeEventRecord_Positive_Basic") {
+  hipGraph_t graph;
+  hipStream_t streamForGraph;
+  hipGraphExec_t graphExec;
+  hipGraphNode_t node;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+  hipEvent_t event;
+  HIP_CHECK(hipEventCreate(&event));
+
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeEventRecord;
+  node_params.eventRecord.event = event;  // the event this node records when the graph runs
+  HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+  // Instantiate and launch the graph
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  // Wait for the event recorded by the graph's only node; this is the test's sole check
+  HIP_CHECK(hipEventSynchronize(event));
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipEventDestroy(event));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify basic API functionality where one event record node and one event wait node are added
+ * to a graph with hipGraphAddNode and their correct behavior is verified.
+ * Test source + * ------------------------ + * - unit/graph/hipGraphAddNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddNodeTypeEventWait_Positive_Basic") { + hipGraph_t graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipEvent_t event; + HIP_CHECK(hipEventCreate(&event)); + hipGraphNode_t event_rec_node, event_wait_node; + + // Create a event record node in graph + hipGraphNodeParams rec_node_params = {}; + rec_node_params.type = hipGraphNodeTypeEventRecord; + rec_node_params.eventRecord.event = event; + HIP_CHECK(hipGraphAddNode(&event_rec_node, graph, nullptr, 0, &rec_node_params)); + + // Create a event wait node in graph + hipGraphNodeParams wait_node_params = {}; + rec_node_params.type = hipGraphNodeTypeWaitEvent; + rec_node_params.eventWait.event = event; + HIP_CHECK(hipGraphAddNode(&event_wait_node, graph, &event_rec_node, 1, &wait_node_params)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipEventDestroy(event)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); +} + +/** + * Test Description + * ------------------------ + * - Test to verify basic API functionality when memalloc and memfree nodes are added with + * hipGraphAddNode. Verify that memory is allocated correctly and graph behaves as expected when + * free node is added to the same graph. 
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeMemAlloc_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph;
+  hipStream_t streamForGraph;
+  hipGraphExec_t graphExec;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  hipGraphNode_t alloc_node;
+  hipGraphNodeParams alloc_node_params = {};
+  alloc_node_params.type = hipGraphNodeTypeMemAlloc;
+  memset(&alloc_node_params.alloc, 0, sizeof(hipMemAllocNodeParams));  // zero the member first
+  alloc_node_params.alloc.bytesize = allocation_size;
+  alloc_node_params.alloc.poolProps.allocType = hipMemAllocationTypePinned;
+  alloc_node_params.alloc.poolProps.location.id = 0;  // device 0
+  alloc_node_params.alloc.poolProps.location.type = hipMemLocationTypeDevice;
+  HIP_CHECK(hipGraphAddNode(&alloc_node, graph, nullptr, 0, &alloc_node_params));
+
+  REQUIRE(alloc_node_params.alloc.dptr != nullptr);  // the add call must fill in the pointer
+  int* A_d = reinterpret_cast(alloc_node_params.alloc.dptr);
+
+  hipGraphNode_t free_node;
+  hipGraphNodeParams free_node_params = {};
+  free_node_params.type = hipGraphNodeTypeMemFree;
+  free_node_params.free.dptr = A_d;  // free exactly what the alloc node produced
+  HIP_CHECK(hipGraphAddNode(&free_node, graph, &alloc_node, 1, &free_node_params));
+
+  // Instantiate and launch the graph (alloc node followed by the dependent free node)
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+  HIP_CHECK(hipDeviceGraphMemTrim(0));  // trim graph memory on device 0 once the test is done
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test to verify hipGraphAddNode behavior with invalid arguments:
+ *    -# Nullptr graph
+ *    -# Nullptr graph node
+ *    -# Invalid numDependencies for null list of dependencies
+ *    -# Node in
dependency is from different graph
+ *    -# Invalid numNodes
+ *    -# Duplicate node in dependencies
+ *    -# Nullptr params
+ *    -# params type is invalid
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNode_Negative_Parameters") {
+  using namespace std::placeholders;
+  hipGraph_t graph = nullptr;
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  hipEvent_t event;
+  HIP_CHECK(hipEventCreate(&event));
+
+  hipGraphNode_t node;
+  hipGraphNodeParams node_params = {};  // valid event-record params used as the baseline
+  node_params.type = hipGraphNodeTypeEventRecord;
+  node_params.eventRecord.event = event;
+
+  GraphAddNodeCommonNegativeTests(std::bind(hipGraphAddNode, _1, _2, _3, _4, &node_params), graph);
+
+  SECTION("params == nullptr") {
+    HIP_CHECK_ERROR(hipGraphAddNode(&node, graph, nullptr, 0, nullptr), hipErrorInvalidValue);
+  }
+
+  SECTION("params type is invalid") {
+    node_params.type = static_cast(0x20);  // presumably not a valid node type value - confirm
+    HIP_CHECK_ERROR(hipGraphAddNode(&node, graph, nullptr, 0, &node_params), hipErrorInvalidValue);
+  }
+
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipEventDestroy(event));
+}
diff --git a/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc b/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc index 585435e684..3d8681eeb4 100644 --- a/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc +++ b/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -86,7 +85,7 @@ void GraphExecMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_s * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but * incorrect parameters. After the graph is instantiated the parameters are updated to correct * values and the graph executed. 
After graph execution, a MemcpyFromSymbol is performed and the - * copied values are compared against values known to have been copied to symbol memory previously. + * copied values are compared against values known to have been copied to symbol memory previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc b/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc index edecbfad9a..ee2282b425 100644 --- a/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc +++ b/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include -#include #include #include "graph_memset_node_test_common.hh" @@ -46,7 +45,7 @@ THE SOFTWARE. * which also constitutes a test for said API. * The test is repeated for all valid element sizes(1, * 2, 4), and several allocations of different width(height is always 1 because only 1D memset nodes - * can be updated), both on host and device + * can be updated), both on host and device * Test source * ------------------------ * - unit/graph/hipGraphExecMemsetNodeSetParams.cc diff --git a/catch/unit/graph/hipGraphGetEdges.cc b/catch/unit/graph/hipGraphGetEdges.cc index e2a863ef04..408ba88884 100644 --- a/catch/unit/graph/hipGraphGetEdges.cc +++ b/catch/unit/graph/hipGraphGetEdges.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphGetNodes.cc b/catch/unit/graph/hipGraphGetNodes.cc index 959c9c55b6..2d7837fd13 100644 --- a/catch/unit/graph/hipGraphGetNodes.cc +++ b/catch/unit/graph/hipGraphGetNodes.cc @@ -22,7 +22,7 @@ THE SOFTWARE. 
#include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphGetRootNodes.cc b/catch/unit/graph/hipGraphGetRootNodes.cc index 69e4b34de0..7e88953f39 100644 --- a/catch/unit/graph/hipGraphGetRootNodes.cc +++ b/catch/unit/graph/hipGraphGetRootNodes.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc b/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc index 7f1ac7fe3c..b8c10c3900 100644 --- a/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc +++ b/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -85,7 +84,7 @@ void GraphMemcpyFromSymbolSetParamsShell(const void* symbol, const void* alt_sym * - Verify that data is correctly copied from a symbol after node parameters are set following * node addition. A graph is constructed to which a MemcpyFromSymbol node is added with valid but * incorrect parameters. The parameters are then updated to correct values and the graph executed. - * Values in destination memory are compared against values known to be in symbol memory. + * Values in destination memory are compared against values known to be in symbol memory. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc b/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc index 0f84b6b283..b62b01cf5b 100644 --- a/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc +++ b/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. 
#include #include -#include #include #include @@ -86,7 +85,7 @@ void GraphMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_symbo * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but * incorrect parameters. The parameters are then updated to correct values and the graph executed. * After graph execution, a MemcpyFromSymbol is performed and the copied values are compared against - * values known to have been copied to symbol memory previously. + * values known to have been copied to symbol memory previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphMemsetNodeGetParams.cc b/catch/unit/graph/hipGraphMemsetNodeGetParams.cc index 25fe849206..1c640db2de 100644 --- a/catch/unit/graph/hipGraphMemsetNodeGetParams.cc +++ b/catch/unit/graph/hipGraphMemsetNodeGetParams.cc @@ -19,7 +19,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include #include #include diff --git a/catch/unit/graph/hipGraphMemsetNodeSetParams.cc b/catch/unit/graph/hipGraphMemsetNodeSetParams.cc index d8f7cac249..d816e8b88e 100644 --- a/catch/unit/graph/hipGraphMemsetNodeSetParams.cc +++ b/catch/unit/graph/hipGraphMemsetNodeSetParams.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include -#include #include #include "graph_memset_node_test_common.hh" @@ -44,7 +43,7 @@ THE SOFTWARE. * The parameters are also verified via hipGraphMemsetNodeGetParams, which also constitutes a test * for said API. 
* The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different - * height and width both on host and device + * height and width both on host and device * Test source * ------------------------ * - unit/graph/hipGraphMemsetNodeSetParams.cc @@ -100,7 +99,7 @@ TEMPLATE_TEST_CASE("Unit_hipGraphMemsetNodeSetParams_Positive_Basic", "", uint8_ return hipSuccess; }; - GraphMemsetNodeCommonPositive(f); + GraphMemsetNodeCommonPositive(f); } /** diff --git a/catch/unit/graph/hipGraphNodeGetDependencies.cc b/catch/unit/graph/hipGraphNodeGetDependencies.cc index 82e7ce9aef..a395ebc961 100644 --- a/catch/unit/graph/hipGraphNodeGetDependencies.cc +++ b/catch/unit/graph/hipGraphNodeGetDependencies.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphNodeGetDependentNodes.cc b/catch/unit/graph/hipGraphNodeGetDependentNodes.cc index 63d5c4f889..f3a6d984f9 100644 --- a/catch/unit/graph/hipGraphNodeGetDependentNodes.cc +++ b/catch/unit/graph/hipGraphNodeGetDependentNodes.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphRemoveDependencies.cc b/catch/unit/graph/hipGraphRemoveDependencies.cc index f29498950b..46d4d6ff10 100644 --- a/catch/unit/graph/hipGraphRemoveDependencies.cc +++ b/catch/unit/graph/hipGraphRemoveDependencies.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipLaunchHostFunc.cc b/catch/unit/graph/hipLaunchHostFunc.cc index 1d9ea95b00..fe82055061 100644 --- a/catch/unit/graph/hipLaunchHostFunc.cc +++ b/catch/unit/graph/hipLaunchHostFunc.cc @@ -18,7 +18,6 @@ THE SOFTWARE. 
*/ #include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamBeginCapture.cc b/catch/unit/graph/hipStreamBeginCapture.cc index 9814a05097..cbe8185818 100644 --- a/catch/unit/graph/hipStreamBeginCapture.cc +++ b/catch/unit/graph/hipStreamBeginCapture.cc @@ -19,7 +19,7 @@ THE SOFTWARE. #include #include -#include + #include "stream_capture_common.hh" // NOLINT #pragma clang diagnostic ignored "-Wunused-variable" @@ -56,8 +56,7 @@ static void hostNodeCallback(void* data) { } template -void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, - hipStream_t stream) { +void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(T); @@ -89,8 +88,7 @@ void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, std::fill_n(A_h.host_ptr(), N, static_cast(i)); HIP_CHECK(hipGraphLaunch(graphExec, stream)); HIP_CHECK(hipStreamSynchronize(stream)); - ArrayFindIfNot(B_h.host_ptr(), - static_cast(i) * static_cast(i), N); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i) * static_cast(i), N); } HIP_CHECK(hipGraphExecDestroy(graphExec)) @@ -117,16 +115,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") { StreamGuard stream_guard(stream_type); hipStream_t stream = stream_guard.stream(); - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); EventsGuard events_guard(3); StreamsGuard streams_guard(2); SECTION("Linear graph capture") { captureStreamAndLaunchGraph( - [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, - hipStream_t stream) { + [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) { return captureSequenceLinear(A_h, A_d, B_h, B_d, 
N, stream); }, captureMode, stream); @@ -134,10 +131,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") { SECTION("Branched graph capture") { captureStreamAndLaunchGraph( - [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, - float* B_d, size_t N, hipStream_t stream) { - captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, - streams_guard.stream_list(), events_guard.event_list()); + [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, + hipStream_t stream) { + captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(), + events_guard.event_list()); }, captureMode, stream); } @@ -173,8 +170,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") { hipErrorIllegalState); } SECTION("Creating hipStream with invalid mode") { - HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), hipErrorInvalidValue); } #if HT_NVIDIA // EXSWHTEC-216 SECTION("Stream capture on uninitialized stream returns error code.") { @@ -182,8 +178,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") { StreamGuard sg(Streams::created); return sg.stream(); }; - HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), - hipStreamCaptureModeGlobal), + HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), hipStreamCaptureModeGlobal), hipErrorContextIsDestroyed); } #endif @@ -207,8 +202,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") { StreamGuard stream_guard(stream_type); hipStream_t s = stream_guard.stream(); - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); HIP_CHECK(hipStreamBeginCapture(s, captureMode)); @@ -218,8 +213,7 
@@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") { /* Local function for inter stream event synchronization */ -static void interStrmEventSyncCapture(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void interStrmEventSyncCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; @@ -266,8 +260,7 @@ static void interStrmEventSyncCapture(const hipStream_t& stream1, /* Local function for colligated stream capture */ -static void colligatedStrmCapture(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void colligatedStrmCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; @@ -310,8 +303,7 @@ static void colligatedStrmCapture(const hipStream_t& stream1, /* Local function for colligated stream capture functionality */ -static void colligatedStrmCaptureFunc(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void colligatedStrmCaptureFunc(const hipStream_t& stream1, const hipStream_t& stream2) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(int); @@ -331,10 +323,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1, // Capture 2 streams HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal)); - captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), - N, stream1); - captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), - N, stream2); + captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); + captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), 
D_d.ptr(), N, stream2); HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); @@ -370,9 +360,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1, /* Stream Capture thread function */ -static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, - int* B_h, int* B_d, hipGraph_t* graph, - size_t N, hipStreamCaptureMode mode) { +static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d, + hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) { // Capture stream HIP_CHECK(hipStreamBeginCapture(stream, mode)); captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); @@ -404,10 +393,10 @@ static void multithreadedTest(hipStreamCaptureMode mode) { LinearAllocGuard D_d(LinearAllocs::hipMalloc, Nbytes); // Launch 2 threads to capture the 2 streams into graphs - std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), - B_h.host_ptr(), B_d.ptr(), &graph1, N, mode); - std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), - D_h.host_ptr(), D_d.ptr(), &graph2, N, mode); + std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), + B_d.ptr(), &graph1, N, mode); + std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), + D_d.ptr(), &graph2, N, mode); t1.join(); t2.join(); @@ -480,11 +469,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Flags") { TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Priority") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); - StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, - minPriority); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); - StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, - maxPriority); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, 
maxPriority); hipStream_t stream2 = stream_guard2.stream(); interStrmEventSyncCapture(stream1, stream2); } @@ -533,11 +520,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Flags") { TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Prio") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); - StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, - minPriority); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); - StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, - maxPriority); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); hipStream_t stream2 = stream_guard2.stream(); colligatedStrmCapture(stream1, stream2); } @@ -578,8 +563,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCaptureFunc") { * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multithreaded") { - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); multithreadedTest(captureMode); } @@ -708,8 +693,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingFromWithinStrms") { HIP_CHECK(hipEventRecord(events[2], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, - streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); @@ -751,8 +735,7 @@ 
TEST_CASE("Unit_hipStreamBeginCapture_Negative_DetectingInvalidCapture") { dummyKernel<<<1, 1, 0, streams[0]>>>(); // Since stream[1] is already in capture mode due to event wait // hipStreamBeginCapture on stream[1] is expected to return error. - HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], - hipStreamCaptureModeGlobal), + HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal), hipErrorIllegalState); } @@ -785,8 +768,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingMultGraphsFrom1Strm") { for (int i = 0; i < 3; i++) { HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem); - HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, - stream1)); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1)); HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i])); } // Instantiate and execute all graphs @@ -825,22 +807,19 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_CheckingSyncDuringCapture") { EventsGuard events_guard(1); hipEvent_t e = events_guard[0]; - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("Synchronize stream during capture") { - HIP_CHECK_ERROR(hipStreamSynchronize(stream), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported); } SECTION("Query stream during capture") { - HIP_CHECK_ERROR(hipStreamQuery(stream), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported); } #if HT_NVIDIA SECTION("Synchronize device during capture") { - 
HIP_CHECK_ERROR(hipDeviceSynchronize(), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported); } SECTION("Synchronize event during capture") { HIP_CHECK(hipEventRecord(e, stream)); @@ -884,17 +863,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_UnsafeCallsDuringCapture") { HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("hipMalloc during capture") { - HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported); } SECTION("hipMemcpy during capture") { - HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), - hipMemcpyHostToDevice), + HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice), hipErrorStreamCaptureImplicit); } SECTION("hipMemset during capture") { - HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), - hipErrorStreamCaptureImplicit); + HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit); } } #endif @@ -931,8 +907,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") { HIP_CHECK(hipEventRecord(e, stream1)); HIP_CHECK(hipStreamWaitEvent(stream2, e, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); - HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), - hipErrorStreamCaptureUnjoined); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); } SECTION("End strm capture when forked strm still has operations") { EventsGuard events_guard(2); @@ -946,8 +921,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") { HIP_CHECK(hipEventRecord(e2, stream2)); HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); - HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), - hipErrorStreamCaptureUnjoined); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), 
hipErrorStreamCaptureUnjoined); } } /** @@ -970,19 +944,17 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") { SUCCEED("skipping the testcases as numDevices < 2"); return; } - hipStream_t* stream = reinterpret_cast - (malloc(devcount * sizeof(hipStream_t))); + hipStream_t* stream = reinterpret_cast(malloc(devcount * sizeof(hipStream_t))); REQUIRE(stream != nullptr); - hipGraph_t* graph = reinterpret_cast - (malloc(devcount * sizeof(hipGraph_t))); + hipGraph_t* graph = reinterpret_cast(malloc(devcount * sizeof(hipGraph_t))); REQUIRE(graph != nullptr); int **devMem{nullptr}, **hostMem{nullptr}; hostMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(hostMem != nullptr); devMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(devMem != nullptr); - hipGraphExec_t* graphExec = reinterpret_cast - (malloc(devcount * sizeof(hipGraphExec_t))); + hipGraphExec_t* graphExec = + reinterpret_cast(malloc(devcount * sizeof(hipGraphExec_t))); // Capture stream in each device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); @@ -994,15 +966,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") { for (int i = 0; i < (dev + 1); i++) { incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]); } - HIP_CHECK(hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), - hipMemcpyDefault, stream[dev])); + HIP_CHECK( + hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev])); HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev])); } // Launch the captured graphs in the respective device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); - HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev])); } // Validate output @@ -1069,8 +1040,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_nestedStreamCapture") { 
HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), - hipMemcpyDefault, streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault, + streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int))); @@ -1108,23 +1079,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { hipGraph_t graphs[3]; StreamsGuard streams(3); EventsGuard events(4); - LinearAllocGuard hostMem_g1 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard hostMem_g2 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard hostMem_g3 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard devMem_g1 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); - LinearAllocGuard devMem_g2 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); - LinearAllocGuard devMem_g3 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard hostMem_g1 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard hostMem_g2 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard hostMem_g3 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem_g1 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g2 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g3 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); - std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), - hostMem_g3.host_ptr()}; - std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), - devMem_g3.ptr()}; + std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), 
hostMem_g3.host_ptr()}; + std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()}; // Create a device memory of size int and initialize it to 0 for (int i = 0; i < 3; i++) { memset(hostMem[i], 0, sizeof(int)); @@ -1148,16 +1111,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), - hipMemcpyDefault, streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); // End Capture // Start capturing graph2 from stream 2 HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal)); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); - HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), - hipMemcpyDefault, streams[1])); + HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1])); HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); // End Capture // Start capturing graph3 from stream 3 HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal)); @@ -1166,8 +1127,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); - HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), - hipMemcpyDefault, streams[2])); + HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2])); HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int))); @@ -1211,40 +1171,32 @@ 
TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { EventsGuard events(7); // Allocate Device memory and Host memory size_t N = GRIDSIZE * BLOCKSIZE; - LinearAllocGuard Ah = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Ch = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard - (LinearAllocs::hipMalloc, N * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard - (LinearAllocs::hipMalloc, N * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ch = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); + LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0)); HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[4])); + HIP_CHECK( + hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); + HIP_CHECK( + hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4])); hipHostFn_t fn = hostNodeCallback; HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr)); HIP_CHECK(hipEventRecord(events[1], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0)); int* Ad_2nd_half = Ad.ptr() + N / 2; int* Ad_1st_half = Ad.ptr(); - mymul<<>>(Ad_2nd_half, - 
CONST_KER2_VAL); - mymul<<>>(Ad_1st_half, - CONST_KER1_VAL); + mymul<<>>(Ad_2nd_half, CONST_KER2_VAL); + mymul<<>>(Ad_1st_half, CONST_KER1_VAL); HIP_CHECK(hipEventRecord(events[2], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0)); - mymul<<>>(Ad_1st_half, - CONST_KER3_VAL); + mymul<<>>(Ad_1st_half, CONST_KER3_VAL); HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr)); HIP_CHECK(hipEventRecord(events[6], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0)); @@ -1255,8 +1207,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipEventRecord(events[4], streams[3])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0)); - HIP_CHECK(hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[0])); + HIP_CHECK( + hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Execute and test the graph hipGraphExec_t graphExec{nullptr}; @@ -1269,11 +1221,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { HIP_CHECK(hipStreamSynchronize(streams[0])); for (size_t i = 0; i < N; i++) { if (i > (N / 2 - 1)) { - REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + - Ah.host_ptr()[i] * CONST_KER2_VAL)); + REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL)); } else { - REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + - Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); + REQUIRE(Ch.host_ptr()[i] == + (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); } } } @@ -1340,14 +1291,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; // Allocate device memory - LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Ad = 
LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); @@ -1357,10 +1306,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Stream Creation Before Capture") { StreamsGuard stream1(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture @@ -1368,10 +1317,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Synchronizing multiple streams during Capture") { StreamsGuard stream1(1), stream2(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream2[0])); + 
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream2[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamSynchronize(stream2[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); @@ -1380,20 +1329,20 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Stream Creation After Capture") { HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture } SECTION("Stream Synchronize Before Capture") { StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); myadd<<>>(Ad.ptr(), Bd.ptr()); @@ -1404,10 +1353,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture StreamsGuard 
stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); } // Execute and test the graph @@ -1415,8 +1364,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { HIP_CHECK(hipGraphLaunch(graphExec, stream0[0])); HIP_CHECK(hipStreamSynchronize(stream0[0])); // Check output - HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } @@ -1437,20 +1385,16 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { * - HIP_VERSION >= 5.6 */ // Local function executed as thread -static void strmSyncThread(int *Ah, int *Ad, int *Bh, int *Bd, - int BLOCKSIZE, hipError_t *error) { +static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) { StreamsGuard stream(1); - HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream[0])); - HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream[0])); + HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); + HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); *error = hipStreamSynchronize(stream[0]); } // Local function executed as thread -static void captureStrmThread(hipGraph_t *graph, int *Ah, int *Ad, - int *Bh, int *Bd, int BLOCKSIZE, int GRIDSIZE, - hipStreamCaptureMode 
flag, hipError_t *error) { +static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, + int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) { StreamsGuard stream(1); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], flag)); @@ -1466,14 +1410,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { constexpr int VALUE1 = 7, VALUE2 = 11; hipGraph_t graph{nullptr}; // Allocate device memory - LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); @@ -1483,10 +1425,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { StreamsGuard stream(2); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + 
stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); } @@ -1494,34 +1436,30 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") { StreamsGuard stream(2); // Capture streams into graph - HIP_CHECK(hipStreamBeginCapture(stream[0], - hipStreamCaptureModeThreadLocal)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); + HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeThreadLocal)); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); } #endif #if HT_AMD SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeGlobal, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeGlobal, &error); REQUIRE(error == hipErrorStreamCaptureUnsupported); } #endif SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeThreadLocal, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeThreadLocal, &error); REQUIRE(error == hipSuccess); } SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), 
Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeRelaxed, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeRelaxed, &error); REQUIRE(error == hipSuccess); } if (graph != nullptr) { @@ -1532,8 +1470,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { HIP_CHECK(hipGraphLaunch(graphExec, stream[0])); HIP_CHECK(hipStreamSynchronize(stream[0])); // Check output - HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } diff --git a/catch/unit/graph/hipStreamEndCapture.cc b/catch/unit/graph/hipStreamEndCapture.cc index 39f25ce2a6..7ec1ea88a6 100644 --- a/catch/unit/graph/hipStreamEndCapture.cc +++ b/catch/unit/graph/hipStreamEndCapture.cc @@ -19,7 +19,6 @@ THE SOFTWARE. #include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamGetCaptureInfo.cc b/catch/unit/graph/hipStreamGetCaptureInfo.cc index d8f8cb5d55..9c3317ed85 100644 --- a/catch/unit/graph/hipStreamGetCaptureInfo.cc +++ b/catch/unit/graph/hipStreamGetCaptureInfo.cc @@ -18,7 +18,6 @@ THE SOFTWARE. */ #include -#include #include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc b/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc index ea67318ef9..0dde7247b1 100644 --- a/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc +++ b/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc @@ -19,7 +19,6 @@ THE SOFTWARE. 
#include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamIsCapturing.cc b/catch/unit/graph/hipStreamIsCapturing.cc index c6a77c316e..256d20f21d 100644 --- a/catch/unit/graph/hipStreamIsCapturing.cc +++ b/catch/unit/graph/hipStreamIsCapturing.cc @@ -18,7 +18,6 @@ THE SOFTWARE. */ #include -#include #include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc b/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc index e35dd317d6..e11e1c3e24 100644 --- a/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc +++ b/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc @@ -20,7 +20,6 @@ THE SOFTWARE. #include #include #include -#include #include "stream_capture_common.hh" @@ -367,7 +366,7 @@ TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Positive_Parameters") { const hipStreamUpdateCaptureDependenciesFlags flag = GENERATE(hipStreamAddCaptureDependencies, hipStreamSetCaptureDependencies); - HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); //hipStreamCaptureModeGlobal)); + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); // hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, nullptr, 0, flag)); diff --git a/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc b/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc index c35fc18900..5ac784bc79 100644 --- a/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc +++ b/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc @@ -20,7 +20,6 @@ THE SOFTWARE. #include #include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/kernel/hipDynamicShared.cc b/catch/unit/kernel/hipDynamicShared.cc index c8593b0939..90de000d8f 100644 --- a/catch/unit/kernel/hipDynamicShared.cc +++ b/catch/unit/kernel/hipDynamicShared.cc @@ -20,7 +20,7 @@ THE SOFTWARE. 
#include #include #include -#include + #pragma clang diagnostic ignored "-Wunused-parameter" diff --git a/catch/unit/kernel/hipDynamicShared2.cc b/catch/unit/kernel/hipDynamicShared2.cc index 47a94c1357..bcc5ecca85 100644 --- a/catch/unit/kernel/hipDynamicShared2.cc +++ b/catch/unit/kernel/hipDynamicShared2.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #define LEN (16 * 1024) #define SIZE (LEN * sizeof(float)) diff --git a/catch/unit/kernel/hipEmptyKernel.cc b/catch/unit/kernel/hipEmptyKernel.cc index 9262397416..eb6f9818ba 100644 --- a/catch/unit/kernel/hipEmptyKernel.cc +++ b/catch/unit/kernel/hipEmptyKernel.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #pragma clang diagnostic ignored "-Wunused-parameter" diff --git a/catch/unit/kernel/hipExtLaunchKernelGGL.cc b/catch/unit/kernel/hipExtLaunchKernelGGL.cc index c23ebb7f81..6cf89ade50 100644 --- a/catch/unit/kernel/hipExtLaunchKernelGGL.cc +++ b/catch/unit/kernel/hipExtLaunchKernelGGL.cc @@ -21,7 +21,7 @@ THE SOFTWARE. #include #include #include -#include + #include "hip/hip_ext.h" static unsigned threadsPerBlock = 256; diff --git a/catch/unit/kernel/hipGridLaunch.cc b/catch/unit/kernel/hipGridLaunch.cc index 7716b0781a..e009a29c77 100644 --- a/catch/unit/kernel/hipGridLaunch.cc +++ b/catch/unit/kernel/hipGridLaunch.cc @@ -21,7 +21,7 @@ THE SOFTWARE. #include #include #include -#include + static unsigned threadsPerBlock = 256; static unsigned blocksPerCU = 6; diff --git a/catch/unit/kernel/hipLanguageExtensions.cc b/catch/unit/kernel/hipLanguageExtensions.cc index 446c91a213..e303c52138 100644 --- a/catch/unit/kernel/hipLanguageExtensions.cc +++ b/catch/unit/kernel/hipLanguageExtensions.cc @@ -20,7 +20,7 @@ THE SOFTWARE. 
#include #include #include -#include + #include #pragma clang diagnostic ignored "-Wunused-variable" diff --git a/catch/unit/kernel/hipLaunchParm.cc b/catch/unit/kernel/hipLaunchParm.cc index 9ae028cac7..c5cf63cbbf 100644 --- a/catch/unit/kernel/hipLaunchParm.cc +++ b/catch/unit/kernel/hipLaunchParm.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include #pragma clang diagnostic ignored "-Wunused-variable" diff --git a/catch/unit/kernel/hipLaunchParmFunctor.cc b/catch/unit/kernel/hipLaunchParmFunctor.cc index 5b12ff52ae..a99aa5f412 100644 --- a/catch/unit/kernel/hipLaunchParmFunctor.cc +++ b/catch/unit/kernel/hipLaunchParmFunctor.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + class HipFunctorTests { public: diff --git a/catch/unit/kernel/hipPrintfKernel.cc b/catch/unit/kernel/hipPrintfKernel.cc index da81c767fa..c616f86eae 100644 --- a/catch/unit/kernel/hipPrintfKernel.cc +++ b/catch/unit/kernel/hipPrintfKernel.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #include #include "../kernel/printf_common.h" diff --git a/catch/unit/kernel/hipShflTests.cc b/catch/unit/kernel/hipShflTests.cc index 89c529c16b..3525602bd0 100644 --- a/catch/unit/kernel/hipShflTests.cc +++ b/catch/unit/kernel/hipShflTests.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include #include #include -#include #define WIDTH 4 @@ -32,20 +31,17 @@ THE SOFTWARE. 
#define THREADS_PER_BLOCK_Z 1 // Device (Kernel) function, it must be void -template -__global__ void matrixTranspose(T* out, T* in, const int width) { +template __global__ void matrixTranspose(T* out, T* in, const int width) { int x = blockDim.x * blockIdx.x + threadIdx.x; T val = in[x]; for (int i = 0; i < width; i++) { - for (int j = 0; j < width; j++) - out[i * width + j] = __shfl(val, j * width + i); + for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i); } } // CPU implementation of matrix transpose template -void matrixTransposeCPUReference(T* output, - T* input, const unsigned int width) { +void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) { for (unsigned int j = 0; j < width; j++) { for (unsigned int i = 0; i < width; i++) { output[i * width + j] = input[j * width + i]; @@ -54,61 +50,52 @@ void matrixTransposeCPUReference(T* output, } static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { - *fact = static_cast(INT32_MAX)+1; -} +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } static void getFactor(float* fact) { *fact = 2.5; } static void getFactor(__half* fact) { *fact = 2.5; } static void getFactor(double* fact) { *fact = 2.5; } static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { - *fact = static_cast(__LONG_LONG_MAX__)+1; -} +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } -template -int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { +template int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { int errors = 0; for (int i = 0; i < NUM; i++) { - if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { - errors++; - } - } - return errors; -} - -template <> -int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { - int errors = 0; - for (int i = 0; i < NUM; i++) { - if (__half2float(TransposeMatrix[i]) != 
__half2float(cpuTransposeMatrix[i])) { // NOLINT + if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { errors++; } } return errors; } -template -void init(T* Matrix) { +template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { + int errors = 0; + for (int i = 0; i < NUM; i++) { + if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT + errors++; + } + } + return errors; +} + +template void init(T* Matrix) { // initialize the input data T factor; getFactor(&factor); for (int i = 0; i < NUM; i++) { - Matrix[i] = (T)i + factor; + Matrix[i] = (T)i + factor; } } -template <> -void init(__half* Matrix) { +template <> void init(__half* Matrix) { // initialize the input data __half factor; getFactor(&factor); for (int i = 0; i < NUM; i++) { - Matrix[i] = i + __half2float(factor); + Matrix[i] = i + __half2float(factor); } } -template -static void runTest() { +template static void runTest() { T* Matrix; T* TransposeMatrix; T* cpuTransposeMatrix; @@ -129,21 +116,17 @@ static void runTest() { // allocate the memory on the device side HIP_CHECK(hipMalloc(reinterpret_cast(&gpuMatrix), NUM * sizeof(T))); - HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), - NUM * sizeof(T))); + HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), NUM * sizeof(T))); // Memory transfer from host to device - HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), - hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice)); // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(1), - dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0, - gpuTransposeMatrix, gpuMatrix, WIDTH); + hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), + 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); // Memory transfer from device to host - HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, - NUM * sizeof(T), hipMemcpyDeviceToHost)); + 
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -183,26 +166,12 @@ static void runTest() { */ TEST_CASE("Unit_hipShflTests") { - SECTION("run test for int") { - runTest(); - } - SECTION("run test for float") { - runTest(); - } - SECTION("run test for double") { - runTest(); - } + SECTION("run test for int") { runTest(); } + SECTION("run test for float") { runTest(); } + SECTION("run test for double") { runTest(); } // Test added to support half datatype. - SECTION("run test for __half") { - runTest<__half>(); - } - SECTION("run test for int64_t") { - runTest(); - } - SECTION("run test for unsigned int") { - runTest(); - } - SECTION("run test for uint64_t") { - runTest(); - } + SECTION("run test for __half") { runTest<__half>(); } + SECTION("run test for int64_t") { runTest(); } + SECTION("run test for unsigned int") { runTest(); } + SECTION("run test for uint64_t") { runTest(); } } diff --git a/catch/unit/kernel/hipShflUpDownTest.cc b/catch/unit/kernel/hipShflUpDownTest.cc index ab80dd51b1..a06216f03d 100644 --- a/catch/unit/kernel/hipShflUpDownTest.cc +++ b/catch/unit/kernel/hipShflUpDownTest.cc @@ -21,12 +21,10 @@ THE SOFTWARE. 
#include #include #include -#include const int size = 32; -template -__global__ void shflDownSum(T* a, int size) { +template __global__ void shflDownSum(T* a, int size) { T val = a[threadIdx.x]; for (int i = size / 2; i > 0; i /= 2) { val += __shfl_down(val, i, size); @@ -34,8 +32,7 @@ __global__ void shflDownSum(T* a, int size) { a[threadIdx.x] = val; } -template -__global__ void shflUpSum(T* a, int size) { +template __global__ void shflUpSum(T* a, int size) { T val = a[threadIdx.x]; for (int i = size / 2; i > 0; i /= 2) { val += __shfl_up(val, i, size); @@ -43,34 +40,29 @@ __global__ void shflUpSum(T* a, int size) { a[threadIdx.x] = val; } -template -__global__ void shflXorSum(T* a, int size) { +template __global__ void shflXorSum(T* a, int size) { T val = a[threadIdx.x]; - for (int i = size/2; i > 0; i /= 2) { + for (int i = size / 2; i > 0; i /= 2) { val += __shfl_xor(val, i, size); } a[threadIdx.x] = val; } static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { - *fact = static_cast(INT32_MAX)+1; -} +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } static void getFactor(float* fact) { *fact = 2.5; } static void getFactor(double* fact) { *fact = 2.5; } static void getFactor(__half* fact) { *fact = 2.5; } static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { - *fact = static_cast(__LONG_LONG_MAX__)+1; -} +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } template T sum(T* a) { T cpuSum = 0; T factor; getFactor(&factor); for (int i = 0; i < size; i++) { - a[i] = i + factor; - cpuSum += a[i]; + a[i] = i + factor; + cpuSum += a[i]; } return cpuSum; } @@ -80,8 +72,8 @@ template <> __half sum(__half* a) { __half factor; getFactor(&factor); for (int i = 0; i < size; i++) { - a[i] = i + __half2float(factor); - cpuSum = __half2float(cpuSum) + __half2float(a[i]); + a[i] = i + __half2float(factor); + cpuSum = 
__half2float(cpuSum) + __half2float(a[i]); } return cpuSum; } @@ -100,8 +92,7 @@ template <> bool compare(__half gpuSum, __half cpuSum) { return false; } -template -static void runTestShflUp() { +template static void runTestShflUp() { const int size = 32; T a[size]; T cpuSum = sum(a); @@ -114,8 +105,7 @@ static void runTestShflUp() { HIP_CHECK(hipFree(d_a)); } -template -static void runTestShflDown() { +template static void runTestShflDown() { T a[size]; T cpuSum = sum(a); T* d_a; @@ -127,8 +117,7 @@ static void runTestShflDown() { HIP_CHECK(hipFree(d_a)); } -template -static void runTestShflXor() { +template static void runTestShflXor() { T a[size]; T cpuSum = sum(a); T* d_a; @@ -141,12 +130,12 @@ static void runTestShflXor() { } /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - -* Contains warp __shfl_up function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_up function + */ /** * Test Description @@ -164,27 +153,13 @@ static void runTestShflXor() { */ TEST_CASE("Unit_runTestShfl_up") { - SECTION("runTestShflUp for int") { - runTestShflUp(); - } - SECTION("runTestShflUp for float") { - runTestShflUp(); - } - SECTION("runTestShflUp for double") { - runTestShflUp(); - } - SECTION("runTestShflUp for __half") { - runTestShflUp<__half>(); - } - SECTION("runTestShflUp for int64_t") { - runTestShflUp(); - } - SECTION("runTestShflUp for unsigned int") { - runTestShflUp(); - } - SECTION("runTestShflUp for uint64_t") { - runTestShflUp(); - } + SECTION("runTestShflUp for int") { runTestShflUp(); } + SECTION("runTestShflUp for float") { runTestShflUp(); } + SECTION("runTestShflUp for double") { runTestShflUp(); } + SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); } + SECTION("runTestShflUp for int64_t") { runTestShflUp(); } + SECTION("runTestShflUp for unsigned 
int") { runTestShflUp(); } + SECTION("runTestShflUp for uint64_t") { runTestShflUp(); } } /** * End doxygen group __shfl. @@ -192,12 +167,12 @@ TEST_CASE("Unit_runTestShfl_up") { */ /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - -* Contains warp __shfl_down function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_down function + */ /** * Test Description @@ -215,27 +190,13 @@ TEST_CASE("Unit_runTestShfl_up") { */ TEST_CASE("Unit_runTestShfl_Down") { - SECTION("runTestShflDown for int") { - runTestShflDown(); - } - SECTION("runTestShflDown for float") { - runTestShflDown(); - } - SECTION("runTestShflDown for double") { - runTestShflDown(); - } - SECTION("runTestShflDown for __half") { - runTestShflDown<__half>(); - } - SECTION("runTestShflDown for int64_t") { - runTestShflDown(); - } - SECTION("runTestShflDown for unsigned int") { - runTestShflDown(); - } - SECTION("runTestShflDown for uint64_t") { - runTestShflDown(); - } + SECTION("runTestShflDown for int") { runTestShflDown(); } + SECTION("runTestShflDown for float") { runTestShflDown(); } + SECTION("runTestShflDown for double") { runTestShflDown(); } + SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); } + SECTION("runTestShflDown for int64_t") { runTestShflDown(); } + SECTION("runTestShflDown for unsigned int") { runTestShflDown(); } + SECTION("runTestShflDown for uint64_t") { runTestShflDown(); } } /** * End doxygen group __shfl. 
@@ -243,12 +204,12 @@ TEST_CASE("Unit_runTestShfl_Down") { */ /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_xor(T var, int laneMask, int width=warpSize)` - -* Contains warp __shfl_xor function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_xor(T var, int laneMask, int width=warpSize)` - + * Contains warp __shfl_xor function + */ /** * Test Description @@ -266,27 +227,13 @@ TEST_CASE("Unit_runTestShfl_Down") { */ TEST_CASE("Unit_runTestShfl_Xor") { - SECTION("runTestShflXor for int") { - runTestShflXor(); - } - SECTION("runTestShflXor for float") { - runTestShflXor(); - } - SECTION("runTestShflXor for double") { - runTestShflXor(); - } - SECTION("runTestShflXor for __half") { - runTestShflXor<__half>(); - } - SECTION("runTestShflXor for int64_t") { - runTestShflXor(); - } - SECTION("runTestShflXor for unsigned int") { - runTestShflXor(); - } - SECTION("runTestShflXor for uint64_t") { - runTestShflXor(); - } + SECTION("runTestShflXor for int") { runTestShflXor(); } + SECTION("runTestShflXor for float") { runTestShflXor(); } + SECTION("runTestShflXor for double") { runTestShflXor(); } + SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); } + SECTION("runTestShflXor for int64_t") { runTestShflXor(); } + SECTION("runTestShflXor for unsigned int") { runTestShflXor(); } + SECTION("runTestShflXor for uint64_t") { runTestShflXor(); } } /** * End doxygen group __shfl. diff --git a/catch/unit/kernel/hipTestConstant.cc b/catch/unit/kernel/hipTestConstant.cc index 911457af0f..0d7693a91f 100644 --- a/catch/unit/kernel/hipTestConstant.cc +++ b/catch/unit/kernel/hipTestConstant.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include -#include + #define LEN 512 #define SIZE 2048 diff --git a/catch/unit/kernel/hipTestGlobalVariable.cc b/catch/unit/kernel/hipTestGlobalVariable.cc index a2d99fa8b7..151a92f7e8 100644 --- a/catch/unit/kernel/hipTestGlobalVariable.cc +++ b/catch/unit/kernel/hipTestGlobalVariable.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #define LEN 512 #define SIZE 2048 diff --git a/catch/unit/kernel/hipTestMemKernel.cc b/catch/unit/kernel/hipTestMemKernel.cc index d97a5698e0..beedb180a5 100644 --- a/catch/unit/kernel/hipTestMemKernel.cc +++ b/catch/unit/kernel/hipTestMemKernel.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #define LEN8 8 * 4 #define LEN9 9 * 4 diff --git a/catch/unit/kernel/launch_bounds.cc b/catch/unit/kernel/launch_bounds.cc index 59b1132898..5cab54679b 100644 --- a/catch/unit/kernel/launch_bounds.cc +++ b/catch/unit/kernel/launch_bounds.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + constexpr size_t N = 1024; int p_blockSize = 256; diff --git a/catch/unit/launchBounds/CMakeLists.txt b/catch/unit/launchBounds/CMakeLists.txt new file mode 100644 index 0000000000..e377ebe5d9 --- /dev/null +++ b/catch/unit/launchBounds/CMakeLists.txt @@ -0,0 +1,48 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +set(TEST_SRC + launch_bounds.cc +) + +if(HIP_PLATFORM MATCHES "nvidia") + hip_add_exe_to_target(NAME LaunchBoundsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") + hip_add_exe_to_target(NAME LaunchBoundsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) +endif() + +# Below tests fail in PSDB +#add_test(NAME Unit_Kernel_Launch_bounds_Negative_Parameters_CompilerError +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# launch_bounds_compiler_error_kernels.cc -1) +# +#if(HIP_PLATFORM MATCHES "amd") +# add_test(NAME Unit_Kernel_Launch_bounds_Negative_Parameters_ParseError +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# launch_bounds_parse_error_kernels.cc -1) +#endif() diff --git a/catch/unit/launchBounds/launch_bounds.cc b/catch/unit/launchBounds/launch_bounds.cc new file mode 100644 index 0000000000..72b087331b --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds.cc @@ -0,0 +1,173 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "launch_bounds_negative_kernels_rtc.hh" + +/** + * @addtogroup launch_bounds launch_bounds + * @{ + * @ingroup DeviceLanguageTest + * `__launch_bounds__(MAX_THREADS_PER_BLOCK, MIN_WARPS_PER_EXECUTION_UNIT)` - + * allows the application to provide usage hints that influence the resources (primarily registers) + * used by the generated code. It is a function attribute that must be attached to a global + * function. + */ + +constexpr int kMaxThreadsPerBlock = 128; +constexpr int kMinWarpsPerMultiprocessor = 2; + +__launch_bounds__(kMaxThreadsPerBlock, kMinWarpsPerMultiprocessor) __global__ + void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +template void LaunchBoundsWrapper(const int threads_per_block) { + auto block_size = GENERATE(1, 32, 128); + int* A_d; + int* A_h; + int sum{0}; + + A_h = static_cast(malloc(sizeof(int))); + memset(A_h, 0, sizeof(int)); + HIP_CHECK(hipMalloc(&A_d, sizeof(int))); + HIP_CHECK(hipMemcpy(A_d, A_h, sizeof(int), hipMemcpyHostToDevice)); + SumKernel<<>>(A_d); + + if constexpr (out_of_bounds) { + if (threads_per_block < 0) { + HIP_CHECK_ERROR(hipGetLastError(), hipErrorInvalidConfiguration); + } else { +#if HT_AMD + HIP_CHECK_ERROR(hipGetLastError(), hipErrorLaunchFailure); +#else + HIP_CHECK_ERROR(hipGetLastError(), hipErrorInvalidValue); +#endif + } + } else { + HIP_CHECK(hipGetLastError()); + } + + HIP_CHECK(hipMemcpy(A_h, A_d, sizeof(int), hipMemcpyDeviceToHost)); + + if constexpr 
(!out_of_bounds) { + for (int i = 0; i < threads_per_block * block_size; ++i) { + sum += i; + } + REQUIRE(*A_h == sum); + } + + free(A_h); + HIP_CHECK(hipFree(A_d)); +} + +/** + * Test Description + * ------------------------ + * - Executes simple addition kernel and validates results. + * - The number of threads per block used to launch the kernel + * are complied with the `__launch_bounds__`: + * -# Number of threads per block are less than or equal to the configured maximum value. + * -# Different values are assigned and kernel functionality is validated. + * Test source + * ------------------------ + * - unit/launch_bounds/launch_bounds.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Positive_Basic") { + auto threads_per_block = GENERATE(1, kMaxThreadsPerBlock / 2, kMaxThreadsPerBlock); + LaunchBoundsWrapper(threads_per_block); +} + +/** + * Test Description + * ------------------------ + * - Validates that the kernels will not be launched if the number of threads + * per block is larger than configured with `__launch_bounds__`: + * -# Expected output: + * - return `hipErrorLaunchFailure` on AMD. + * - return `hipErrorInvalidValue` on NVIDIA. 
+ * Test source + * ------------------------ + * - unit/launch_bounds/launch_bounds.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Negative_OutOfBounds") { + auto threads_per_block = + GENERATE(-1 * kMaxThreadsPerBlock, -1, kMaxThreadsPerBlock + 1, 2 * kMaxThreadsPerBlock); + LaunchBoundsWrapper(threads_per_block); +} + +/** + * Test Description + * ------------------------ + * - Validates handling of invalid arguments: + * -# Compiles kernels that are not created appropriately: + * - Maximum number of threads is 0 + * - Maximum number of threads is negative + * - Minimum number of warps is negative + * - Maximum number of threads is not integer value + * - Mimimum number of warps is not integer value + * -# Expected output: compiler error + * - Uses RTC for compilation. + * Test source + * ------------------------ + * - unit/launch_bounds/launch_bounds.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Negative_Parameters_RTC") { + hiprtcProgram program{}; + +#if HT_AMD + const auto program_source = GENERATE(kMaxThreadsZero, kMaxThreadsNegative, kMinWarpsNegative, + kMaxThreadsNotInt, kMinWarpsNotInt); +#else + // Aligned with CUDA behavior and expected behavior on NVIDIA + const auto program_source = GENERATE(kMaxThreadsNotInt, kMinWarpsNotInt); +#endif + + HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "launch_bounds_negative.cc", 0, + nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log. 
+ size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + REQUIRE(error_count > 0); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); +} diff --git a/catch/unit/launchBounds/launch_bounds_compiler_error_kernels.cc b/catch/unit/launchBounds/launch_bounds_compiler_error_kernels.cc new file mode 100644 index 0000000000..4a8ec9885f --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds_compiler_error_kernels.cc @@ -0,0 +1,35 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +__launch_bounds__(0) __global__ void MaxThreadsZero(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +__launch_bounds__(1.5) __global__ void MaxThreadsNotInt(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +__launch_bounds__(128, 1.5) __global__ void MinWarpsNotInt(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} diff --git a/catch/unit/launchBounds/launch_bounds_negative_kernels_rtc.hh b/catch/unit/launchBounds/launch_bounds_negative_kernels_rtc.hh new file mode 100644 index 0000000000..a341ce67ff --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds_negative_kernels_rtc.hh @@ -0,0 +1,64 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the launch bounds negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kMaxThreadsZero{ + R"( + __launch_bounds__(0) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMaxThreadsNegative{ + R"( + __launch_bounds__(-1) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMinWarpsNegative{ + R"( + __launch_bounds__(128, -1) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMaxThreadsNotInt{ + R"( + __launch_bounds__(1.5) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMinWarpsNotInt{ + R"( + __launch_bounds__(128, 1.5) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; diff --git a/catch/unit/launchBounds/launch_bounds_parse_error_kernels.cc b/catch/unit/launchBounds/launch_bounds_parse_error_kernels.cc new file mode 100644 index 0000000000..e0ed6093f5 --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds_parse_error_kernels.cc @@ -0,0 +1,30 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +__launch_bounds__(-1) __global__ void MaxThreadsNegative(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +__launch_bounds__(128, -1) __global__ void MinWarpsNegative(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt new file mode 100644 index 0000000000..a5d1f58faf --- /dev/null +++ b/catch/unit/math/CMakeLists.txt @@ -0,0 +1,164 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Sources compiled into the MathsTest executable on every platform.
+set(TEST_SRC
+    trig_funcs.cc
+    misc_funcs.cc
+    remainder_and_rounding_funcs.cc
+    single_precision_intrinsics.cc
+    double_precision_intrinsics.cc
+    integer_intrinsics.cc
+    root_funcs.cc
+    log_funcs.cc
+    special_funcs.cc
+    casting_double_funcs.cc
+    casting_float_funcs.cc
+    casting_int_funcs.cc
+    casting_half2int_funcs.cc
+    casting_int2half_funcs.cc
+    casting_half_float_funcs.cc
+)
+
+# Select the runtime-compilation library per platform; the AMD build additionally compiles the
+# pow and half-precision sources listed below.
+if(HIP_PLATFORM MATCHES "nvidia")
+  set(LINKER_LIBS nvrtc)
+elseif(HIP_PLATFORM MATCHES "amd")
+  set(TEST_SRC ${TEST_SRC}
+      pow_funcs.cc
+      casting_half2_funcs.cc
+      half_precision_math.cc
+      half_precision_arithmetic.cc
+      half_precision_comparison.cc
+  )
+  set(LINKER_LIBS hiprtc)
+endif()
+
+# The MathsTest target is only created when Boost (>= 1.70.0) is found; its include directories
+# are added privately to the target.
+find_package(Boost 1.70.0)
+message(STATUS "Boost_FOUND: ${Boost_FOUND}")
+if(Boost_FOUND)
+  hip_add_exe_to_target(NAME MathsTest
+                        TEST_SRC ${TEST_SRC}
+                        TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}
+                        LINKER_LIBS ${LINKER_LIBS})
+  target_include_directories(MathsTest PRIVATE ${Boost_INCLUDE_DIRS})
+else()
+  message(STATUS "Boost not found. Dependent math tests not enabled.")
+endif()
+
+# Below tests fail in PSDB
+# Each disabled entry invokes compileAndCaptureOutput.py with the source directory, platform,
+# HIP path, a kernel file and a trailing number.
+# NOTE(review): the trailing number is presumably the expected compiler error count - confirm
+# against compileAndCaptureOutput.py.
+#add_test(NAME Unit_Device_Single_Precision_Trig_Functions_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         trig_single_precision_negative_kernels.cc 66)
+#
+#add_test(NAME Unit_Device_Double_Precision_Trig_Functions_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         trig_double_precision_negative_kernels.cc 66)
+#add_test(NAME Unit_Device_Misc_Functions_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         misc_negative_kernels.cc 76)
+#
+#add_test(NAME Unit_Device_remainder_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         math_remainder_negative_kernels.cc 68)
+#
+#add_test(NAME Unit_Device_rounding_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         math_rounding_negative_kernels.cc 40)
+#
+#add_test(NAME Unit_Single_Precision_Intrinsics_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         single_precision_intrinsics_negative_kernels.cc 42)
+#
+#add_test(NAME Unit_Double_Precision_Intrinsics_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         double_precision_intrinsics_negative_kernels.cc 18)
+#
+#add_test(NAME Unit_Integer_Intrinsics_Negative
+#         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+#         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+#         integer_intrinsics_negative_kernels.cc 20)
+#add_test(NAME Unit_Device_root_1Dand2D_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# math_root_negative_kernels_1Dand2D.cc 68) +# +#add_test(NAME Unit_Device_root_3Dand4D_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# math_root_negative_kernels_3Dand4D.cc 56) +#add_test(NAME Unit_Device_pow_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# math_pow_negative_kernels.cc 76) +#add_test(NAME Unit_Device_log_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# math_log_negative_kernels.cc 24) +#add_test(NAME Unit_Device_special_funcs_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# math_special_func_kernels.cc 76) +#add_test(NAME Unit_Device_casting_double_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# casting_double_negative_kernels.cc 69) +#add_test(NAME Unit_Device_casting_float_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# casting_float_negative_kernels.cc 54) +#add_test(NAME Unit_Device_casting_int_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# casting_int_negative_kernels.cc 92) +# +#add_test(NAME Unit_Device_casting_half2_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# 
casting_half2_negative_kernels.cc 53) +#add_test(NAME Unit_Half_Precision_Math_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# half_precision_math_negative_kernels.cc 60) +#add_test(NAME Unit_Half_Precision_Arithmetic_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# half_precision_arithmetic_negative_kernels.cc 88) +#add_test(NAME Unit_Half_Precision_Comparison_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# half_precision_comparison_negative_kernels.cc 168) +#add_test(NAME Unit_Device_casting_half2int_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# casting_half2int_negative_kernels.cc 78) +#add_test(NAME Unit_Device_casting_int2half_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# casting_int2half_negative_kernels.cc 78) +#add_test(NAME Unit_Device_casting_half_float_Negative +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# casting_half_float_negative_kernels.cc 18) diff --git a/catch/unit/math/Float16.hh b/catch/unit/math/Float16.hh new file mode 100644 index 0000000000..0983be733a --- /dev/null +++ b/catch/unit/math/Float16.hh @@ -0,0 +1,55 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +#define FLOAT16_MAX 65504.0f + +class Float16 { + public: + __host__ __device__ Float16() = default; + __host__ __device__ Float16(__half x) : x_{x} {} + __host__ __device__ Float16(__half2 x) : x_{__low2half(x)} {} + __host__ __device__ Float16(float x) : x_{__float2half(x)} {} + + // __heq doesn't have a __host__ version + __host__ __device__ bool operator==(Float16 other) const { return (static_cast<__half_raw>(x_).x == static_cast<__half_raw>(other.x_).x); } + __host__ __device__ bool operator!=(Float16 other) const { return !(*this == other); } + + __host__ __device__ operator __half() const { return x_; } + __host__ __device__ operator __half2() const { return __half2half2(x_); } + __host__ __device__ operator float() const { return __half2float(x_); } + + private: + __half x_; +}; + +namespace { + +inline std::ostream& operator<<(std::ostream& o, Float16 x) { + o << static_cast(x); + return o; +} + +} // namespace \ No newline at end of file diff --git a/catch/unit/math/binary_common.hh b/catch/unit/math/binary_common.hh new file mode 100644 index 0000000000..395fb28c79 --- /dev/null +++ b/catch/unit/math/binary_common.hh @@ -0,0 +1,141 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+#include "math_common.hh"
+#include "math_special_values.hh"
+
+// NOTE(review): the include target on the next line is missing in this copy of the patch.
+#include 
+
+namespace cg = cooperative_groups;
+
+// Defines the kernel template `<func_name>_kernel(ys, num_xs, x1s, x2s)`. Every thread starts at
+// its grid-wide rank and advances by the grid size; for each index i < num_xs it stores
+// `func_name##f(x1s[i], x2s[i])` or `func_name(x1s[i], x2s[i])` in ys[i], chosen at compile time.
+// NOTE(review): the template parameter list and the std::is_same_v operands are missing in this
+// copy; presumably the `f`-suffixed overload is selected for float and the plain one for double.
+#define MATH_BINARY_KERNEL_DEF(func_name) \
+  template \
+  __global__ void func_name##_kernel(RT* const ys, const size_t num_xs, T* const x1s, \
+                                     T* const x2s) { \
+    const auto tid = cg::this_grid().thread_rank(); \
+    const auto stride = cg::this_grid().size(); \
+ \
+    for (auto i = tid; i < num_xs; i += stride) { \
+      if constexpr (std::is_same_v) { \
+        ys[i] = func_name##f(x1s[i], x2s[i]); \
+      } else if constexpr (std::is_same_v) { \
+        ys[i] = func_name(x1s[i], x2s[i]); \
+      } \
+    } \
+  }
+
+// Runs `kernel` on batches of randomly generated operand pairs and hands each batch to
+// MathTest::Run together with `ref_func` and `validator_builder`.
+//   kernel, ref_func   - device kernel under test and its host reference.
+//   validator_builder  - forwarded unchanged to MathTest::Run.
+//   a, b               - bounds of the uniform distribution used in the general branch; the other
+//                        branch draws from [-FLOAT16_MAX, FLOAT16_MAX] instead.
+// GetTestIterationCount() values are generated in total, in batches limited by
+// GetMaxAllowedDeviceMemoryUsage().
+template 
+void BinaryFloatingPointBruteForceTest(kernel_sig kernel,
+                                       ref_sig ref_func,
+                                       const ValidatorBuilder& validator_builder,
+                                       const TArg a = std::numeric_limits::lowest(),
+                                       const TArg b = std::numeric_limits::max()) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const uint64_t num_iterations = GetTestIterationCount();
+  // Two operand buffers plus one result element are needed per generated pair.
+  const auto max_batch_size =
+      std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 2 + sizeof(T)), num_iterations);
+  LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+  LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+
+  MathTest math_test(kernel, max_batch_size);
+
+  auto batch_size = max_batch_size;
+  const auto num_threads = thread_pool.thread_count();
+  for (uint64_t i = 0ul; i < num_iterations; i += batch_size) {
+    batch_size = std::min(max_batch_size, num_iterations - i);
+
+    // Split the batch evenly across the pool; the first `tail` workers take one extra element.
+    const auto min_sub_batch_size = batch_size / num_threads;
+    const auto tail = batch_size % num_threads;
+
+    auto base_idx = 0u;
+    // NOTE(review): this index shadows the outer batch counter `i`, and `base_idx` is deduced as
+    // unsigned int while the batch sizes are 64-bit.
+    for (auto i = 0u; i < num_threads; ++i) {
+      const auto sub_batch_size = min_sub_batch_size + (i < tail);
+      thread_pool.Post([=, &x1s, &x2s] {
+        const auto generator = [=] {
+          // One engine per worker thread, seeded from std::random_device on first use.
+          static thread_local std::mt19937 rng(std::random_device{}());
+          if constexpr (std::is_same_v) {
+            std::uniform_real_distribution> unif_dist(-FLOAT16_MAX, FLOAT16_MAX);
+            return static_cast(unif_dist(rng));
+          } else {
+            std::uniform_real_distribution> unif_dist(a, b);
+            return static_cast(unif_dist(rng));
+          }
+        };
+        std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator);
+        std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator);
+      });
+      base_idx += sub_batch_size;
+    }
+
+    // All workers must finish filling the buffers before the batch is consumed.
+    thread_pool.Wait();
+
+    math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(),
+                  x2s.ptr());
+  }
+}
+
+// Runs `kernel` once over every ordered pair of entries taken from the special-value table
+// selected out of kSpecialValRegistry (values.size * values.size pairs in total).
+template 
+void BinaryFloatingPointSpecialValuesTest(kernel_sig kernel,
+                                          ref_sig ref_func,
+                                          const ValidatorBuilder& validator_builder) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  using SpecialValsType = std::conditional_t, float, TArg>;
+  const auto values = std::get>(kSpecialValRegistry);
+
+  const auto size = values.size * values.size;
+  LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+  LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+
+  // Row-major pairing: slot (i, j) holds values.data[i] as first and values.data[j] as second
+  // operand.
+  for (auto i = 0u; i < values.size; ++i) {
+    for (auto j = 0u; j < values.size; ++j) {
+      x1s.ptr()[i * values.size + j] = values.data[i];
+      x2s.ptr()[i * values.size + j] = values.data[j];
+    }
+  }
+
+  MathTest math_test(kernel, size);
+  math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(),
+                               x2s.ptr());
+}
+
+// Runs the special-values test and the brute-force test as two separate Catch2 sections.
+template 
+void BinaryFloatingPointTest(kernel_sig kernel, ref_sig ref_func,
+                             const ValidatorBuilder& validator_builder) {
+  SECTION("Special values") {
+    BinaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder);
+  }
+
+  SECTION("Brute force") { BinaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); }
+}
+
+// Defines the kernel for `kern_name` plus a TEMPLATE_TEST_CASE named
+// "Unit_Device_<kern_name>_Accuracy_Positive" instantiated for float and double. `sp_ulp` or
+// `dp_ulp` is passed to ULPValidatorBuilderFactory depending on a compile-time type test.
+// NOTE(review): the operands of that type test are missing in this copy; presumably `sp_ulp`
+// applies to float and `dp_ulp` to double.
+#define MATH_BINARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \
+  MATH_BINARY_KERNEL_DEF(kern_name) \
+ \
+  TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \
+    using RT = RefType_t; \
+    RT (*ref)(RT, RT) = ref_func; \
+    const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \
+ \
+    BinaryFloatingPointTest(kern_name##_kernel, ref, \
+                            ULPValidatorBuilderFactory(ulp)); \
+  }
diff --git a/catch/unit/math/casting_common.hh b/catch/unit/math/casting_common.hh
new file mode 100644
index 0000000000..9fb56d3674
--- /dev/null
+++ b/catch/unit/math/casting_common.hh
@@ -0,0 +1,259 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "unary_common.hh" +#include + +namespace cg = cooperative_groups; + +#define CAST_KERNEL_DEF(func_name, T1, T2) \ + __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T2* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define CAST_BINARY_KERNEL_DEF(func_name, T1, T2) \ + __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T2* const x1s, \ + T2* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define CAST_F2I_REF_DEF(func_name, T1, T2, ref_func) \ + T1 func_name##_ref(T2 arg) { \ + if (arg >= static_cast(std::numeric_limits::max())) \ + return std::numeric_limits::max(); \ + else if (arg <= static_cast(std::numeric_limits::min())) \ + return std::numeric_limits::min(); \ + T2 result = ref_func(arg); \ + return result; \ + } + +#define CAST_F2I_RZ_REF_DEF(func_name, T1, T2) \ + T1 func_name##_ref(T2 arg) { \ + if (arg >= static_cast(std::numeric_limits::max())) \ + return std::numeric_limits::max(); \ + else if (arg <= static_cast(std::numeric_limits::min())) \ + return std::numeric_limits::min(); \ + T1 result = static_cast(arg); \ + return result; \ + } + +#define CAST_RND_REF_DEF(func_name, T1, T2, round_dir) \ + T1 func_name##_ref(T2 arg) { \ + int curr_direction = fegetround(); \ + fesetround(round_dir); \ + T1 result = static_cast(arg); \ + fesetround(curr_direction); \ + return result; \ + } + +#define 
CAST_REF_DEF(func_name, T1, T2) \ + T1 func_name##_ref(T2 arg) { \ + T1 result = static_cast(arg); \ + return result; \ + } + +template T1 type2_as_type1_ref(T2 arg) { + T1 tmp; + memcpy(&tmp, &arg, sizeof(tmp)); + return tmp; +} + +template +void CastUnaryHalfPrecisionBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + + for (uint64_t v = 0u; v < stop;) { + batch_size = std::min(max_batch_size, stop - v); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + + thread_pool.Post([=, &values] { + auto t = v; + uint16_t val; + for (auto j = 0u; j < sub_batch_size; ++j) { + val = static_cast(t++); + values.ptr()[base_idx + j] = *reinterpret_cast(&val); + if (std::isnan(values.ptr()[base_idx + j]) || std::isinf(values.ptr()[base_idx + j])) { + values.ptr()[base_idx + j] = 0; + } + } + }); + + v += sub_batch_size; + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void CastUnaryHalfPrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Brute force") { CastUnaryHalfPrecisionBruteForceTest(kernel, ref, validator_builder); } +} + + +template +void CastDoublePrecisionSpecialValuesTest(kernel_sig 
kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + std::vector spec_values; + + if (!std::is_same_v && !std::is_same_v && !std::is_same_v) { + for (int i = 0; i < values.size; i++) { + if (!std::isnan(values.data[i]) && !std::isinf(values.data[i])) { + spec_values.push_back(values.data[i]); + } + } + } + + MathTest math_test(kernel, spec_values.size()); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, + spec_values.size(), spec_values.data()); +} + +template +void CastDoublePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + CastDoublePrecisionSpecialValuesTest(kernel, ref, validator_builder); + } + + SECTION("Brute force") { UnaryDoublePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +template +void CastIntRangeTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg)); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + size_t inserted = 0u; + for (TArg v = a; v <= b; v++) { + values.ptr()[inserted++] = v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr()); + inserted = 0u; + } +} + +template +void CastIntBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = 
GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg)), num_iterations); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &values] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_int_distribution unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(values.ptr() + base_idx, values.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void CastBinaryIntRangeTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const T2 a = std::numeric_limits::lowest(), + const T2 b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) + 2 * sizeof(T2)); + LinearAllocGuard values1{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; + LinearAllocGuard values2{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; + + MathTest math_test(kernel, max_batch_size); + + size_t inserted = 0u; + for (T2 v = a; v <= b; v++) { + values1.ptr()[inserted] = v; 
+ values2.ptr()[inserted++] = b - v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values1.ptr(), + values2.ptr()); + inserted = 0u; + } +} diff --git a/catch/unit/math/casting_double_funcs.cc b/catch/unit/math/casting_double_funcs.cc new file mode 100644 index 0000000000..fcdbe441ef --- /dev/null +++ b/catch/unit/math/casting_double_funcs.cc @@ -0,0 +1,597 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include "casting_common.hh"
+#include "casting_double_negative_kernels_rtc.hh"
+
+/**
+ * @addtogroup CastingDoubleType CastingDoubleType
+ * @{
+ * @ingroup MathTest
+ */
+
+// Defines, for the intrinsic `kern_name` converting double to `T`:
+//  - the device kernel (CAST_KERNEL_DEF),
+//  - the saturating host reference built on `ref_func` (CAST_F2I_REF_DEF),
+//  - a TEST_CASE named "Unit_Device_<kern_name>_Positive" that passes both to
+//    CastDoublePrecisionTest together with the result of EqValidatorBuilderFactory().
+#define CAST_DOUBLE2INT_TEST_DEF(kern_name, T, ref_func) \
+  CAST_KERNEL_DEF(kern_name, T, double) \
+  CAST_F2I_REF_DEF(kern_name, T, double, ref_func) \
+ \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") { \
+    T (*ref)(double) = kern_name##_ref; \
+    CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \
+  }
+
+// Same as CAST_DOUBLE2INT_TEST_DEF, except that the host reference is the plain-cast variant
+// (CAST_F2I_RZ_REF_DEF) and therefore needs no rounding function.
+#define CAST_DOUBLE2INT_RZ_TEST_DEF(kern_name, T) \
+  CAST_KERNEL_DEF(kern_name, T, double) \
+  CAST_F2I_RZ_REF_DEF(kern_name, T, double) \
+ \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") { \
+    T (*ref)(double) = kern_name##_ref; \
+    CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \
+  }
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2int_rd` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2int_rd, int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2int_rn` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2int_rn, int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2int_ru` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2int_ru, int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2int_rz` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against a reference function
+ * which performs a cast to int.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_RZ_TEST_DEF(__double2int_rz, int)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - RTCs kernels that pass an argument of invalid type to __double2int_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2int_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2Int); }
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2uint_rd` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against the reference
+ * function `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2uint_rd, unsigned int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2uint_rn` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against the reference
+ * function `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2uint_rn, unsigned int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2uint_ru` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against the reference
+ * function `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2uint_ru, unsigned int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2uint_rz` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against a reference
+ * function which performs a cast to unsigned int.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_RZ_TEST_DEF(__double2uint_rz, unsigned int)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - RTCs kernels that pass an argument of invalid type to __double2uint_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2uint_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2Uint); }
+
+// 64-bit variant of CAST_DOUBLE2INT_TEST_DEF: the TEST_CASE calls
+// UnaryDoublePrecisionBruteForceTest directly and passes two explicit bounds built from
+// std::numeric_limits min()/max().
+// NOTE(review): the template arguments of those casts are missing in this copy; presumably the
+// bounds are the limits of `T` converted to double.
+#define CAST_DOUBLE2LL_TEST_DEF(kern_name, T, ref_func) \
+  CAST_KERNEL_DEF(kern_name, T, double) \
+  CAST_F2I_REF_DEF(kern_name, T, double, ref_func) \
+ \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") { \
+    T (*ref)(double) = kern_name##_ref; \
+    UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \
+                                       static_cast(std::numeric_limits::min()), \
+                                       static_cast(std::numeric_limits::max())); \
+  }
+
+// Same as CAST_DOUBLE2LL_TEST_DEF, with the plain-cast host reference (CAST_F2I_RZ_REF_DEF).
+#define CAST_DOUBLE2LL_RZ_TEST_DEF(kern_name, T) \
+  CAST_KERNEL_DEF(kern_name, T, double) \
+  CAST_F2I_RZ_REF_DEF(kern_name, T, double) \
+ \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") { \
+    T (*ref)(double) = kern_name##_ref; \
+    UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \
+                                       static_cast(std::numeric_limits::min()), \
+                                       static_cast(std::numeric_limits::max())); \
+  }
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ll_rd` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ll_rd, long long int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ll_rn` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ll_rn, long long int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ll_ru` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ll_ru, long long int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ll_rz` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against a reference function
+ * which performs a cast to long long int.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_RZ_TEST_DEF(__double2ll_rz, long long int)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - RTCs kernels that pass an argument of invalid type to __double2ll_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2ll_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2LL); }
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ull_rd` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ull_rd, unsigned long long int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ull_rn` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ull_rn, unsigned long long int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ull_ru` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against the reference function
+ * `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ull_ru, unsigned long long int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test that checks `__double2ull_rz` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against a reference function
+ * which performs a cast to unsigned long long int.
+ *
+ * Test source
+ * ------------------------
+ *  - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_RZ_TEST_DEF(__double2ull_rz, unsigned long long int)
+
+/**
+ * Test Description
+ * ------------------------
+ *  - RTCs kernels that pass an argument of invalid type to __double2ull_[rd,rn,ru,rz].
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2ull_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2ULL); } + +#define CAST_DOUBLE2FLOAT_TEST_DEF(kern_name, round_dir) \ + CAST_KERNEL_DEF(kern_name, float, double) \ + CAST_RND_REF_DEF(kern_name, float, double, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + float (*ref)(double) = kern_name##_ref; \ + CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +#define CAST_DOUBLE2FLOAT_RN_TEST_DEF(kern_name) \ + CAST_KERNEL_DEF(kern_name, float, double) \ + CAST_REF_DEF(kern_name, float, double) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + float (*ref)(double) = kern_name##_ref; \ + CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_rd` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float with rounding mode FE_DOWNWARD. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_rd, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_rn` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_RN_TEST_DEF(__double2float_rn) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_ru` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float with rounding mode FE_UPWARD. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_ru, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_rz` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float with rounding mode FE_TOWARDZERO. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_rz, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2float_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2Float); } + +CAST_KERNEL_DEF(__double2hiint, int, double) + +int __double2hiint_ref(double arg) { + int tmp[2]; + memcpy(tmp, &arg, sizeof(tmp)); + return tmp[1]; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2hiint` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * performs copy of higher part of double value to int variable. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2hiint_Positive") { + int (*ref)(double) = __double2hiint_ref; + CastDoublePrecisionTest(__double2hiint_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2hiint. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2hiint_Negative_RTC") { NegativeTestRTCWrapper<3>(kDouble2Hiint); } + +CAST_KERNEL_DEF(__double2loint, int, double) + +int __double2loint_ref(double arg) { + int tmp[2]; + memcpy(tmp, &arg, sizeof(tmp)); + return tmp[0]; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2loint` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * performs copy of lower part of double value to int variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2loint_Positive") { + int (*ref)(double) = __double2loint_ref; + CastDoublePrecisionTest(__double2loint_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2loint. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2loint_Negative_RTC") { NegativeTestRTCWrapper<3>(kDouble2Loint); } + +CAST_KERNEL_DEF(__double_as_longlong, long long int, double) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double_as_longlong` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs copy of double value to long long int variable. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double_as_longlong_Positive") { + long long int (*ref)(double) = type2_as_type1_ref; + CastDoublePrecisionTest(__double_as_longlong_kernel, ref, + EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double_as_longlong. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double_as_longlong_Negative_RTC") { + NegativeTestRTCWrapper<3>(kDoubleAsLonglong); +} \ No newline at end of file diff --git a/catch/unit/math/casting_double_negative_kernels.cc b/catch/unit/math/casting_double_negative_kernels.cc new file mode 100644 index 0000000000..8386107aed --- /dev/null +++ b/catch/unit/math/casting_double_negative_kernels.cc @@ -0,0 +1,55 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(T* result, double* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, double x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__double2int_rd, int) +NEGATIVE_KERNELS_SHELL(__double2int_rn, int) +NEGATIVE_KERNELS_SHELL(__double2int_ru, int) +NEGATIVE_KERNELS_SHELL(__double2int_rz, int) +NEGATIVE_KERNELS_SHELL(__double2uint_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2uint_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2uint_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2uint_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2ll_rd, long long int) +NEGATIVE_KERNELS_SHELL(__double2ll_rn, long long int) +NEGATIVE_KERNELS_SHELL(__double2ll_ru, long long int) +NEGATIVE_KERNELS_SHELL(__double2ll_rz, long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_rd, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_rn, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_ru, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_rz, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2float_rd, float) +NEGATIVE_KERNELS_SHELL(__double2float_rn, float) +NEGATIVE_KERNELS_SHELL(__double2float_ru, float) +NEGATIVE_KERNELS_SHELL(__double2float_rz, float) +NEGATIVE_KERNELS_SHELL(__double2hiint, int) +NEGATIVE_KERNELS_SHELL(__double2loint, int) +NEGATIVE_KERNELS_SHELL(__double_as_longlong, long long int) \ No newline at end of file diff --git a/catch/unit/math/casting_double_negative_kernels_rtc.hh b/catch/unit/math/casting_double_negative_kernels_rtc.hh new file mode 100644 index 0000000000..440f4cad9d --- /dev/null +++ b/catch/unit/math/casting_double_negative_kernels_rtc.hh @@ -0,0 +1,157 @@ +/* +Copyright (c) 2021 Advanced Micro 
Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the double type casting negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kDouble2Int{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2int_rd_kernel_v1(int* result, double* x) { *result = __double2int_rd(x); } + __global__ void double2int_rd_kernel_v2(int* result, Dummy x) { *result = __double2int_rd(x); } + __global__ void double2int_rd_kernel_v3(Dummy* result, double x) { *result = __double2int_rd(x); } + __global__ void double2int_rn_kernel_v1(int* result, double* x) { *result = __double2int_rn(x); } + __global__ void double2int_rn_kernel_v2(int* result, Dummy x) { *result = __double2int_rn(x); } + __global__ void double2int_rn_kernel_v3(Dummy* result, double x) { *result = __double2int_rn(x); } + __global__ void double2int_ru_kernel_v1(int* result, double* x) { *result = __double2int_ru(x); } + __global__ void double2int_ru_kernel_v2(int* result, Dummy x) { *result = __double2int_ru(x); } + __global__ void double2int_ru_kernel_v3(Dummy* result, double x) { *result = __double2int_ru(x); } + __global__ void double2int_rz_kernel_v1(int* result, double* x) { *result = __double2int_rz(x); } + __global__ void double2int_rz_kernel_v2(int* result, Dummy x) { *result = __double2int_rz(x); } + __global__ void double2int_rz_kernel_v3(Dummy* result, double x) { *result = __double2int_rz(x); } +)"}; + +static constexpr auto kDouble2Uint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2uint_rd_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rd(x); } + __global__ void double2uint_rd_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rd(x); } + __global__ void double2uint_rd_kernel_v3(Dummy* result, double x) { *result = __double2uint_rd(x); } + __global__ void double2uint_rn_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rn(x); } + __global__ void double2uint_rn_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rn(x); } + 
__global__ void double2uint_rn_kernel_v3(Dummy* result, double x) { *result = __double2uint_rn(x); } + __global__ void double2uint_ru_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_ru(x); } + __global__ void double2uint_ru_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_ru(x); } + __global__ void double2uint_ru_kernel_v3(Dummy* result, double x) { *result = __double2uint_ru(x); } + __global__ void double2uint_rz_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rz(x); } + __global__ void double2uint_rz_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rz(x); } + __global__ void double2uint_rz_kernel_v3(Dummy* result, double x) { *result = __double2uint_rz(x); } +)"}; + +static constexpr auto kDouble2LL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2ll_rd_kernel_v1(long long int* result, double* x) { *result = __double2ll_rd(x); } + __global__ void double2ll_rd_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_rd(x); } + __global__ void double2ll_rd_kernel_v3(Dummy* result, double x) { *result = __double2ll_rd(x); } + __global__ void double2ll_rn_kernel_v1(long long int* result, double* x) { *result = __double2ll_rn(x); } + __global__ void double2ll_rn_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_rn(x); } + __global__ void double2ll_rn_kernel_v3(Dummy* result, double x) { *result = __double2ll_rn(x); } + __global__ void double2ll_ru_kernel_v1(long long int* result, double* x) { *result = __double2ll_ru(x); } + __global__ void double2ll_ru_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_ru(x); } + __global__ void double2ll_ru_kernel_v3(Dummy* result, double x) { *result = __double2ll_ru(x); } + __global__ void double2ll_rz_kernel_v1(long long int* result, double* x) { *result = __double2ll_rz(x); } + __global__ void double2ll_rz_kernel_v2(long long int* result, Dummy x) { 
*result = __double2ll_rz(x); } + __global__ void double2ll_rz_kernel_v3(Dummy* result, double x) { *result = __double2ll_rz(x); } +)"}; + +static constexpr auto kDouble2ULL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2ull_rd_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rd(x); } + __global__ void double2ull_rd_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rd(x); } + __global__ void double2ull_rd_kernel_v3(Dummy* result, double x) { *result = __double2ull_rd(x); } + __global__ void double2ull_rn_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rn(x); } + __global__ void double2ull_rn_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rn(x); } + __global__ void double2ull_rn_kernel_v3(Dummy* result, double x) { *result = __double2ull_rn(x); } + __global__ void double2ull_ru_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_ru(x); } + __global__ void double2ull_ru_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_ru(x); } + __global__ void double2ull_ru_kernel_v3(Dummy* result, double x) { *result = __double2ull_ru(x); } + __global__ void double2ull_rz_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rz(x); } + __global__ void double2ull_rz_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rz(x); } + __global__ void double2ull_rz_kernel_v3(Dummy* result, double x) { *result = __double2ull_rz(x); } +)"}; + +static constexpr auto kDouble2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2float_rd_kernel_v1(float* result, double* x) { *result = __double2float_rd(x); } + __global__ void double2float_rd_kernel_v2(float* result, Dummy x) { *result = __double2float_rd(x); } + __global__ void double2float_rd_kernel_v3(Dummy* 
result, double x) { *result = __double2float_rd(x); } + __global__ void double2float_rn_kernel_v1(float* result, double* x) { *result = __double2float_rn(x); } + __global__ void double2float_rn_kernel_v2(float* result, Dummy x) { *result = __double2float_rn(x); } + __global__ void double2float_rn_kernel_v3(Dummy* result, double x) { *result = __double2float_rn(x); } + __global__ void double2float_ru_kernel_v1(float* result, double* x) { *result = __double2float_ru(x); } + __global__ void double2float_ru_kernel_v2(float* result, Dummy x) { *result = __double2float_ru(x); } + __global__ void double2float_ru_kernel_v3(Dummy* result, double x) { *result = __double2float_ru(x); } + __global__ void double2float_rz_kernel_v1(float* result, double* x) { *result = __double2float_rz(x); } + __global__ void double2float_rz_kernel_v2(float* result, Dummy x) { *result = __double2float_rz(x); } + __global__ void double2float_rz_kernel_v3(Dummy* result, double x) { *result = __double2float_rz(x); } +)"}; + +static constexpr auto kDouble2Hiint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2hiint_kernel_v1(int* result, double* x) { *result = __double2hiint(x); } + __global__ void double2hiint_kernel_v2(int* result, Dummy x) { *result = __double2hiint(x); } + __global__ void double2hiint_kernel_v3(Dummy* result, double x) { *result = __double2hiint(x); } +)"}; + +static constexpr auto kDouble2Loint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2loint_kernel_v1(int* result, double* x) { *result = __double2loint(x); } + __global__ void double2loint_kernel_v2(int* result, Dummy x) { *result = __double2loint(x); } + __global__ void double2loint_kernel_v3(Dummy* result, double x) { *result = __double2loint(x); } +)"}; + +static constexpr auto kDoubleAsLonglong{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void 
double_as_longlong_kernel_v1(long long int* result, double* x) { *result = __double_as_longlong(x); } + __global__ void double_as_longlong_kernel_v2(long long int* result, Dummy x) { *result = __double_as_longlong(x); } + __global__ void double_as_longlong_kernel_v3(Dummy* result, double x) { *result = __double_as_longlong(x); } +)"}; diff --git a/catch/unit/math/casting_float_funcs.cc b/catch/unit/math/casting_float_funcs.cc new file mode 100644 index 0000000000..f5e92e218b --- /dev/null +++ b/catch/unit/math/casting_float_funcs.cc @@ -0,0 +1,440 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "casting_common.hh" +#include "casting_float_negative_kernels_rtc.hh" + +/** + * @addtogroup CastingFloatType CastingFloatType + * @{ + * @ingroup MathTest + */ + +#define CAST_FLOAT2INT_TEST_DEF(kern_name, T, ref_func) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_REF_DEF(kern_name, T, float, ref_func) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits<float>::lowest(), \ + std::numeric_limits<float>::max()); \ + } + +#define CAST_FLOAT2INT_RZ_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, float) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits<float>::lowest(), \ + std::numeric_limits<float>::max()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_rd` for all possible inputs. The results are compared against + * reference function `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_rd, int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_rn` for all possible inputs. The results are compared against + * reference function `std::rint`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_rn, int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_ru` for all possible inputs.
The results are compared against + * reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_ru, int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_rz` for all possible inputs. The results are compared against + * reference function `std::trunc`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_rz, int, std::trunc) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2int_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2int_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2Int); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_rd` for all possible inputs. The results are compared + * against reference function `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2uint_rd, unsigned int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_rn` for all possible inputs. The results are compared + * against reference function `std::rint`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2uint_rn, unsigned int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_ru` for all possible inputs. The results are compared + * against reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2uint_ru, unsigned int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_rz` for all possible inputs (the same range test as the other + * `__float2uint_*` variants). The results are compared against reference function which + * performs cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_RZ_TEST_DEF(__float2uint_rz, unsigned int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2uint_[rd,rn,ru,rz].
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2uint_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2Uint); } + +#define CAST_FLOAT2LL_TEST_DEF(kern_name, T, ref_func) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_REF_DEF(kern_name, T, float, ref_func) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + static_cast<float>(std::numeric_limits<T>::min()), \ + static_cast<float>(std::numeric_limits<T>::max())); \ + } + +#define CAST_FLOAT2LL_RZ_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, float) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + static_cast<float>(std::numeric_limits<T>::min()), \ + static_cast<float>(std::numeric_limits<T>::max())); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_rd` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ll_rd, long long int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_rn` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function `std::rint`.
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ll_rn, long long int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_ru` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ll_ru, long long int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_rz` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function which performs cast to long + * long int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_RZ_TEST_DEF(__float2ll_rz, long long int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2ll_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2ll_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2LL); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_rd` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function `std::floor`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ull_rd, unsigned long long int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_rn` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function `std::rint`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ull_rn, unsigned long long int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_ru` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ull_ru, unsigned long long int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_rz` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function which performs + * cast to unsigned long long int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_RZ_TEST_DEF(__float2ull_rz, unsigned long long int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2ull_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2ull_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2ULL); } + +CAST_KERNEL_DEF(__float_as_int, int, float) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float_as_int` for all possible inputs. The results are compared against + * reference function which performs copy of float value to int variable. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_int_Positive") { + int (*ref)(float) = type2_as_type1_ref; + UnarySinglePrecisionTest(__float_as_int_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float_as_int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_int_Negative_RTC") { NegativeTestRTCWrapper<3>(kFloatAsInt); } + +CAST_KERNEL_DEF(__float_as_uint, unsigned int, float) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float_as_uint` for all possible inputs. The results are compared + * against reference function which performs copy of float value to unsigned int variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_uint_Positive") { + unsigned int (*ref)(float) = type2_as_type1_ref; + UnarySinglePrecisionTest(__float_as_uint_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float_as_uint. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_uint_Negative_RTC") { NegativeTestRTCWrapper<3>(kFloatAsUint); } diff --git a/catch/unit/math/casting_float_negative_kernels.cc b/catch/unit/math/casting_float_negative_kernels.cc new file mode 100644 index 0000000000..eecbd6dd7e --- /dev/null +++ b/catch/unit/math/casting_float_negative_kernels.cc @@ -0,0 +1,50 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(T* result, float* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, float x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__float2int_rd, int) +NEGATIVE_KERNELS_SHELL(__float2int_rn, int) +NEGATIVE_KERNELS_SHELL(__float2int_ru, int) +NEGATIVE_KERNELS_SHELL(__float2int_rz, int) +NEGATIVE_KERNELS_SHELL(__float2uint_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2uint_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2uint_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2uint_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2ll_rd, long long int) +NEGATIVE_KERNELS_SHELL(__float2ll_rn, long long int) +NEGATIVE_KERNELS_SHELL(__float2ll_ru, long long int) +NEGATIVE_KERNELS_SHELL(__float2ll_rz, long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_rd, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_rn, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_ru, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_rz, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float_as_int, int) +NEGATIVE_KERNELS_SHELL(__float_as_uint, unsigned int) \ No newline at end of file diff --git a/catch/unit/math/casting_float_negative_kernels_rtc.hh b/catch/unit/math/casting_float_negative_kernels_rtc.hh new file mode 100644 index 0000000000..45fba3b0e7 --- /dev/null +++ b/catch/unit/math/casting_float_negative_kernels_rtc.hh @@ -0,0 +1,126 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, 
Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the float type casting negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kFloat2Int{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2int_rd_kernel_v1(int* result, float* x) { *result = __float2int_rd(x); } + __global__ void float2int_rd_kernel_v2(int* result, Dummy x) { *result = __float2int_rd(x); } + __global__ void float2int_rd_kernel_v3(Dummy* result, float x) { *result = __float2int_rd(x); } + __global__ void float2int_rn_kernel_v1(int* result, float* x) { *result = __float2int_rn(x); } + __global__ void float2int_rn_kernel_v2(int* result, Dummy x) { *result = __float2int_rn(x); } + __global__ void float2int_rn_kernel_v3(Dummy* result, float x) { *result = __float2int_rn(x); } + __global__ void float2int_ru_kernel_v1(int* result, float* x) { *result = __float2int_ru(x); } + __global__ void float2int_ru_kernel_v2(int* result, Dummy x) { *result = __float2int_ru(x); } + __global__ void float2int_ru_kernel_v3(Dummy* result, float x) { *result = __float2int_ru(x); } + __global__ void float2int_rz_kernel_v1(int* result, float* x) { *result = __float2int_rz(x); } + __global__ void float2int_rz_kernel_v2(int* result, Dummy x) { *result = __float2int_rz(x); } + __global__ void float2int_rz_kernel_v3(Dummy* result, float x) { *result = __float2int_rz(x); } +)"}; + +static constexpr auto kFloat2Uint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2uint_rd_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_rd(x); } + __global__ void float2uint_rd_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_rd(x); } + __global__ void float2uint_rd_kernel_v3(Dummy* result, float x) { *result = __float2uint_rd(x); } + __global__ void float2uint_rn_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_rn(x); } + __global__ void float2uint_rn_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_rn(x); } + __global__ void float2uint_rn_kernel_v3(Dummy* 
result, float x) { *result = __float2uint_rn(x); } + __global__ void float2uint_ru_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_ru(x); } + __global__ void float2uint_ru_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_ru(x); } + __global__ void float2uint_ru_kernel_v3(Dummy* result, float x) { *result = __float2uint_ru(x); } + __global__ void float2uint_rz_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_rz(x); } + __global__ void float2uint_rz_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_rz(x); } + __global__ void float2uint_rz_kernel_v3(Dummy* result, float x) { *result = __float2uint_rz(x); } +)"}; + +static constexpr auto kFloat2LL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2ll_rd_kernel_v1(long long int* result, float* x) { *result = __float2ll_rd(x); } + __global__ void float2ll_rd_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_rd(x); } + __global__ void float2ll_rd_kernel_v3(Dummy* result, float x) { *result = __float2ll_rd(x); } + __global__ void float2ll_rn_kernel_v1(long long int* result, float* x) { *result = __float2ll_rn(x); } + __global__ void float2ll_rn_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_rn(x); } + __global__ void float2ll_rn_kernel_v3(Dummy* result, float x) { *result = __float2ll_rn(x); } + __global__ void float2ll_ru_kernel_v1(long long int* result, float* x) { *result = __float2ll_ru(x); } + __global__ void float2ll_ru_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_ru(x); } + __global__ void float2ll_ru_kernel_v3(Dummy* result, float x) { *result = __float2ll_ru(x); } + __global__ void float2ll_rz_kernel_v1(long long int* result, float* x) { *result = __float2ll_rz(x); } + __global__ void float2ll_rz_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_rz(x); } + __global__ void float2ll_rz_kernel_v3(Dummy* result, float x) { 
*result = __float2ll_rz(x); } +)"}; + +static constexpr auto kFloat2ULL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2ull_rd_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_rd(x); } + __global__ void float2ull_rd_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_rd(x); } + __global__ void float2ull_rd_kernel_v3(Dummy* result, float x) { *result = __float2ull_rd(x); } + __global__ void float2ull_rn_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_rn(x); } + __global__ void float2ull_rn_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_rn(x); } + __global__ void float2ull_rn_kernel_v3(Dummy* result, float x) { *result = __float2ull_rn(x); } + __global__ void float2ull_ru_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_ru(x); } + __global__ void float2ull_ru_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_ru(x); } + __global__ void float2ull_ru_kernel_v3(Dummy* result, float x) { *result = __float2ull_ru(x); } + __global__ void float2ull_rz_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_rz(x); } + __global__ void float2ull_rz_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_rz(x); } + __global__ void float2ull_rz_kernel_v3(Dummy* result, float x) { *result = __float2ull_rz(x); } +)"}; + +static constexpr auto kFloatAsInt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float_as_int_kernel_v1(int* result, float* x) { *result = __float_as_int(x); } + __global__ void float_as_int_kernel_v2(int* result, Dummy x) { *result = __float_as_int(x); } + __global__ void float_as_int_kernel_v3(Dummy* result, float x) { *result = __float_as_int(x); } +)"}; + +static constexpr auto kFloatAsUint{R"( + class Dummy { + public: + __device__ Dummy() {} + 
__device__ ~Dummy() {} + }; + __global__ void float_as_uint_kernel_v1(unsigned int* result, float* x) { *result = __float_as_uint(x); } + __global__ void float_as_uint_kernel_v2(unsigned int* result, Dummy x) { *result = __float_as_uint(x); } + __global__ void float_as_uint_kernel_v3(Dummy* result, float x) { *result = __float_as_uint(x); } +)"}; diff --git a/catch/unit/math/casting_half2_common.hh b/catch/unit/math/casting_half2_common.hh new file mode 100644 index 0000000000..085ae46ccc --- /dev/null +++ b/catch/unit/math/casting_half2_common.hh @@ -0,0 +1,97 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "math_common.hh" +#include "validators.hh" + +namespace cg = cooperative_groups; + +#define CAST_HALF2_KERNEL_DEF(func_name, T) \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, Float16* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(__half2{xs[i], -xs[i]}); \ + } \ + } + +#define CAST_BINARY_HALF2_KERNEL_DEF(func_name, T) \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(__half2{x1s[i], -x1s[i]}, __half2{x2s[i], -x2s[i]}); \ + } \ + } + +template class Float2Validator : public MatcherBase { + public: + Float2Validator(const float2& target, const VB& vb) + : first_matcher_{vb(target.x)}, second_matcher_{vb(target.y)} {} + + bool match(const float2& val) const override { + return first_matcher_->match(val.x) && second_matcher_->match(val.y); + } + + std::string describe() const override { + return "<" + first_matcher_->describe() + ", " + second_matcher_->describe() + ">"; + } + + private: + decltype(std::declval()(float())) first_matcher_; + decltype(std::declval()(float())) second_matcher_; +}; + +template +auto Float2ValidatorBuilderFactory(const ValidatorBuilder& vb) { + return [=](const float2& t, auto&&...) 
{ + return std::make_unique>(t, vb); + }; +} + +template class Half2Validator : public MatcherBase<__half2> { + public: + Half2Validator(const __half2& target, const VB& vb) + : first_matcher_{vb(target.data.x)}, second_matcher_{vb(target.data.y)} {} + + bool match(const __half2& val) const override { + return first_matcher_->match(val.data.x) && second_matcher_->match(val.data.y); + } + + std::string describe() const override { + return "<" + first_matcher_->describe() + ", " + second_matcher_->describe() + ">"; + } + + private: + decltype(std::declval()(Float16())) first_matcher_; + decltype(std::declval()(Float16())) second_matcher_; +}; + +template auto Half2ValidatorBuilderFactory(const ValidatorBuilder& vb) { + return [=](const __half2& t, auto&&...) { + return std::make_unique>(t, vb); + }; +} diff --git a/catch/unit/math/casting_half2_funcs.cc b/catch/unit/math/casting_half2_funcs.cc new file mode 100644 index 0000000000..38562f38eb --- /dev/null +++ b/catch/unit/math/casting_half2_funcs.cc @@ -0,0 +1,419 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" +#include "casting_half2_common.hh" + +/** + * @addtogroup HalfPrecisionCastingHalf2 HalfPrecisionCastingHalf2 + * @{ + * @ingroup MathTest + */ + +/********** half -> half2 **********/ + +CAST_KERNEL_DEF(__half2half2, __half2, Float16) + +static __half2 __half2half2_ref(Float16 x) { return __half2{x, x}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2half2` for all possible inputs. The results are compared against + * reference function which returns __half2 value created from one __half value. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half2half2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__half2half2_kernel, __half2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_KERNEL_DEF(make_half2, __half2, Float16) + +static __half2 make_half2_ref(Float16 x, Float16 y) { return __half2{x, y}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `make_half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from two __half values. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_make_half2_Accuracy_Positive") { + BinaryFloatingPointTest(make_half2_kernel, make_half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_KERNEL_DEF(__halves2half2, __half2, Float16) + +static __half2 __halves2half2_ref(Float16 x, Float16 y) { return __half2{x, y}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__halves2half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from two __half values. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___halves2half2_Accuracy_Positive") { + BinaryFloatingPointTest(__halves2half2_kernel, __halves2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** half2 -> half **********/ + + +CAST_HALF2_KERNEL_DEF(__low2half, Float16) + +static Float16 __low2half_ref(Float16 x) { return x; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__low2half` for all possible inputs. The results are compared against + * reference function which returns __half value created from lower __half2 element. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___low2half_Accuracy_Positive") { + UnaryHalfPrecisionTest(__low2half_kernel, __low2half_ref, EqValidatorBuilderFactory()); +} + +CAST_HALF2_KERNEL_DEF(__high2half, Float16) + +static Float16 __high2half_ref(Float16 x) { return -x; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__high2half` for all possible inputs. The results are compared against + * reference function which returns __half value created from higher __half2 element. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___high2half_Accuracy_Positive") { + UnaryHalfPrecisionTest(__high2half_kernel, __high2half_ref, EqValidatorBuilderFactory()); +} + +/********** half2 -> half2 **********/ + +CAST_HALF2_KERNEL_DEF(__low2half2, __half2) + +static __half2 __low2half2_ref(Float16 x) { return __half2{x, x}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__low2half2` for all possible inputs. The results are compared against + * reference function which returns __half2 value created from two lower __half2 elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___low2half2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__low2half2_kernel, __low2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_HALF2_KERNEL_DEF(__high2half2, __half2) + +static __half2 __high2half2_ref(Float16 x) { return __half2{-x, -x}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__high2half2` for all possible inputs. 
The results are compared against + * reference function which returns __half2 value created from two higher __half2 elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___high2half2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__high2half2_kernel, __high2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_HALF2_KERNEL_DEF(__lowhigh2highlow, __half2) + +static __half2 __lowhigh2highlow_ref(Float16 x) { return __half2{-x, x}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__lowhigh2highlow` for all possible inputs. The results are compared + * against reference function which returns __half2 value created from higher and lower __half2 + * elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___lowhigh2highlow_Accuracy_Positive") { + UnaryHalfPrecisionTest(__lowhigh2highlow_kernel, __lowhigh2highlow_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_HALF2_KERNEL_DEF(__lows2half2, __half2) + +static __half2 __lows2half2_ref(Float16 x, Float16 y) { return __half2{x, y}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__lows2half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from lower elements of two __half2 values. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___lows2half2_Accuracy_Positive") { + BinaryFloatingPointTest(__lows2half2_kernel, __lows2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_HALF2_KERNEL_DEF(__highs2half2, __half2) + +static __half2 __highs2half2_ref(Float16 x, Float16 y) { return __half2{-x, -y}; } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__highs2half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from higher elements of two __half2 values. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___highs2half2_Accuracy_Positive") { + BinaryFloatingPointTest(__highs2half2_kernel, __highs2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** float -> half2 **********/ + +CAST_KERNEL_DEF(__float2half2_rn, __half2, float) + +static __half2 __float2half2_rn_ref(float x) { + return __half2{static_cast(x), static_cast(x)}; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2half2_rn` for all possible inputs. The results are compared + * against reference function which returns __half2 value with one casted float value in both + * elements. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half2_rn_Accuracy_Positive") { + UnarySinglePrecisionTest(__float2half2_rn_kernel, __float2half2_rn_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_KERNEL_DEF(__floats2half2_rn, __half2, float) + +static __half2 __floats2half2_rn_ref(float x, float y) { + return __half2{static_cast(x), static_cast(y)}; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__floats2half2_rn` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which returns __half2 value created from two casted float values. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___floats2half2_rn_Accuracy_Positive") { + BinaryFloatingPointTest(__floats2half2_rn_kernel, __floats2half2_rn_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** float2 -> half2 **********/ + +__global__ void __float22half2_rn_kernel(__half2* const ys, const size_t num_xs, float* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + ys[i] = __float22half2_rn(make_float2(xs[i], -xs[i])); + } +} + +static __half2 __float22half2_rn_ref(float x) { + return __half2{static_cast(x), static_cast(-x)}; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float22half2_rn` for all possible inputs. The results are compared + * against reference function which returns __half2 value created from two casted float2 + * elements. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float22half2_rn_Accuracy_Positive") { + UnarySinglePrecisionTest(__float22half2_rn_kernel, __float22half2_rn_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** half2 -> float **********/ + +CAST_HALF2_KERNEL_DEF(__low2float, float) + +static float __low2float_ref(Float16 x) { return static_cast(x); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__low2float` for all possible inputs. The results are compared + * against reference function which returns float value created from the lower of the __half2 + * elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___low2float_Accuracy_Positive") { + UnaryHalfPrecisionTest(__low2float_kernel, __low2float_ref, EqValidatorBuilderFactory()); +} + +CAST_HALF2_KERNEL_DEF(__high2float, float) + +static float __high2float_ref(Float16 x) { return static_cast(-x); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__high2float` for all possible inputs. The results are compared + * against reference function which returns float value created from the higher of the __half2 + * elements. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___high2float_Accuracy_Positive") { + UnaryHalfPrecisionTest(__high2float_kernel, __high2float_ref, EqValidatorBuilderFactory()); +} + +/********** half2 -> float2 **********/ + +CAST_HALF2_KERNEL_DEF(__half22float2, float2) + +static float2 __half22float2_ref(Float16 x) { + return make_float2(static_cast(x), static_cast(-x)); +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half22float2` for all possible inputs. The results are compared against + * reference function which returns float2 value created from casted elements of one __half2 value. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half22float2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__half22float2_kernel, __half22float2_ref, + Float2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} diff --git a/catch/unit/math/casting_half2_negative_kernels.cc b/catch/unit/math/casting_half2_negative_kernels.cc new file mode 100644 index 0000000000..d1552e45ba --- /dev/null +++ b/catch/unit/math/casting_half2_negative_kernels.cc @@ -0,0 +1,57 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_UNARY_KERNELS_SHELL(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T1* result, T2* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T1* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, T2 x) { *result = func_name(x); } + + +#define NEGATIVE_BINARY_KERNELS_SHELL(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T2* x, T2 y) { T1 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(T2 x, T2* y) { T1 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, T2 y) { T1 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(T2 x, Dummy y) { T1 result = func_name(x, y); } + +NEGATIVE_UNARY_KERNELS_SHELL(__half2half2, __half2, __half) +NEGATIVE_UNARY_KERNELS_SHELL(__low2half, __half, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__high2half, __half, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__low2half2, __half2, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__high2half2, __half2, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__lowhigh2highlow, __half2, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__float2half2_rn, __half2, float) +NEGATIVE_UNARY_KERNELS_SHELL(__float22half2_rn, __half2, float2) +NEGATIVE_UNARY_KERNELS_SHELL(__low2float, float, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__high2float, float, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__half22float2, float2, __half2) + +NEGATIVE_BINARY_KERNELS_SHELL(make_half2, __half2, __half) +NEGATIVE_BINARY_KERNELS_SHELL(__halves2half2, __half2, __half) +NEGATIVE_BINARY_KERNELS_SHELL(__lows2half2, __half2, __half2) +NEGATIVE_BINARY_KERNELS_SHELL(__highs2half2, __half2, __half2) +NEGATIVE_BINARY_KERNELS_SHELL(__floats2half2_rn, __half2, float) \ No newline at end of file diff --git a/catch/unit/math/casting_half2int_funcs.cc b/catch/unit/math/casting_half2int_funcs.cc new file mode 
100644 index 0000000000..77c32fcb9c --- /dev/null +++ b/catch/unit/math/casting_half2int_funcs.cc @@ -0,0 +1,440 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" + +/** + * @addtogroup HalfPrecisionCastingIntTypes HalfPrecisionCastingIntTypes + * @{ + * @ingroup MathTest + */ + +#define CAST_HALF2INT_RN_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, Float16) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, Float16) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + T (*ref)(Float16) = kern_name##_ref; \ + CastUnaryHalfPrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to int. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_rn, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_rz, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_rd, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_ru` for all possible inputs. The results are compared against + * reference function which performs __half cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_ru, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_rn, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_rz, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_rd, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_ru` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_ru, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_rn` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_rn, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_rz` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_rz, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_rd` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_rd, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_ru` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_ru, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_rn` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_rn, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_rz` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_rz, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_rd` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_rd, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_ru` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_ru, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_rn, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_rz, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_rd, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_ru` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_ru, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_rn, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_rz, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_rd, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_ru` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_ru, unsigned long long) + +CAST_KERNEL_DEF(__half_as_short, short, Float16) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half_as_short` for all possible inputs. The results are compared + * against reference function which performs copy of __half value to short variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half_as_short_Accuracy_Positive") { + short (*ref)(Float16) = type2_as_type1_ref; + CastUnaryHalfPrecisionTest(__half_as_short_kernel, ref, EqValidatorBuilderFactory()); +} + +CAST_KERNEL_DEF(__half_as_ushort, unsigned short, Float16) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half_as_ushort` for all possible inputs. The results are compared + * against reference function which performs copy of __half value to unsigned short variable. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half_as_ushort_Accuracy_Positive") { + unsigned short (*ref)(Float16) = type2_as_type1_ref; + CastUnaryHalfPrecisionTest(__half_as_ushort_kernel, ref, + EqValidatorBuilderFactory()); +} \ No newline at end of file diff --git a/catch/unit/math/casting_half2int_negative_kernels.cc b/catch/unit/math/casting_half2int_negative_kernels.cc new file mode 100644 index 0000000000..6b7d75040f --- /dev/null +++ b/catch/unit/math/casting_half2int_negative_kernels.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(T* result, __half* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, __half x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__half2int_rn, int) +NEGATIVE_KERNELS_SHELL(__half2int_rz, int) +NEGATIVE_KERNELS_SHELL(__half2int_rd, int) +NEGATIVE_KERNELS_SHELL(__half2int_ru, int) +NEGATIVE_KERNELS_SHELL(__half2uint_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2uint_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2uint_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2uint_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2short_rn, short) +NEGATIVE_KERNELS_SHELL(__half2short_rz, short) +NEGATIVE_KERNELS_SHELL(__half2short_rd, short) +NEGATIVE_KERNELS_SHELL(__half2short_ru, short) +NEGATIVE_KERNELS_SHELL(__half_as_short, short) +NEGATIVE_KERNELS_SHELL(__half2ushort_rn, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ushort_rz, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ushort_rd, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ushort_ru, unsigned short) +NEGATIVE_KERNELS_SHELL(__half_as_ushort, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ll_rn, long long) +NEGATIVE_KERNELS_SHELL(__half2ll_rz, long long) +NEGATIVE_KERNELS_SHELL(__half2ll_rd, long long) 
+NEGATIVE_KERNELS_SHELL(__half2ll_ru, long long) +NEGATIVE_KERNELS_SHELL(__half2ull_rn, unsigned long long) +NEGATIVE_KERNELS_SHELL(__half2ull_rz, unsigned long long) +NEGATIVE_KERNELS_SHELL(__half2ull_rd, unsigned long long) +NEGATIVE_KERNELS_SHELL(__half2ull_ru, unsigned long long) \ No newline at end of file diff --git a/catch/unit/math/casting_half_float_funcs.cc b/catch/unit/math/casting_half_float_funcs.cc new file mode 100644 index 0000000000..23d6de2e8b --- /dev/null +++ b/catch/unit/math/casting_half_float_funcs.cc @@ -0,0 +1,247 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" + +/** + * @addtogroup HalfPrecisionCastingFloat HalfPrecisionCastingFloat + * @{ + * @ingroup MathTest + */ + +#define CAST_FLOAT2HALF_TEST_DEF(kern_name, round_dir) \ + CAST_KERNEL_DEF(kern_name, Float16, float) \ + CAST_RND_REF_DEF(kern_name, Float16, float, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Limited_Positive") { \ + Float16 (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits::min(), 0.f); \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + 0.0001f, std::numeric_limits::max()); \ + } + +#define CAST_FLOAT2HALF_RN_TEST_DEF(kern_name) \ + CAST_KERNEL_DEF(kern_name, Float16, float) \ + CAST_REF_DEF(kern_name, Float16, float) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + Float16 (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits::min(), \ + std::numeric_limits::max()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2half_rd` for all possible inputs apart from very small positive + * values. Rounding behaviour is not correct for host functions for this range. The results are + * compared against reference function which performs float cast to __half with FE_DOWNWARD rounding + * mode. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_TEST_DEF(__float2half_rd, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2half_rn` for all possible inputs. The results are compared against + * reference function which performs float cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_RN_TEST_DEF(__float2half_rn) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2half` for all possible inputs. The results are compared against + * reference function which performs float cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_RN_TEST_DEF(__float2half) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2half_ru` for all possible inputs apart from very small positive + * values. Rounding behaviour is not correct for host functions for this range. The results are + * compared against reference function which performs float cast to __half with FE_UPWARD rounding + * mode. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_TEST_DEF(__float2half_ru, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2half_rz` for all possible inputs apart from very small positive + * values. Rounding behaviour is not correct for host functions for this range. The results are + * compared against reference function which performs float cast to __half with FE_TOWARDZERO rounding + * mode. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_TEST_DEF(__float2half_rz, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - Sanity test that checks `__float2half_rd` for very small positive values. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half_rd_SmallVals_Sanity_Positive") { + const float input[] = {0.8859e-06f, 1.5454e-07f, 6.5955e-08f, 2.7955e-08f, + 3.7956e-09f, 4.8995e-10f, 5.7997e-15f, 6.2117e-20f, + 7.4999e-25f, 8.9999e-30f, 9.0001e-35f}; + const Float16 reference[] = {8.34465e-07, 1.19209e-07, 5.96046e-08, 0, 0, 0, 0, 0, 0, 0, 0}; + LinearAllocGuard input_dev{LinearAllocs::hipMalloc, sizeof(float)}; + LinearAllocGuard out(LinearAllocs::hipMallocManaged, sizeof(Float16)); + + + for (int i = 0; i < 11; ++i) { + HIP_CHECK(hipMemcpy(input_dev.ptr(), input + i, sizeof(float), hipMemcpyHostToDevice)); + + __float2half_rd_kernel<<<1, 1>>>(out.ptr(), 1, input_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + REQUIRE(out.ptr()[0] == reference[i]); + } +} + +/** + * Test Description + * ------------------------ + * - Sanity test that checks `__float2half_ru` for very small positive values. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half_ru_SmallVals_Sanity_Positive") { + const float input[] = {0.8859e-06f, 1.5454e-07f, 6.5955e-08f, 2.7955e-08f, + 3.7956e-09f, 4.8995e-10f, 5.7997e-15f, 6.2117e-20f, + 7.4999e-25f, 8.9999e-30f, 9.0001e-35f}; + const Float16 reference[] = {8.9407e-07, 1.78814e-07, 1.19209e-07, 5.96046e-08, + 5.96046e-08, 5.96046e-08, 5.96046e-08, 5.96046e-08, + 5.96046e-08, 5.96046e-08, 5.96046e-08}; + LinearAllocGuard input_dev{LinearAllocs::hipMalloc, sizeof(float)}; + LinearAllocGuard out(LinearAllocs::hipMallocManaged, sizeof(Float16)); + + + for (int i = 0; i < 11; ++i) { + HIP_CHECK(hipMemcpy(input_dev.ptr(), input + i, sizeof(float), hipMemcpyHostToDevice)); + + __float2half_ru_kernel<<<1, 1>>>(out.ptr(), 1, input_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + REQUIRE(out.ptr()[0] == reference[i]); + } +} + +/** + * Test Description + * ------------------------ + * - Sanity test that checks `__float2half_rz` for very small positive values. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half_rz_SmallVals_Sanity_Positive") { + const float input[] = {0.8859e-06f, 1.5454e-07f, 6.5955e-08f, 2.7955e-08f, + 3.7956e-09f, 4.8995e-10f, 5.7997e-15f, 6.2117e-20f, + 7.4999e-25f, 8.9999e-30f, 9.0001e-35f}; + const Float16 reference[] = {8.34465e-07, 1.19209e-07, 5.96046e-08, 0, 0, 0, 0, 0, 0, 0, 0}; + LinearAllocGuard input_dev{LinearAllocs::hipMalloc, sizeof(float)}; + LinearAllocGuard out(LinearAllocs::hipMallocManaged, sizeof(Float16)); + + + for (int i = 0; i < 11; ++i) { + HIP_CHECK(hipMemcpy(input_dev.ptr(), input + i, sizeof(float), hipMemcpyHostToDevice)); + + __float2half_rz_kernel<<<1, 1>>>(out.ptr(), 1, input_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + REQUIRE(out.ptr()[0] == reference[i]); + } +} + +CAST_KERNEL_DEF(__half2float, float, Float16) +CAST_REF_DEF(__half2float, float, Float16) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2float` for all possible inputs. The results are compared against + * reference function which performs __half cast to float. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half2float_Accuracy_Positive") { + float (*ref)(Float16) = __half2float_ref; + UnaryHalfPrecisionTest(__half2float_kernel, ref, EqValidatorBuilderFactory()); +} \ No newline at end of file diff --git a/catch/unit/math/casting_half_float_negative_kernels.cc b/catch/unit/math/casting_half_float_negative_kernels.cc new file mode 100644 index 0000000000..9d849e6f5e --- /dev/null +++ b/catch/unit/math/casting_half_float_negative_kernels.cc @@ -0,0 +1,45 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_F2H_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(__half* result, float* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(__half* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, float x) { *result = func_name(x); } + +#define NEGATIVE_H2F_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(float* result, __half* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(float* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, __half x) { *result = func_name(x); } + +NEGATIVE_F2H_KERNELS_SHELL(__float2half_rd) +NEGATIVE_F2H_KERNELS_SHELL(__float2half_rn) +NEGATIVE_F2H_KERNELS_SHELL(__float2half_ru) +NEGATIVE_F2H_KERNELS_SHELL(__float2half_rz) +NEGATIVE_F2H_KERNELS_SHELL(__float2half) + +NEGATIVE_H2F_KERNELS_SHELL(__half2float) \ No newline at end of file diff --git a/catch/unit/math/casting_int2half_funcs.cc b/catch/unit/math/casting_int2half_funcs.cc new file mode 100644 index 0000000000..d0d404ebc9 --- /dev/null +++ b/catch/unit/math/casting_int2half_funcs.cc @@ -0,0 +1,448 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" + +/** + * @addtogroup HalfPrecisionCastingIntTypes HalfPrecisionCastingIntTypes + * @{ + * @ingroup MathTest + */ + +#define CAST_INT2HALF_RN_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, Float16, T) \ + CAST_REF_DEF(kern_name, Float16, T) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + Float16 (*ref)(T) = kern_name##_ref; \ + CastIntRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_rn` for all possible inputs. The results are compared against + * reference function which performs int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_rn, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_rz` for all possible inputs. The results are compared against + * reference function which performs int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_rz, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_rd` for all possible inputs. 
The results are compared against + * reference function which performs int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_rd, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_ru` for all possible inputs. The results are compared against + * reference function which performs int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_ru, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2half_rn` for all possible inputs. The results are compared against + * reference function which performs unsigned int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_rn, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2half_rz` for all possible inputs. The results are compared against + * reference function which performs unsigned int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_rz, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2half_rd` for all possible inputs. The results are compared against + * reference function which performs unsigned int cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_rd, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2half_ru` for all possible inputs. The results are compared against + * reference function which performs unsigned int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_ru, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__short2half_rn` for all possible inputs. The results are compared + * against reference function which performs short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_rn, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__short2half_rz` for all possible inputs. The results are compared + * against reference function which performs short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_rz, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__short2half_rd` for all possible inputs. The results are compared + * against reference function which performs short cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_rd, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__short2half_ru` for all possible inputs. The results are compared + * against reference function which performs short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_ru, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ushort2half_rn` for all possible inputs. The results are compared + * against reference function which performs unsigned short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_rn, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ushort2half_rz` for all possible inputs. The results are compared + * against reference function which performs unsigned short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_rz, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ushort2half_rd` for all possible inputs. The results are compared + * against reference function which performs unsigned short cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_rd, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ushort2half_ru` for all possible inputs. The results are compared + * against reference function which performs unsigned short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_ru, unsigned short) + +#define CAST_LL2HALF_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, Float16, T) \ + CAST_REF_DEF(kern_name, Float16, T) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + Float16 (*ref)(T) = kern_name##_ref; \ + CastIntBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2half_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_rn, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2half_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs long long cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_rz, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2half_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_rd, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2half_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_ru, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2half_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs unsigned long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_rn, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2half_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs unsigned long long cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_rz, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2half_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs unsigned long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_rd, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2half_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs unsigned long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_ru, unsigned long long) + +CAST_KERNEL_DEF(__short_as_half, Float16, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__short_as_half` for all possible inputs. The results are compared + * against reference function which performs copy of short value to __half variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___short_as_half_Accuracy_Positive") { + Float16 (*ref)(short) = type2_as_type1_ref; + CastIntBruteForceTest(__short_as_half_kernel, ref, EqValidatorBuilderFactory()); +} + +CAST_KERNEL_DEF(__ushort_as_half, Float16, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ushort_as_half` for all possible inputs. The results are compared + * against reference function which performs copy of unsigned short value to __half variable. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ushort_as_half_Accuracy_Positive") { + Float16 (*ref)(unsigned short) = type2_as_type1_ref; + CastIntBruteForceTest(__ushort_as_half_kernel, ref, EqValidatorBuilderFactory()); +} \ No newline at end of file diff --git a/catch/unit/math/casting_int2half_negative_kernels.cc b/catch/unit/math/casting_int2half_negative_kernels.cc new file mode 100644 index 0000000000..a23da476e6 --- /dev/null +++ b/catch/unit/math/casting_int2half_negative_kernels.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(__half* result, T* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(__half* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, T x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__int2half_rn, int) +NEGATIVE_KERNELS_SHELL(__int2half_rz, int) +NEGATIVE_KERNELS_SHELL(__int2half_rd, int) +NEGATIVE_KERNELS_SHELL(__int2half_ru, int) +NEGATIVE_KERNELS_SHELL(__uint2half_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__uint2half_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__uint2half_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__uint2half_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__short2half_rn, short) +NEGATIVE_KERNELS_SHELL(__short2half_rz, short) +NEGATIVE_KERNELS_SHELL(__short2half_rd, short) +NEGATIVE_KERNELS_SHELL(__short2half_ru, short) +NEGATIVE_KERNELS_SHELL(__short_as_half, short) +NEGATIVE_KERNELS_SHELL(__ushort2half_rn, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort2half_rz, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort2half_rd, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort2half_ru, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort_as_half, unsigned short) +NEGATIVE_KERNELS_SHELL(__ll2half_rn, long long) +NEGATIVE_KERNELS_SHELL(__ll2half_rz, long long) +NEGATIVE_KERNELS_SHELL(__ll2half_rd, long long) 
+NEGATIVE_KERNELS_SHELL(__ll2half_ru, long long) +NEGATIVE_KERNELS_SHELL(__ull2half_rn, unsigned long long) +NEGATIVE_KERNELS_SHELL(__ull2half_rz, unsigned long long) +NEGATIVE_KERNELS_SHELL(__ull2half_rd, unsigned long long) +NEGATIVE_KERNELS_SHELL(__ull2half_ru, unsigned long long) \ No newline at end of file diff --git a/catch/unit/math/casting_int_funcs.cc b/catch/unit/math/casting_int_funcs.cc new file mode 100644 index 0000000000..49e8ae7463 --- /dev/null +++ b/catch/unit/math/casting_int_funcs.cc @@ -0,0 +1,735 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "casting_common.hh" +#include "casting_int_negative_kernels_rtc.hh" + +/** + * @addtogroup CastingIntTypes CastingIntTypes + * @{ + * @ingroup MathTest + */ + +#define CAST_INT2FLOAT_TEST_DEF(kern_name, T1, T2, round_dir) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_RND_REF_DEF(kern_name, T1, T2, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +#define CAST_INT2FLOAT_RN_TEST_DEF(kern_name, T1, T2) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_REF_DEF(kern_name, T1, T2) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_rd` for all possible inputs. The results are compared against + * reference function which performs cast to float with FE_DOWNWARD rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__int2float_rd, float, int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_rn` for all possible inputs. The results are compared against + * reference function which performs cast to float. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__int2float_rn, float, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_ru` for all possible inputs. The results are compared against + * reference function which performs cast to float with FE_UPWARD rounding mode. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__int2float_ru, float, int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_rz` for all possible inputs. The results are compared against + * reference function which performs cast to float with FE_TOWARDZERO rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__int2float_rz, float, int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __int2float_[rd,rn,ru,rz]. + * NOTE(review): the name below puts `___` after `int2float`, unlike the sibling `Unit_Device___uint2float_Negative_RTC`; confirm no config list references the current name before renaming. + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_int2float___Negative_RTC") { NegativeTestRTCWrapper<12>(kInt2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_rd` for all possible inputs. The results are compared + * against reference function which performs cast to float with FE_DOWNWARD rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__uint2float_rd, float, unsigned int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_rn` for all possible inputs. The results are compared + * against reference function which performs cast to float.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__uint2float_rn, float, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_ru` for all possible inputs. The results are compared + * against reference function which performs cast to float with FE_UPWARD rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__uint2float_ru, float, unsigned int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_rz` for all possible inputs. The results are compared + * against reference function which performs cast to float with FE_TOWARDZERO rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__uint2float_rz, float, unsigned int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __uint2float_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kUint2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2double_rn` for all possible inputs. The results are compared + * against reference function which performs cast to double. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__int2double_rn, double, int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __int2double_rn. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___int2double_Negative_RTC") { NegativeTestRTCWrapper<3>(kInt2Double); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2double_rn` for all possible inputs. The results are compared + * against reference function which performs cast to double. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__uint2double_rn, double, unsigned int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __uint2double_rn. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint2double_Negative_RTC") { NegativeTestRTCWrapper<3>(kUint2Double); } + +#define CAST_LL2FLOAT_TEST_DEF(kern_name, T1, T2, round_dir) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_RND_REF_DEF(kern_name, T1, T2, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +#define CAST_LL2FLOAT_RN_TEST_DEF(kern_name, T1, T2) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_REF_DEF(kern_name, T1, T2) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_DOWNWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2float_rd, float, long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ll2float_rn, float, long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2float_ru, float, long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2float_rz, float, long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ll2float_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ll2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kLL2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_DOWNWARD + * rounding mode. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2float_rd, float, unsigned long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ull2float_rn, float, unsigned long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2float_ru, float, unsigned long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2float_rz, float, unsigned long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ull2float_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ull2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kULL2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_DOWNWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2double_rd, double, long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ll2double_rn, double, long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2double_ru, double, long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_rz` against a large number of randomly generated values. 
The + * results are compared against reference function which performs cast to double with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2double_rz, double, long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ll2double_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ll2double_Negative_RTC") { NegativeTestRTCWrapper<12>(kLL2Double); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_DOWNWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2double_rd, double, unsigned long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ull2double_rn, double, unsigned long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_ru` against a large number of randomly generated values. 
The + * results are compared against reference function which performs cast to double with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2double_ru, double, unsigned long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2double_rz, double, unsigned long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ull2double_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ull2double_Negative_RTC") { NegativeTestRTCWrapper<12>(kULL2Double); } + +CAST_KERNEL_DEF(__int_as_float, float, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int_as_float` for all possible inputs. The results are compared against + * reference function which performs copy of int value to float variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___int_as_float_Positive") { + float (*ref)(int) = type2_as_type1_ref; + CastIntRangeTest(__int_as_float_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __int_as_float. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___int_as_float_Negative_RTC") { NegativeTestRTCWrapper<3>(kIntAsFloat); } + +CAST_KERNEL_DEF(__uint_as_float, float, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint_as_float` for all possible inputs. The results are compared + * against reference function which performs copy of unsigned int value to float variable. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint_as_float_Positive") { + float (*ref)(unsigned int) = type2_as_type1_ref; + CastIntRangeTest(__uint_as_float_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __uint_as_float. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint_as_float_Negative_RTC") { NegativeTestRTCWrapper<3>(kUintAsFloat); } + +CAST_KERNEL_DEF(__longlong_as_double, double, long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__longlong_as_double` against a large number of randomly generated + * values. The results are compared against reference function which performs copy of long long int + * value to double variable. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___longlong_as_double_Positive") { + double (*ref)(long long int) = type2_as_type1_ref; + CastIntBruteForceTest(__longlong_as_double_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __longlong_as_double. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___longlong_as_double_Negative_RTC") { + NegativeTestRTCWrapper<3>(kLonglongAsDouble); +} + +__global__ void __hiloint2double_kernel(double* const ys, const size_t num_xs, int* const x1s, + int* const x2s) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + ys[i] = __hiloint2double(x1s[i], x2s[i]); + } +} + +double __hiloint2double_ref(int hi, int lo) { + uint64_t tmp0 = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo); /* lo is zero-extended so a negative value cannot leak into the high word */ + double tmp1; + memcpy(&tmp1, &tmp0, sizeof(tmp0)); + + return tmp1; +} + +/** + * Test Description + * ------------------------ + * - Test that checks `__hiloint2double` for all possible values of the hi input. The results are + * compared against a reference function which copies the hi int value into the upper half of a double + * variable and the lo int value into its lower half. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___hiloint2double_Positive") { + double (*ref)(int, int) = __hiloint2double_ref; + CastBinaryIntRangeTest(__hiloint2double_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __hiloint2double.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___hiloint2double_Negative_RTC") { NegativeTestRTCWrapper<5>(kHilo2Double); } \ No newline at end of file diff --git a/catch/unit/math/casting_int_negative_kernels.cc b/catch/unit/math/casting_int_negative_kernels.cc new file mode 100644 index 0000000000..3f2586d738 --- /dev/null +++ b/catch/unit/math/casting_int_negative_kernels.cc @@ -0,0 +1,79 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T1* result, T2* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T1* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, T2 x) { *result = func_name(x); } + +#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T1* result, T2* x, T2 y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(T1* result, T2 x, T2* y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(T1* result, Dummy x, T2 y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v4(T1* result, T2 x, Dummy y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v5(Dummy* result, T2 x, T2 y) { \ + *result = func_name(x, y); \ + } + +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_rd, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_rn, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_ru, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_rz, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_rd, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_rn, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_ru, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_rz, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_rd, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_rn, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_ru, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_rz, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_rd, float, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_rn, float, unsigned long long int) 
+NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_ru, float, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_rz, float, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2double_rn, double, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2double_rn, double, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_rd, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_rn, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_ru, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_rz, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_rd, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_rn, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_ru, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_rz, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int_as_float, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint_as_float, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__longlong_as_double, double, long long int) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(__hiloint2double, double, int) \ No newline at end of file diff --git a/catch/unit/math/casting_int_negative_kernels_rtc.hh b/catch/unit/math/casting_int_negative_kernels_rtc.hh new file mode 100644 index 0000000000..acdc621f8a --- /dev/null +++ b/catch/unit/math/casting_int_negative_kernels_rtc.hh @@ -0,0 +1,215 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the int/long long type casting negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kInt2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void int2float_rd_kernel_v1(float* result, int* x) { *result = __int2float_rd(x); } + __global__ void int2float_rd_kernel_v2(float* result, Dummy x) { *result = __int2float_rd(x); } + __global__ void int2float_rd_kernel_v3(Dummy* result, int x) { *result = __int2float_rd(x); } + __global__ void int2float_rn_kernel_v1(float* result, int* x) { *result = __int2float_rn(x); } + __global__ void int2float_rn_kernel_v2(float* result, Dummy x) { *result = __int2float_rn(x); } + __global__ void int2float_rn_kernel_v3(Dummy* result, int x) { *result = __int2float_rn(x); } + __global__ void int2float_ru_kernel_v1(float* result, int* x) { *result = __int2float_ru(x); } + __global__ void int2float_ru_kernel_v2(float* result, Dummy x) { *result = __int2float_ru(x); } + __global__ void int2float_ru_kernel_v3(Dummy* result, int x) { *result = __int2float_ru(x); } + __global__ void int2float_rz_kernel_v1(float* result, int* x) { *result = __int2float_rz(x); } + __global__ void int2float_rz_kernel_v2(float* result, Dummy x) { *result = __int2float_rz(x); } + __global__ void int2float_rz_kernel_v3(Dummy* result, int x) { *result = __int2float_rz(x); } +)"}; + +static constexpr auto kUint2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void uint2float_rd_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_rd(x); } + __global__ void uint2float_rd_kernel_v2(float* result, Dummy x) { *result = __uint2float_rd(x); } + __global__ void uint2float_rd_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2float_rd(x); } + __global__ void uint2float_rn_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_rn(x); } + __global__ void uint2float_rn_kernel_v2(float* result, Dummy x) { *result = __uint2float_rn(x); } + __global__ void uint2float_rn_kernel_v3(Dummy* result, 
unsigned int x) { *result = __uint2float_rn(x); } + __global__ void uint2float_ru_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_ru(x); } + __global__ void uint2float_ru_kernel_v2(float* result, Dummy x) { *result = __uint2float_ru(x); } + __global__ void uint2float_ru_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2float_ru(x); } + __global__ void uint2float_rz_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_rz(x); } + __global__ void uint2float_rz_kernel_v2(float* result, Dummy x) { *result = __uint2float_rz(x); } + __global__ void uint2float_rz_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2float_rz(x); } +)"}; + +static constexpr auto kLL2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ll2float_rd_kernel_v1(float* result, long long int* x) { *result = __ll2float_rd(x); } + __global__ void ll2float_rd_kernel_v2(float* result, Dummy x) { *result = __ll2float_rd(x); } + __global__ void ll2float_rd_kernel_v3(Dummy* result, long long int x) { *result = __ll2float_rd(x); } + __global__ void ll2float_rn_kernel_v1(float* result, long long int* x) { *result = __ll2float_rn(x); } + __global__ void ll2float_rn_kernel_v2(float* result, Dummy x) { *result = __ll2float_rn(x); } + __global__ void ll2float_rn_kernel_v3(Dummy* result, long long int x) { *result = __ll2float_rn(x); } + __global__ void ll2float_ru_kernel_v1(float* result, long long int* x) { *result = __ll2float_ru(x); } + __global__ void ll2float_ru_kernel_v2(float* result, Dummy x) { *result = __ll2float_ru(x); } + __global__ void ll2float_ru_kernel_v3(Dummy* result, long long int x) { *result = __ll2float_ru(x); } + __global__ void ll2float_rz_kernel_v1(float* result, long long int* x) { *result = __ll2float_rz(x); } + __global__ void ll2float_rz_kernel_v2(float* result, Dummy x) { *result = __ll2float_rz(x); } + __global__ void ll2float_rz_kernel_v3(Dummy* result, long long int x) { 
*result = __ll2float_rz(x); } +)"}; + +static constexpr auto kULL2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ull2float_rd_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_rd(x); } + __global__ void ull2float_rd_kernel_v2(float* result, Dummy x) { *result = __ull2float_rd(x); } + __global__ void ull2float_rd_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_rd(x); } + __global__ void ull2float_rn_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_rn(x); } + __global__ void ull2float_rn_kernel_v2(float* result, Dummy x) { *result = __ull2float_rn(x); } + __global__ void ull2float_rn_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_rn(x); } + __global__ void ull2float_ru_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_ru(x); } + __global__ void ull2float_ru_kernel_v2(float* result, Dummy x) { *result = __ull2float_ru(x); } + __global__ void ull2float_ru_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_ru(x); } + __global__ void ull2float_rz_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_rz(x); } + __global__ void ull2float_rz_kernel_v2(float* result, Dummy x) { *result = __ull2float_rz(x); } + __global__ void ull2float_rz_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_rz(x); } +)"}; + +static constexpr auto kIntAsFloat{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void int_as_float_kernel_v1(float* result, int* x) { *result = __int_as_float(x); } + __global__ void int_as_float_kernel_v2(float* result, Dummy x) { *result = __int_as_float(x); } + __global__ void int_as_float_kernel_v3(Dummy* result, int x) { *result = __int_as_float(x); } +)"}; + +static constexpr auto kUintAsFloat{R"( + class Dummy { + public: + __device__ Dummy() {} + 
__device__ ~Dummy() {} + }; + __global__ void uint_as_float_kernel_v1(float* result, unsigned int* x) { *result = __uint_as_float(x); } + __global__ void uint_as_float_kernel_v2(float* result, Dummy x) { *result = __uint_as_float(x); } + __global__ void uint_as_float_kernel_v3(Dummy* result, unsigned int x) { *result = __uint_as_float(x); } +)"}; + +static constexpr auto kInt2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void int2double_rn_kernel_v1(double* result, int* x) { *result = __int2double_rn(x); } + __global__ void int2double_rn_kernel_v2(double* result, Dummy x) { *result = __int2double_rn(x); } + __global__ void int2double_rn_kernel_v3(Dummy* result, int x) { *result = __int2double_rn(x); } +)"}; + +static constexpr auto kUint2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void uint2double_rn_kernel_v1(double* result, unsigned int* x) { *result = __uint2double_rn(x); } + __global__ void uint2double_rn_kernel_v2(double* result, Dummy x) { *result = __uint2double_rn(x); } + __global__ void uint2double_rn_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2double_rn(x); } +)"}; + + +static constexpr auto kLL2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ll2double_rd_kernel_v1(double* result, long long int* x) { *result = __ll2double_rd(x); } + __global__ void ll2double_rd_kernel_v2(double* result, Dummy x) { *result = __ll2double_rd(x); } + __global__ void ll2double_rd_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_rd(x); } + __global__ void ll2double_rn_kernel_v1(double* result, long long int* x) { *result = __ll2double_rn(x); } + __global__ void ll2double_rn_kernel_v2(double* result, Dummy x) { *result = __ll2double_rn(x); } + __global__ void ll2double_rn_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_rn(x); } + __global__ void 
ll2double_ru_kernel_v1(double* result, long long int* x) { *result = __ll2double_ru(x); } + __global__ void ll2double_ru_kernel_v2(double* result, Dummy x) { *result = __ll2double_ru(x); } + __global__ void ll2double_ru_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_ru(x); } + __global__ void ll2double_rz_kernel_v1(double* result, long long int* x) { *result = __ll2double_rz(x); } + __global__ void ll2double_rz_kernel_v2(double* result, Dummy x) { *result = __ll2double_rz(x); } + __global__ void ll2double_rz_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_rz(x); } +)"}; + +static constexpr auto kULL2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ull2double_rd_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_rd(x); } + __global__ void ull2double_rd_kernel_v2(double* result, Dummy x) { *result = __ull2double_rd(x); } + __global__ void ull2double_rd_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_rd(x); } + __global__ void ull2double_rn_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_rn(x); } + __global__ void ull2double_rn_kernel_v2(double* result, Dummy x) { *result = __ull2double_rn(x); } + __global__ void ull2double_rn_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_rn(x); } + __global__ void ull2double_ru_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_ru(x); } + __global__ void ull2double_ru_kernel_v2(double* result, Dummy x) { *result = __ull2double_ru(x); } + __global__ void ull2double_ru_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_ru(x); } + __global__ void ull2double_rz_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_rz(x); } + __global__ void ull2double_rz_kernel_v2(double* result, Dummy x) { *result = __ull2double_rz(x); } + __global__ void 
ull2double_rz_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_rz(x); } +)"}; + +static constexpr auto kLonglongAsDouble{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void longlong_as_double_kernel_v1(double* result, long long int* x) { *result = __longlong_as_double(x); } + __global__ void longlong_as_double_kernel_v2(double* result, Dummy x) { *result = __longlong_as_double(x); } + __global__ void longlong_as_double_kernel_v3(Dummy* result, long long int x) { *result = __longlong_as_double(x); } +)"}; + +static constexpr auto kHilo2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void hiloint2double_kernel_v1(double* result, int* x, int y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v2(double* result, int x, int* y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v3(double* result, Dummy x, int y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v4(double* result, int x, Dummy y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v5(Dummy* result, int x, int y) { *result = __hiloint2double(x, y); } +)"}; + + diff --git a/catch/unit/math/double_precision_intrinsics.cc b/catch/unit/math/double_precision_intrinsics.cc new file mode 100644 index 0000000000..69e5e2a8d0 --- /dev/null +++ b/catch/unit/math/double_precision_intrinsics.cc @@ -0,0 +1,243 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +/********** Unary Functions **********/ + +#define MATH_UNARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnaryDoublePrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_DP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(double target, double x) + + +static double __drcp_rn_ref(double x) { return 1.0 / x; } + +MATH_UNARY_DP_KERNEL_DEF(__drcp_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__drcp_rn(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_DP_TEST_DEF_IMPL(__drcp_rn, __drcp_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_DP_KERNEL_DEF(__dsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dsqrt_rn(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `double std::sqrt(double)`. The error bounds are + * IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_DP_TEST_DEF_IMPL(__dsqrt_rn, static_cast(std::sqrt), + EqValidatorBuilderFactory()); + + +/********** Binary Functions **********/ + +#define MATH_BINARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s, \ + double* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define MATH_BINARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_BINARY_DP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_BINARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(double target, \ + double x1, double x2) + + +static double __dadd_rn_ref(double x1, double x2) { return x1 + x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dadd_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dadd_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dadd_rn, __dadd_rn_ref, EqValidatorBuilderFactory()); + + +static double __dsub_rn_ref(double x1, double x2) { return x1 - x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dsub_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dsub_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dsub_rn, __dsub_rn_ref, EqValidatorBuilderFactory()); + + +static double __dmul_rn_ref(double x1, double x2) { return x1 * x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dmul_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dmul_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dmul_rn, __dmul_rn_ref, EqValidatorBuilderFactory()); + + +static double __ddiv_rn_ref(double x1, double x2) { return x1 / x2; } + +MATH_BINARY_DP_KERNEL_DEF(__ddiv_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__ddiv_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__ddiv_rn, __ddiv_rn_ref, EqValidatorBuilderFactory()); + + +/********** Ternary Functions **********/ + +#define MATH_TERNARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s, \ + double* const x2s, double* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_DP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder( \ + double target, double x1, double x2, double x3) + + +MATH_TERNARY_DP_KERNEL_DEF(__fma_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fma_rn(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_DP_TEST_DEF_IMPL(__fma_rn, static_cast(std::fma), + EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/catch/unit/math/double_precision_intrinsics_negative_kernels.cc b/catch/unit/math/double_precision_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..4ea26ae102 --- /dev/null +++ b/catch/unit/math/double_precision_intrinsics_negative_kernels.cc @@ -0,0 +1,46 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +#define INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } + + +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dadd_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dsub_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dmul_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__ddiv_rn) +INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(__dsqrt_rn) \ No newline at end of file diff --git a/catch/unit/math/half_precision_arithmetic.cc b/catch/unit/math/half_precision_arithmetic.cc new file mode 100644 index 0000000000..b909fb04af --- /dev/null +++ b/catch/unit/math/half_precision_arithmetic.cc @@ -0,0 +1,441 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" + +/** + * @addtogroup HalfPrecisionArithmetic HalfPrecisionArithmetic + * @{ + * @ingroup MathTest + */ + + +MATH_UNARY_HP_KERNEL_DEF(__habs); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__habs(x)` for all possible inputs. The results are + * compared against reference function `float std::abs(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__habs, static_cast(std::abs), + EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(__habs2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__habs2(x)` for all possible inputs. The results are + * compared against reference function `float std::abs(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__habs2, static_cast(std::abs), + EqValidatorBuilderFactory()); + + +static float __hneg_ref(float x) { return -x; } + +MATH_UNARY_HP_KERNEL_DEF(__hneg); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneg(x)` for all possible inputs. The error bounds are + * IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hneg, __hneg_ref, EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(__hneg2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneg2(x)` for all possible inputs. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hneg2, __hneg_ref, EqValidatorBuilderFactory()); + + +// Wrapper to avoid ambiguity error with __hadd(int, int) +__device__ __half __hadd_wrapper(__half x1, __half x2) { return __hadd(x1, x2); } + +static float __hadd_ref(float x1, float x2) { return x1 + x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hadd_wrapper); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd_wrapper, __hadd_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hadd2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd2, __hadd_ref, EqValidatorBuilderFactory()); + + +static float __hadd_sat_ref(float x1, float x2) { return std::clamp(x1 + x2, 0.0f, 1.0f); } + +MATH_BINARY_HP_KERNEL_DEF(__hadd_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd_sat, __hadd_sat_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hadd2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd2_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd2_sat, __hadd_sat_ref, EqValidatorBuilderFactory()); + + +static float __hsub_ref(float x1, float x2) { return x1 - x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hsub); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub, __hsub_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hsub2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub2, __hsub_ref, EqValidatorBuilderFactory()); + + +static float __hsub_sat_ref(float x1, float x2) { return std::clamp(x1 - x2, 0.0f, 1.0f); } + +MATH_BINARY_HP_KERNEL_DEF(__hsub_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub_sat, __hsub_sat_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hsub2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub2_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub2_sat, __hsub_sat_ref, EqValidatorBuilderFactory()); + + +static float __hmul_ref(float x1, float x2) { return x1 * x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hmul); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul, __hmul_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hmul2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul2, __hmul_ref, EqValidatorBuilderFactory()); + + +static float __hmul_sat_ref(float x1, float x2) { return std::clamp(x1 * x2, 0.0f, 1.0f); } + +MATH_BINARY_HP_KERNEL_DEF(__hmul_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul_sat, __hmul_sat_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hmul2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul2_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul2_sat, __hmul_sat_ref, EqValidatorBuilderFactory()); + + +static float __hdiv_ref(float x1, float x2) { return x1 / x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hdiv); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hdiv(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hdiv, __hdiv_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__h2div); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__h2div(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__h2div, __hdiv_ref, EqValidatorBuilderFactory()); + + +MATH_TERNARY_HP_KERNEL_DEF(__hfma); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma, static_cast(std::fma), + EqValidatorBuilderFactory()); + +MATH_TERNARY_HP_KERNEL_DEF(__hfma2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma2(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma2, static_cast(std::fma), + EqValidatorBuilderFactory()); + + +static float __hfma_sat_ref(float x1, float x2, float x3) { + return std::clamp(std::fma(x1, x2, x3), 0.0f, 1.0f); +} + +MATH_TERNARY_HP_KERNEL_DEF(__hfma_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma_sat(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma_sat, __hfma_sat_ref, EqValidatorBuilderFactory()); + +MATH_TERNARY_HP_KERNEL_DEF(__hfma2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma2_sat(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma2_sat, __hfma_sat_ref, EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/catch/unit/math/half_precision_arithmetic_negative_kernels.cc b/catch/unit/math/half_precision_arithmetic_negative_kernels.cc new file mode 100644 index 0000000000..855499816d --- /dev/null +++ b/catch/unit/math/half_precision_arithmetic_negative_kernels.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + + +#define UNARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x) { __half result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half result = func_name(x); } + +#define BINARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half x, __half* y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half x, Dummy y) { __half result = func_name(x, y); } + +#define TERNARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v2(__half x, __half* y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v3(__half x, __half y, __half* z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v4(Dummy x, __half y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v5(__half x, Dummy y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v6(__half x, __half y, Dummy z) { \ + __half result = func_name(x, y, z); \ + } + +UNARY_HALF_NEGATIVE_KERNELS(__habs) +UNARY_HALF_NEGATIVE_KERNELS(__hneg) + +BINARY_HALF_NEGATIVE_KERNELS(__hadd) +BINARY_HALF_NEGATIVE_KERNELS(__hadd_sat) 
+BINARY_HALF_NEGATIVE_KERNELS(__hsub) +BINARY_HALF_NEGATIVE_KERNELS(__hsub_sat) +BINARY_HALF_NEGATIVE_KERNELS(__hmul) +BINARY_HALF_NEGATIVE_KERNELS(__hmul_sat) +BINARY_HALF_NEGATIVE_KERNELS(__hdiv) + +TERNARY_HALF_NEGATIVE_KERNELS(__hfma) +TERNARY_HALF_NEGATIVE_KERNELS(__hfma_sat) + + +#define UNARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x) { __half2 result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half2 result = func_name(x); } + +#define BINARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(Dummy x, __half2 y) { __half2 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half2 x, Dummy y) { __half2 result = func_name(x, y); } + +#define TERNARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v3(__half2 x, __half2 y, __half2* z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v4(Dummy x, __half2 y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v5(__half2 x, Dummy y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v6(__half2 x, __half2 y, Dummy z) { \ + __half2 result = func_name(x, y, z); \ + } + +UNARY_HALF2_NEGATIVE_KERNELS(__habs2) +UNARY_HALF2_NEGATIVE_KERNELS(__hneg2) + +BINARY_HALF2_NEGATIVE_KERNELS(__hadd2) +BINARY_HALF2_NEGATIVE_KERNELS(__hadd2_sat) +BINARY_HALF2_NEGATIVE_KERNELS(__hsub2) 
+BINARY_HALF2_NEGATIVE_KERNELS(__hsub2_sat) +BINARY_HALF2_NEGATIVE_KERNELS(__hmul2) +BINARY_HALF2_NEGATIVE_KERNELS(__hmul2_sat) +BINARY_HALF2_NEGATIVE_KERNELS(__h2div) + +TERNARY_HALF2_NEGATIVE_KERNELS(__hfma2) +TERNARY_HALF2_NEGATIVE_KERNELS(__hfma2_sat) \ No newline at end of file diff --git a/catch/unit/math/half_precision_common.hh b/catch/unit/math/half_precision_common.hh new file mode 100644 index 0000000000..1f494058b0 --- /dev/null +++ b/catch/unit/math/half_precision_common.hh @@ -0,0 +1,103 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + + +/********** Unary **********/ + +#define MATH_UNARY_HP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(Float16* const ys, const size_t num_xs, Float16* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_HP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnaryHalfPrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_HP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_HP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_HP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x) + + +/********** Binary **********/ + +#define MATH_BINARY_HP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(Float16* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define MATH_BINARY_HP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_BINARY_HP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_HP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_BINARY_HP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2) + + +/********** Ternary **********/ + +#define
MATH_TERNARY_HP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(Float16* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s, Float16* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_HP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_HP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_HP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_HP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2, float x3) \ No newline at end of file diff --git a/catch/unit/math/half_precision_comparison.cc b/catch/unit/math/half_precision_comparison.cc new file mode 100644 index 0000000000..c736054e6d --- /dev/null +++ b/catch/unit/math/half_precision_comparison.cc @@ -0,0 +1,847 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" + +/** + * @addtogroup HalfPrecisionComparison HalfPrecisionComparison + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +#define MATH_BOOL_UNARY_HP_TEST_DEF(func_name, ref_func) \ + __global__ void func_name##_kernel(bool* const ys, const size_t num_xs, Float16* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } \ + \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnaryHalfPrecisionTest(func_name##_kernel, ref_func, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisinf(x)` for all possible inputs. The results are + * compared against reference function `bool std::isinf(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BOOL_UNARY_HP_TEST_DEF(__hisinf, static_cast(std::isinf)) + +static float __hisinf2_ref(float x) { return static_cast(std::isinf(x)); } + +MATH_UNARY_HP_KERNEL_DEF(__hisinf2) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisinf2(x)` for all possible inputs. The results are + * compared against reference function `float std::isinf(float)`. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hisinf2, __hisinf2_ref, EqValidatorBuilderFactory()); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisnan(x)` for all possible inputs. The results are + * compared against reference function `bool std::isnan(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BOOL_UNARY_HP_TEST_DEF(__hisnan, static_cast(std::isnan)) + +static float __hisnan2_ref(float x) { return static_cast(std::isnan(x)); } + +MATH_UNARY_HP_KERNEL_DEF(__hisnan2) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisnan2(x)` for all possible inputs. The results are + * compared against reference function `__hisnan2_ref`, which returns `std::isnan(float)` as float.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hisnan2, __hisnan2_ref, EqValidatorBuilderFactory()); + +/********** Binary Functions **********/ + +#define MATH_COMPARISON_HP_TEST_DEF(func_name, ref_func, T, RT, nan_value) \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } \ + \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, \ + EqValidatorBuilderFactory()); \ + } + + +template static T __heq_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 == x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__heq(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__heq, __heq_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbeq2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbeq2, __heq_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hequ(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hequ, __heq_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbequ2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbequ2, __heq_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__heq2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__heq2, __heq_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hequ2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hequ2, __heq_ref, Float16, float, true) + + +template static T __hne_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 != x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hne(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hne, __hne_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbne2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbne2, __hne_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hneu, __hne_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbneu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'not equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbneu2, __hne_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hne2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hne2, __hne_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hneu2, __hne_ref, Float16, float, true) + + +template static T __hge_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 >= x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hge(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hge, __hge_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbge2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than or equal to' relational operator for float operands.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbge2, __hge_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgeu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgeu, __hge_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbgeu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'greater than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbgeu2, __hge_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hge2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than or equal to' relational operator for float operands.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hge2, __hge_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgeu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgeu2, __hge_ref, Float16, float, true) + + +template static T __hgt_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 > x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgt(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgt, __hgt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbgt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbgt2, __hgt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgtu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgtu, __hgt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbgtu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbgtu2, __hgt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgt2, __hgt_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgtu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgtu2, __hgt_ref, Float16, float, true) + + +template static T __hle_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 <= x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hle(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hle, __hle_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hble2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than or equal to' relational operator for float operands.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hble2, __hle_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hleu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hleu, __hle_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbleu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'less than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbleu2, __hle_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hle2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than or equal to' relational operator for float operands.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hle2, __hle_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hleu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than or equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hleu2, __hle_ref, Float16, float, true) + + +template static T __hlt_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 < x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hlt(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hlt, __hlt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hblt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hblt2, __hlt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hltu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hltu, __hlt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbltu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'less than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbltu2, __hlt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hlt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hlt2, __hlt_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hltu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hltu2, __hlt_ref, Float16, float, true) + +MATH_BINARY_HP_KERNEL_DEF(__hmax) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmax(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against reference + * function `float std::fmax(float, float)` + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmax, static_cast(std::fmax), + EqValidatorBuilderFactory()) + +MATH_BINARY_HP_KERNEL_DEF(__hmin) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmin(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. 
The results are compared against reference + * function `float std::fmin(float, float)` + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmin, static_cast(std::fmin), + EqValidatorBuilderFactory()) + +static float __hmax_nan_ref(float x1, float x2) { + if (std::isnan(x1)) + return x1; + else if (std::isnan(x2)) + return x2; + else + return std::fmax(x1, x2); +} + +MATH_BINARY_HP_KERNEL_DEF(__hmax_nan) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmax_nan(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against + * reference function `float std::fmax(float, float)` with modified result when an operand is nan. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmax_nan, __hmax_nan_ref, EqValidatorBuilderFactory()) + +static float __hmin_nan_ref(float x1, float x2) { + if (std::isnan(x1)) + return x1; + else if (std::isnan(x2)) + return x2; + else + return std::fmin(x1, x2); +} + +MATH_BINARY_HP_KERNEL_DEF(__hmin_nan) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmin_nan(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against + * reference function `float std::fmin(float, float)` with modified result when an operand is nan. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmin_nan, __hmin_nan_ref, EqValidatorBuilderFactory()) \ No newline at end of file diff --git a/catch/unit/math/half_precision_comparison_negative_kernels.cc b/catch/unit/math/half_precision_comparison_negative_kernels.cc new file mode 100644 index 0000000000..a045af211c --- /dev/null +++ b/catch/unit/math/half_precision_comparison_negative_kernels.cc @@ -0,0 +1,120 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + + +#define UNARY_BOOL_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { bool result = func_name(x); } + +#define BINARY_BOOL_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half x, __half* y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half x, Dummy y) { bool result = func_name(x, y); } + + +#define BINARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half x, __half* y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half x, Dummy y) { __half result = func_name(x, y); } + + +UNARY_BOOL_HALF_NEGATIVE_KERNELS(__hisinf) +UNARY_BOOL_HALF_NEGATIVE_KERNELS(__hisnan) + +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__heq) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hequ) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hne) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hneu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hge) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hgeu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hgt) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hgtu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hle) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hleu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hlt) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hltu) + +BINARY_HALF_NEGATIVE_KERNELS(__hmax) +BINARY_HALF_NEGATIVE_KERNELS(__hmax_nan) +BINARY_HALF_NEGATIVE_KERNELS(__hmin) +BINARY_HALF_NEGATIVE_KERNELS(__hmin_nan) + + 
+#define UNARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x) { __half2 result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half2 result = func_name(x); } + +#define BINARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(Dummy x, __half2 y) { __half2 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half2 x, Dummy y) { __half2 result = func_name(x, y); } + +#define BINARY_BOOL_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half2 y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half2 x, Dummy y) { bool result = func_name(x, y); } + +UNARY_HALF2_NEGATIVE_KERNELS(__hisinf2) +UNARY_HALF2_NEGATIVE_KERNELS(__hisnan2) + +BINARY_HALF2_NEGATIVE_KERNELS(__heq2) +BINARY_HALF2_NEGATIVE_KERNELS(__hequ2) +BINARY_HALF2_NEGATIVE_KERNELS(__hne2) +BINARY_HALF2_NEGATIVE_KERNELS(__hneu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hge2) +BINARY_HALF2_NEGATIVE_KERNELS(__hgeu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hgt2) +BINARY_HALF2_NEGATIVE_KERNELS(__hgtu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hle2) +BINARY_HALF2_NEGATIVE_KERNELS(__hleu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hlt2) +BINARY_HALF2_NEGATIVE_KERNELS(__hltu2) + +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbeq2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbequ2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbne2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbneu2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbge2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbgeu2) 
+BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbgt2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbgtu2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hble2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbleu2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hblt2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbltu2) \ No newline at end of file diff --git a/catch/unit/math/half_precision_math.cc b/catch/unit/math/half_precision_math.cc new file mode 100644 index 0000000000..a1524b1f7e --- /dev/null +++ b/catch/unit/math/half_precision_math.cc @@ -0,0 +1,580 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" + +/** + * @addtogroup HalfPrecisionMath HalfPrecisionMath + * @{ + * @ingroup MathTest + */ + + +MATH_UNARY_HP_KERNEL_DEF(hcos); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hcos(x)` for all possible inputs. 
The results are
+ * compared against reference function `float std::cos(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hcos, static_cast(std::cos),
+                            ULPValidatorBuilderFactory(2));
+
+MATH_UNARY_HP_KERNEL_DEF(h2cos);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2cos(x)`, the `__half2` counterpart of `hcos`, for all
+ *   possible inputs. The results are compared against the reference `float std::cos(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2cos, static_cast(std::cos),
+                            ULPValidatorBuilderFactory(2));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hsin);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hsin(x)` for all possible inputs. The results are
+ * compared against reference function `float std::sin(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hsin, static_cast(std::sin),
+                            ULPValidatorBuilderFactory(2));
+
+MATH_UNARY_HP_KERNEL_DEF(h2sin);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2sin(x)` for all possible inputs. The results are
+ * compared against reference function `float std::sin(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2sin, static_cast(std::sin),
+                            ULPValidatorBuilderFactory(2));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hexp);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hexp(x)` for all possible inputs. The results are
+ * compared against reference function `float std::exp(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hexp, static_cast(std::exp),
+                            ULPValidatorBuilderFactory(2));
+
+MATH_UNARY_HP_KERNEL_DEF(h2exp);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2exp(x)`, the `__half2` counterpart of `hexp`, for all
+ *   possible inputs. The results are compared against the reference `float std::exp(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2exp, static_cast(std::exp),
+                            ULPValidatorBuilderFactory(2));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hexp10);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hexp10(x)` for all possible inputs. The results are
+ * compared against reference function `float exp10f(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hexp10, static_cast(exp10f),
+                            ULPValidatorBuilderFactory(2));
+
+MATH_UNARY_HP_KERNEL_DEF(h2exp10);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2exp10(x)` for all possible inputs.
The results are
+ * compared against reference function `float exp10(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2exp10, static_cast(exp10f),
+                            ULPValidatorBuilderFactory(2));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hexp2);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hexp2(x)` for all possible inputs. The results are
+ * compared against reference function `float std::exp2(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hexp2, static_cast(std::exp2),
+                            ULPValidatorBuilderFactory(2));
+
+MATH_UNARY_HP_KERNEL_DEF(h2exp2);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2exp2(x)`, the `__half2` counterpart of `hexp2`, for all
+ *   possible inputs. The results are compared against the reference `float std::exp2(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2exp2, static_cast(std::exp2),
+                            ULPValidatorBuilderFactory(2));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hlog);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hlog(x)` for all possible inputs. The results are
+ * compared against reference function `float std::log(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hlog, static_cast(std::log),
+                            ULPValidatorBuilderFactory(1));
+
+MATH_UNARY_HP_KERNEL_DEF(h2log);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2log(x)`, the `__half2` counterpart of `hlog`, for all
+ *   possible inputs. The results are compared against the reference `float std::log(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2log, static_cast(std::log),
+                            ULPValidatorBuilderFactory(1));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hlog10);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hlog10(x)` for all possible inputs. The results are
+ * compared against reference function `float std::log10(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hlog10, static_cast(std::log10),
+                            ULPValidatorBuilderFactory(2));
+
+MATH_UNARY_HP_KERNEL_DEF(h2log10);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2log10(x)`, the `__half2` counterpart of `hlog10`, for all
+ *   possible inputs. The results are compared against the reference `float std::log10(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2log10, static_cast(std::log10),
+                            ULPValidatorBuilderFactory(2));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hlog2);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hlog2(x)` for all possible inputs.
The results are
+ * compared against reference function `float std::log2(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hlog2, static_cast(std::log2),
+                            ULPValidatorBuilderFactory(1));
+
+MATH_UNARY_HP_KERNEL_DEF(h2log2);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2log2(x)`, the `__half2` counterpart of `hlog2`, for all
+ *   possible inputs. The results are compared against the reference `float std::log2(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2log2, static_cast(std::log2),
+                            ULPValidatorBuilderFactory(1));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hsqrt);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hsqrt(x)` for all possible inputs. The results are
+ * compared against reference function `float std::sqrt(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hsqrt, static_cast(std::sqrt),
+                            ULPValidatorBuilderFactory(1));
+
+MATH_UNARY_HP_KERNEL_DEF(h2sqrt);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2sqrt(x)` for all possible inputs. The results are
+ * compared against reference function `float std::sqrt(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2sqrt, static_cast(std::sqrt),
+                            ULPValidatorBuilderFactory(1));
+
+
+MATH_UNARY_HP_KERNEL_DEF(hceil);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hceil(x)` for all possible inputs. The results are
+ * compared against reference function `float std::ceil(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hceil, static_cast(std::ceil),
+                            EqValidatorBuilderFactory());
+
+MATH_UNARY_HP_KERNEL_DEF(h2ceil);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2ceil(x)`, the `__half2` counterpart of `hceil`, for all
+ *   possible inputs. The results are compared against the reference `float std::ceil(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2ceil, static_cast(std::ceil),
+                            EqValidatorBuilderFactory());
+
+
+MATH_UNARY_HP_KERNEL_DEF(hfloor);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hfloor(x)` for all possible inputs. The results are
+ * compared against reference function `float std::floor(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hfloor, static_cast(std::floor),
+                            EqValidatorBuilderFactory());
+
+MATH_UNARY_HP_KERNEL_DEF(h2floor);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2floor(x)` for all possible inputs.
The results are
+ * compared against reference function `float std::floor(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2floor, static_cast(std::floor),
+                            EqValidatorBuilderFactory());
+
+
+MATH_UNARY_HP_KERNEL_DEF(htrunc);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `htrunc(x)` for all possible inputs. The results are
+ * compared against reference function `float std::trunc(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(htrunc, static_cast(std::trunc),
+                            EqValidatorBuilderFactory());
+
+MATH_UNARY_HP_KERNEL_DEF(h2trunc);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2trunc(x)`, the `__half2` counterpart of `htrunc`, for all
+ *   possible inputs. The results are compared against the reference `float std::trunc(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2trunc, static_cast(std::trunc),
+                            EqValidatorBuilderFactory());
+
+
+static float hrcp_ref(float x) { return 1.0f / x; }  // host-side reference: reciprocal in float
+
+MATH_UNARY_HP_KERNEL_DEF(hrcp);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hrcp(x)` for all possible inputs.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hrcp, hrcp_ref, EqValidatorBuilderFactory());
+
+MATH_UNARY_HP_KERNEL_DEF(h2rcp);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2rcp(x)` for all possible inputs against `hrcp_ref`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2rcp, hrcp_ref, EqValidatorBuilderFactory());
+
+
+static float hrsqrt_ref(float x) { return 1.0f / std::sqrt(x); }  // host-side reference
+
+MATH_UNARY_HP_KERNEL_DEF(hrsqrt);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hrsqrt(x)` for all possible inputs against `hrsqrt_ref`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hrsqrt, hrsqrt_ref, EqValidatorBuilderFactory());
+
+MATH_UNARY_HP_KERNEL_DEF(h2rsqrt);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2rsqrt(x)` for all possible inputs against `hrsqrt_ref`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2rsqrt, hrsqrt_ref, EqValidatorBuilderFactory());
+
+
+MATH_UNARY_HP_KERNEL_DEF(hrint);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `hrint(x)` for all possible inputs. The results are
+ * compared against reference function `float std::rint(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(hrint, static_cast(std::rint),
+                            EqValidatorBuilderFactory());
+
+MATH_UNARY_HP_KERNEL_DEF(h2rint);
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `h2rint(x)`, the `__half2` counterpart of `hrint`, for all
+ *   possible inputs. The results are compared against the reference `float std::rint(float)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/half_precision_math.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_HP_TEST_DEF_IMPL(h2rint, static_cast(std::rint),
+                            EqValidatorBuilderFactory());
\ No newline at end of file
diff --git a/catch/unit/math/half_precision_math_negative_kernels.cc b/catch/unit/math/half_precision_math_negative_kernels.cc
new file mode 100644
index 0000000000..bf0338974d
--- /dev/null
+++ b/catch/unit/math/half_precision_math_negative_kernels.cc
@@ -0,0 +1,72 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include  // NOTE(review): header name is missing in this text — restore before building
+#include
+
+class Dummy {  // empty class handed to the math functions as a mismatched argument type
+ public:
+  __device__ Dummy() {}
+  __device__ ~Dummy() {}
+};
+
+
+#define UNARY_HALF_NEGATIVE_KERNELS(func_name) \
+  __global__ void func_name##_kernel_v1(__half* x) { __half result = func_name(x); } \
+  __global__ void func_name##_kernel_v2(Dummy x) { __half result = func_name(x); }
+
+UNARY_HALF_NEGATIVE_KERNELS(hcos)  // _kernel_v1: __half* argument, _kernel_v2: Dummy argument
+UNARY_HALF_NEGATIVE_KERNELS(hsin)
+UNARY_HALF_NEGATIVE_KERNELS(hexp)
+UNARY_HALF_NEGATIVE_KERNELS(hexp10)
+UNARY_HALF_NEGATIVE_KERNELS(hexp2)
+UNARY_HALF_NEGATIVE_KERNELS(hlog)
+UNARY_HALF_NEGATIVE_KERNELS(hlog10)
+UNARY_HALF_NEGATIVE_KERNELS(hlog2)
+UNARY_HALF_NEGATIVE_KERNELS(hsqrt)
+UNARY_HALF_NEGATIVE_KERNELS(hceil)
+UNARY_HALF_NEGATIVE_KERNELS(hfloor)
+UNARY_HALF_NEGATIVE_KERNELS(htrunc)
+UNARY_HALF_NEGATIVE_KERNELS(hrcp)
+UNARY_HALF_NEGATIVE_KERNELS(hrsqrt)
+UNARY_HALF_NEGATIVE_KERNELS(hrint)
+
+
+#define UNARY_HALF2_NEGATIVE_KERNELS(func_name) \
+  __global__ void func_name##_kernel_v1(__half2* x) { __half2 result = func_name(x); } \
+  __global__ void func_name##_kernel_v2(Dummy x) { __half2 result = func_name(x); }
+
+UNARY_HALF2_NEGATIVE_KERNELS(h2cos)  // _kernel_v1: __half2* argument, _kernel_v2: Dummy argument
+UNARY_HALF2_NEGATIVE_KERNELS(h2sin)
+UNARY_HALF2_NEGATIVE_KERNELS(h2exp)
+UNARY_HALF2_NEGATIVE_KERNELS(h2exp10)
+UNARY_HALF2_NEGATIVE_KERNELS(h2exp2)
+UNARY_HALF2_NEGATIVE_KERNELS(h2log)
+UNARY_HALF2_NEGATIVE_KERNELS(h2log10)
+UNARY_HALF2_NEGATIVE_KERNELS(h2log2)
+UNARY_HALF2_NEGATIVE_KERNELS(h2sqrt)
+UNARY_HALF2_NEGATIVE_KERNELS(h2ceil)
+UNARY_HALF2_NEGATIVE_KERNELS(h2floor)
+UNARY_HALF2_NEGATIVE_KERNELS(h2trunc)
+UNARY_HALF2_NEGATIVE_KERNELS(h2rcp)
+UNARY_HALF2_NEGATIVE_KERNELS(h2rsqrt)
+UNARY_HALF2_NEGATIVE_KERNELS(h2rint)
diff --git a/catch/unit/math/integer_intrinsics.cc
b/catch/unit/math/integer_intrinsics.cc
new file mode 100644
index 0000000000..ee9cd760a8
--- /dev/null
+++ b/catch/unit/math/integer_intrinsics.cc
@@ -0,0 +1,794 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include  // NOTE(review): header name is missing in this text — restore before building
+#include
+
+__global__ void __brev_kernel(unsigned int* y, unsigned int x) { y[0] = __brev(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__brev(x)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___brev_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  __brev_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAA);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0x55555555);  // 0xAAAAAAAA with its 32 bits in reverse order
+}
+
+__global__ void __brevll_kernel(unsigned long long int* y, unsigned long long int x) {
+  y[0] = __brevll(x);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__brevll(x)`: 0xAAAAAAAAAAAAAAAA must bit-reverse to 0x5555555555555555.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___brevll_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged,
+                     sizeof(unsigned long long int));
+
+  __brevll_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAAAAAAAAAA);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0x5555555555555555);
+}
+
+template __global__ void __clz_kernel(T* y, T x) { y[0] = __clz(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__clz(x)`. Run for `int` and `unsigned int` overloads.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___clz_Sanity_Positive", "", int, unsigned int) {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __clz_kernel<<<1, 1>>>(y.ptr(), static_cast(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 32);  // an input of 0 is expected to report 32
+
+  TestType x = 1;
+  for (int i = 0; i < 32; ++i) {
+    __clz_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == 31 - i);  // only bit i is set
+  }
+}
+
+template __global__ void __clzll_kernel(T* y, T x) { y[0] = __clzll(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__clzll(x)`. Run for `long long int` and `unsigned long long int`
+ *   overloads. Expects 64 for an input of 0 and `63 - i` for `1 << i`, with i in [0, 64).
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___clzll_Sanity_Positive", "", long long int,
+                   unsigned long long int) {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __clzll_kernel<<<1, 1>>>(y.ptr(), static_cast(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 64);
+
+  TestType x = 1;
+  for (int i = 0; i < 64; ++i) {
+    __clzll_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == 63 - i);
+  }
+}
+
+template __global__ void __ffs_kernel(T* y, T x) { y[0] = __ffs(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__ffs(x)`. Run for `int` and `unsigned int` overloads.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___ffs_Sanity_Positive", "", int, unsigned int) {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __ffs_kernel<<<1, 1>>>(y.ptr(), static_cast(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);  // an input of 0 has no set bit
+
+  TestType x = 1;
+  for (int i = 0; i < 32; ++i) {
+    __ffs_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);  // 1-based index of the single set bit
+  }
+}
+
+template __global__ void __ffsll_kernel(T* y, T x) { y[0] = __ffsll(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__ffsll(x)`. Run for `long long int` and `unsigned long long int`
+ *   overloads. Expects 0 for an input of 0 and `i + 1` for `1 << i`, with i in [0, 64).
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___ffsll_Sanity_Positive", "", long long int,
+                   unsigned long long int) {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __ffsll_kernel<<<1, 1>>>(y.ptr(), static_cast(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);
+
+  TestType x = 1;
+  for (int i = 0; i < 64; ++i) {
+    __ffsll_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);
+  }
+}
+
+__global__ void __popc_kernel(unsigned int* y, unsigned int x) { y[0] = __popc(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__popc(x)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___popc_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  __popc_kernel<<<1, 1>>>(y.ptr(), 0);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);
+
+  unsigned int x = 0;
+  for (int i = 0; i < 32; ++i) {
+    __popc_kernel<<<1, 1>>>(y.ptr(), x |= (1u << i));  // x now has bits 0..i set
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);
+  }
+}
+
+__global__ void __popcll_kernel(unsigned long long int* y, unsigned long long int x) {
+  y[0] = __popcll(x);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__popcll(x)`: expects 0 for 0 and `i + 1` once bits 0..i are all set.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___popcll_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged,
+                     sizeof(unsigned long long int));
+
+  __popcll_kernel<<<1, 1>>>(y.ptr(), 0);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);
+
+  unsigned long long int x = 0;
+  for (int i = 0; i < 64; ++i) {
+    __popcll_kernel<<<1, 1>>>(y.ptr(), x |= (1ull << i));
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);
+  }
+}
+
+__global__ void __mul24_kernel(int* y, int x1, int x2) { y[0] = __mul24(x1, x2); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__mul24(x,y)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___mul24_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int));
+
+  int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF);
+  int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF);
+
+  __mul24_kernel<<<1, 1>>>(y.ptr(), x1, x2);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == x1 * x2);  // host-side product is the reference
+}
+
+__global__ void __umul24_kernel(unsigned int* y, unsigned int x1, unsigned int x2) {
+  y[0] = __umul24(x1, x2);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__umul24(x,y)`: must match `x * y` for x, y drawn from {0, 42, 0xFFFFFF}.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___umul24_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  unsigned int x1 = GENERATE(0, 42, 0xFFFFFF);
+  unsigned int x2 = GENERATE(0, 42, 0xFFFFFF);
+
+  __umul24_kernel<<<1, 1>>>(y.ptr(), x1, x2);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == x1 * x2);
+}
+
+__global__ void __funnelshift_l_kernel(unsigned int* y, unsigned int lo, unsigned int hi,
+                                       unsigned int shift) {
+  y[0] = __funnelshift_l(lo, hi, shift);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__funnelshift_l(lo,hi,shift)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___funnelshift_l_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB;
+  const unsigned long long hi_lo = (static_cast(hi) << 32) | lo;
+
+  for (unsigned int shift = 0; shift < 64; ++shift) {  // reference below uses shift & 31
+    __funnelshift_l_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    INFO("shift: " << shift);
+    REQUIRE(y.ptr()[0] == static_cast((hi_lo << (shift & 31)) >> 32));
+  }
+}
+
+__global__ void __funnelshift_lc_kernel(unsigned int* y, unsigned int lo, unsigned int hi,
+                                        unsigned int shift) {
+  y[0] = __funnelshift_lc(lo, hi, shift);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__funnelshift_lc(lo,hi,shift)`; the reference clamps `shift` to 32.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___funnelshift_lc_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB;
+  const unsigned long long hi_lo = (static_cast(hi) << 32) | lo;
+
+  for (unsigned int shift = 0; shift < 64; ++shift) {
+    __funnelshift_lc_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    INFO("shift: " << shift);
+    REQUIRE(y.ptr()[0] == static_cast((hi_lo << std::min(shift, 32u)) >> 32));
+  }
+}
+
+__global__ void __funnelshift_r_kernel(unsigned int* y, unsigned int lo, unsigned int hi,
+                                       unsigned int shift) {
+  y[0] = __funnelshift_r(lo, hi, shift);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__funnelshift_r(lo,hi,shift)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___funnelshift_r_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB;
+  const unsigned long long hi_lo = (static_cast(hi) << 32) | lo;
+
+  for (unsigned int shift = 0; shift < 64; ++shift) {  // reference below uses shift & 31
+    __funnelshift_r_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    INFO("shift: " << shift);
+    REQUIRE(y.ptr()[0] == static_cast(hi_lo >> (shift & 31)));
+  }
+}
+
+__global__ void __funnelshift_rc_kernel(unsigned int* y, unsigned int lo, unsigned int hi,
+                                        unsigned int shift) {
+  y[0] = __funnelshift_rc(lo, hi, shift);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__funnelshift_rc(lo,hi,shift)`; the reference clamps `shift` to 32.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___funnelshift_rc_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB;
+  const unsigned long long hi_lo = (static_cast(hi) << 32) | lo;
+
+  for (unsigned int shift = 0; shift < 64; ++shift) {
+    __funnelshift_rc_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    INFO("shift: " << shift);
+    REQUIRE(y.ptr()[0] == static_cast(hi_lo >> std::min(shift, 32u)));
+  }
+}
+
+__global__ void __hadd_kernel(int* y, int x1, int x2) { y[0] = __hadd(x1, x2); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__hadd(x,y)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___hadd_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int));
+
+  int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF);
+  int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF);
+
+  __hadd_kernel<<<1, 1>>>(y.ptr(), x1, x2);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  INFO("x1: " << x1);
+  INFO("x2: " << x2);
+  REQUIRE(y.ptr()[0] == static_cast((static_cast(x1) + x2) >> 1));  // sum, halved
+}
+
+__global__ void __uhadd_kernel(unsigned int* y, unsigned int x1, unsigned int x2) {
+  y[0] = __uhadd(x1, x2);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__uhadd(x,y)`: expects `(x + y) >> 1` for x, y in {0, 42, 0xFFFFFFFF}.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___uhadd_Sanity_Positive") {
+  LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF);
+  unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF);
+
+  __uhadd_kernel<<<1, 1>>>(y.ptr(), x1, x2);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  INFO("x1: " << x1);
+  INFO("x2: " << x2);
+  REQUIRE(y.ptr()[0] == static_cast((static_cast(x1) + x2) >> 1));
+}
+
+__global__ void __rhadd_kernel(int* y, int x1, int x2) { y[0] = __rhadd(x1, x2); }
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `__rhadd(x,y)`.
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___rhadd_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __rhadd_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == static_cast((static_cast(x1) + x2 + 1) >> 1)); +} + +__global__ void __urhadd_kernel(unsigned int* y, unsigned int x1, unsigned int x2) { + y[0] = __urhadd(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__urhadd(x,y)`. The device result is compared against the host-side value of (x + y + 1) >> 1. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___urhadd_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF); + + __urhadd_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast((static_cast(x1) + x2 + 1) >> 1)); +} + +__global__ void __mulhi_kernel(int* y, int x1, int x2) { y[0] = __mulhi(x1, x2); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__mulhi(x,y)`. The device result is compared against the host-side product x * y shifted right by 32. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___mulhi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __mulhi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast((static_cast(x1) * static_cast(x2)) >> 32)); +} + +__global__ void __umulhi_kernel(unsigned int* y, unsigned int x1, unsigned int x2) { + y[0] = __umulhi(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__umulhi(x,y)`. The device result is compared against the host-side product x * y shifted right by 32. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___umulhi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF); + + __umulhi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast((static_cast(x1) * x2) >> 32)); +} + +__global__ void __mul64hi_kernel(long long* y, long long x1, long long x2) { + y[0] = __mul64hi(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__mul64hi(x,y)`. The device result is compared against the 128-bit host-side product x * y shifted right by 64. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___mul64hi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(long long)); + + long long x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + long long x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __mul64hi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE( + y.ptr()[0] == + static_cast((static_cast<__int128_t>(x1) * static_cast<__int128_t>(x2)) >> 64)); +} + +__global__ void __umul64hi_kernel(unsigned long long* y, unsigned long long x1, + unsigned long long x2) { + y[0] = __umul64hi(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__umul64hi(x,y)`. The device result is compared against the unsigned 128-bit host-side product x * y shifted right by 64. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___umul64hi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, + sizeof(unsigned long long)); + + unsigned long long x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned long long x2 = GENERATE(0, 42, 0xFFFFFFFF); + + __umul64hi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast( + (static_cast<__uint128_t>(x1) * static_cast<__uint128_t>(x2)) >> 64)); +} + +__global__ void __sad_kernel(unsigned int* y, int x1, int x2, unsigned int x3) { + y[0] = __sad(x1, x2, x3); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__sad(x,y,z)`. The device result is compared against the host-side value of |x - y| + z. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___sad_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + unsigned int x3 = GENERATE(0, 42, 0xFFFFFFFF); + + __sad_kernel<<<1, 1>>>(y.ptr(), x1, x2, x3); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == (static_cast(std::abs(x1 - x2)) + x3)); +} + +__global__ void __usad_kernel(unsigned int* y, unsigned int x1, unsigned int x2, unsigned int x3) { + y[0] = __usad(x1, x2, x3); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__usad(x,y,z)`. The device result is compared against the host-side value of |x - y| + z. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___usad_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x3 = GENERATE(0, 42, 0xFFFFFFFF); + + __usad_kernel<<<1, 1>>>(y.ptr(), x1, x2, x3); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + (static_cast( + std::abs(static_cast(x1) - static_cast(x2))) + + x3)); +} + +__global__ void __byte_perm(unsigned int* y, unsigned int x1, unsigned int x2, unsigned int s) { + y[0] = __byte_perm(x1, x2, s); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__byte_perm(x,y,s)`. Each of the four selector nibbles of s picks one of the eight bytes of the pair (x, y); the device result is compared against the same selection done on the host. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___byte_perm_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int bytes[] = {0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; + + unsigned int x1 = (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | bytes[0]; + unsigned int x2 = (bytes[7] << 24) | (bytes[6] << 16) | (bytes[5] << 8) | bytes[4]; + + unsigned int s0 = GENERATE(0, 1); + unsigned int s1 = GENERATE(2, 3); + unsigned int s2 = GENERATE(4, 5); + unsigned int s3 = GENERATE(6, 7); + /* One selector nibble per result byte: s0 picks byte 0, ..., s3 picks byte 3. */ + unsigned int s = (s3 << 12) | (s2 << 8) | (s1 << 4) | s0; + + __byte_perm<<<1, 1>>>(y.ptr(), x1, x2, s); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int expected = (bytes[s3] << 24) | (bytes[s2] << 16) | (bytes[s1] << 8) | bytes[s0]; + REQUIRE(y.ptr()[0] == expected); +} \ No newline at end of file diff --git a/catch/unit/math/integer_intrinsics_negative_kernels.cc b/catch/unit/math/integer_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..ec5ac98fe3 --- /dev/null +++ b/catch/unit/math/integer_intrinsics_negative_kernels.cc @@ -0,0 +1,67 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +/* Class type passed to the intrinsics below in place of an integer argument. */ +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +/* Defines two kernels that call the unary int intrinsic func_name with an int* or a Dummy argument. */ +#define INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(int* x) { int result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { int result = func_name(x); } +/* Defines two kernels that call the unary long long intrinsic func_name with a long long int* or a Dummy argument. */ +#define INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(long long int* x) { long long int result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { long long int result = func_name(x); } +/* Defines four kernels that call the binary int intrinsic func_name with a pointer or a Dummy in the first or the second position. */ +#define INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(int* x, int y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(int x, int* y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, int y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(int x, Dummy y) { int result = func_name(x, y); } +/* Long long counterpart of the binary macro above. func_name is called directly in every kernel: pasting it to the opening parenthesis with ## does not form a valid preprocessing token. */ +#define INTRINSIC_BINARY_LONGLONG_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(long long int* x, long long int y) { \ + long long int result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(long long int x, long long int* y) { \ + long long int result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(Dummy x, long long int y) { \ + long long int result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v4(long long int x, Dummy y) { \ + long 
long int result = func_name(x, y); \ + } + +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__brev) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__clz) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__ffs) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__popc) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__brevll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__clzll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__ffsll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__popcll) +INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(__mul24) \ No newline at end of file diff --git a/catch/unit/math/log_funcs.cc b/catch/unit/math/log_funcs.cc new file mode 100644 index 0000000000..83ec1806f3 --- /dev/null +++ b/catch/unit/math/log_funcs.cc @@ -0,0 +1,260 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "unary_common.hh" +#include "math_log_negative_kernels_rtc.hh" + +/** + * @addtogroup LogMathFuncs LogMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `logf(x)` for all possible inputs and `log(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log, 1, 1) + +/** + * Test Description + * ------------------------ + * - Compiles, via hipRTC, kernels that pass an argument of invalid type to `logf` and `log`, and expects compilation to fail with exactly 4 errors. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log_logf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `log2f(x)` for all possible inputs and `log2(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log2(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log2, 1, 1) + +/** + * Test Description + * ------------------------ + * - Compiles, via hipRTC, kernels that pass an argument of invalid type to `log2f` and `log2`, and expects compilation to fail with exactly 4 errors. 
+ * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log2_log2f_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog2); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `log10f(x)` for all possible inputs and `log10(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log10(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log10, 2, 1) + +/** + * Test Description + * ------------------------ + * - Compiles, via hipRTC, kernels that pass an argument of invalid type to `log10f` and `log10`, and expects compilation to fail with exactly 4 errors. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log10_log10f_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog10); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `log1pf(x)` for all possible inputs and `log1p(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log1p(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log1p, 1, 1) + +/** + * Test Description + * ------------------------ + * - Compiles, via hipRTC, kernels that pass an argument of invalid type to `log1pf` and `log1p`, and expects compilation to fail with exactly 4 errors. 
+ * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log1p_log1pf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog1p); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `logbf(x)` for all possible inputs and `logb(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::logb(T)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(logb, 0, 0) + +/** + * Test Description + * ------------------------ + * - Compiles, via hipRTC, kernels that pass an argument of invalid type to `logbf` and `logb`, and expects compilation to fail with exactly 4 errors. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_logb_logbf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLogb); } + +/* Stores ilogbf(xs[i]) or ilogb(xs[i]), chosen at compile time from T, in ys[i]; each thread starts at its grid-wide rank and advances by the grid size. */ +template +__global__ void ilogb_kernel(int* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i] = ilogbf(xs[i]); + } else if constexpr (std::is_same_v) { + ys[i] = ilogb(xs[i]); + } + } +} +/* Host-side reference: zero and NaN map to numeric_limits::min(), infinities to numeric_limits::max(), everything else is forwarded to std::ilogb. */ +template int ilogb_ref(T arg) { + if (arg == 0) { + return std::numeric_limits::min(); + } else if (std::isnan(arg)) { + return std::numeric_limits::min(); + } else if (std::isinf(arg)) { + return std::numeric_limits::max(); + } else { + return std::ilogb(arg); + } +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ilogbf(x)` for all possible inputs. 
The results are + * compared against reference function `int std::ilogb(double)` (through the `ilogb_ref` wrapper). The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ilogbf_Accuracy_Positive") { + UnarySinglePrecisionTest(ilogb_kernel, ilogb_ref, + EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ilogb(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `int std::ilogb(long double)` (through the `ilogb_ref` wrapper). The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ilogb_Accuracy_Positive") { + UnaryDoublePrecisionTest(ilogb_kernel, ilogb_ref, + EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - Compiles, via hipRTC, kernels that pass an argument of invalid type to `ilogbf` and `ilogb`, and expects compilation to fail with exactly 4 errors. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ilogb_ilogbf_Negative_RTC") { NegativeTestRTCWrapper<4>(kIlogb); } diff --git a/catch/unit/math/math_common.hh b/catch/unit/math/math_common.hh new file mode 100644 index 0000000000..4f7e5ddd29 --- /dev/null +++ b/catch/unit/math/math_common.hh @@ -0,0 +1,256 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include + +#include + +#include "Float16.hh" +#include "thread_pool.hh" +#include "validators.hh" + +namespace cg = cooperative_groups; +// Streams a std::pair as "<first, second>" using max_digits10 - 1 digits of precision, then restores the stream's previous precision. +template +std::enable_if_t, std::is_arithmetic>, std::ostream&> +operator<<(std::ostream& os, const std::pair& p) { + const auto default_prec = os.precision(); + return os << "<" << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.first << ", " + << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.second << ">" + << std::setprecision(default_prec); +} +// Same formatting for a type exposing members x and y: prints "<x, y>" and restores the stream's previous precision. +template +std::enable_if_t, std::ostream&> +operator<<(std::ostream& os, const T& p) { + const auto default_prec = os.precision(); + return os << "<" << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.x << ", " + << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.y << ">" + << std::setprecision(default_prec); +} + +// This class represents a generic numerical accuracy math test. Template parameter T is the output +// type of the function being tested, and template parameter pack Ts represents the input types. The +// constructor takes a kernel with the signature void(T*, const size_t, Ts*...). The first kernel +// parameter is the output array, the second parameter is the number of outputs, and the rest of the +// parameters are arrays containing input values. The number of input arrays depends on the arity of +// the function being tested e.g. one input array for unary functions, two input arrays for binary +// functions, etc. The kernel threads take one element from each input array at the index +// corresponding to that thread, feed the input elements to the testee function, and store the +// result in the output array at the corresponding index. +// +// E.g. 
for a binary function the kernel would have the following signature: +// void kernel(float* y, const size_t n, float* x1, float* x2) +// +// The outputs would be calculated in parallel the following way: +// y[0] = testee(x1[0], x2[0]) +// y[1] = testee(x1[1], x2[1]) +// y[2] = testee(x1[2], x2[2]) +// ... +// +// The constructor also takes max_num_args, which represents the maximum number of input values used +// for one kernel launch. The device memory for the input and output arrays is allocated based on +// that number. +template class MathTest { + public: + MathTest(void (*kernel)(T*, const size_t, Ts*...), const size_t max_num_args) + : kernel_{kernel}, + xss_dev_(LinearAllocGuard(LinearAllocs::hipMalloc, max_num_args * sizeof(Ts))...), + y_dev_{LinearAllocs::hipMalloc, max_num_args * sizeof(T)}, + y_{LinearAllocs::hipHostMalloc, max_num_args * sizeof(T)} {} + + // This method runs the test with the following steps: + // 1. Copy the values from the input arrays provided in the parameter pack xss to device memory + // 2. Launch the kernel using the configuration provided in grid_dims and block_dims + // 3. Copy the outputs back to host memory + // 4. Generate the reference values using ref_func and compare against the outputs using the + // validator provided by validator_builder + // 5. If non-type template parameter parallel is true, then step 4 is broken up into chunks of + // work that are done in parallel on the host. + template + void Run(const ValidatorBuilder& validator_builder, const size_t grid_dims, + const size_t block_dims, RT (*const ref_func)(RTs...), const size_t num_args, + const Ts*... 
xss) { + fail_flag_.store(false); + error_info_.clear(); + RunImpl(validator_builder, grid_dims, block_dims, ref_func, num_args, + std::index_sequence_for{}, xss...); + } + + private: + void (*kernel_)(T*, const size_t, Ts*...); + std::tuple...> xss_dev_; + LinearAllocGuard y_dev_; + LinearAllocGuard y_; + std::atomic fail_flag_{false}; + std::mutex mtx_; + std::string error_info_; + + template + void RunImpl(const ValidatorBuilder& validator_builder, const size_t grid_dim, + const size_t block_dim, RT (*const ref_func)(RTs...), const size_t num_args, + std::index_sequence, const Ts*... xss) { + const auto xss_tup = std::make_tuple(xss...); + + constexpr auto f = [](auto dst, auto src, size_t size) { + HIP_CHECK(hipMemcpy(dst, src, size, hipMemcpyHostToDevice)) + }; + + ((f(std::get(xss_dev_).ptr(), std::get(xss_tup), + num_args * sizeof(*std::get(xss_tup)))), + ...); + + kernel_<<>>(y_dev_.ptr(), num_args, std::get(xss_dev_).ptr()...); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(y_.ptr(), y_dev_.ptr(), num_args * sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipStreamSynchronize(nullptr)); + + if constexpr (!parallel) { + for (auto i = 0u; i < num_args; ++i) { + const auto actual_val = y_.ptr()[i]; + const auto ref_val = static_cast(ref_func(xss[i]...)); + const auto validator = validator_builder(ref_val, xss[i]...); + + if (!validator->match(actual_val)) { + const auto log = MakeLogMessage(actual_val, xss[i]...) 
+ validator->describe() + "\n"; + INFO(log); + REQUIRE(false); + } + } + + return; + } + + const auto task = [&, this](size_t iters, size_t base_idx) { + for (auto i = 0u; i < iters; ++i) { + if (fail_flag_.load(std::memory_order_relaxed)) return; + + const auto actual_val = y_.ptr()[base_idx + i]; + const auto ref_val = static_cast(ref_func(xss[base_idx + i]...)); + const auto validator = validator_builder(ref_val, xss[base_idx + i]...); + + if (!validator->match(actual_val)) { + fail_flag_.store(true, std::memory_order_relaxed); + // Several threads might have passed the first check, but failed validation. On the + // chance of this happening, access to the shared error string must be serialized. + const auto log = + MakeLogMessage(actual_val, xss[base_idx + i]...) + validator->describe() + "\n"; + { + std::lock_guard lg{mtx_}; + error_info_ += log; + } + return; + } + } + }; + + const auto task_count = thread_pool.thread_count(); + const auto chunk_size = num_args / task_count; + const auto tail = num_args % task_count; + // NOTE(review): base_idx and the loop counters are deduced as unsigned int while num_args is size_t; confirm num_args can never exceed UINT_MAX. + auto base_idx = 0u; + for (auto i = 0u; i < task_count; ++i) { + const auto iters = chunk_size + (i < tail); + thread_pool.Post([=, &task] { task(iters, base_idx); }); + base_idx += iters; + } + + thread_pool.Wait(); + + INFO(error_info_); + REQUIRE(!fail_flag_); + } + + template std::string MakeLogMessage(T actual_val, Args... args) { + std::stringstream ss; + ss << "Input value(s): " << std::scientific + << std::setprecision(std::numeric_limits::max_digits10 - 1); + ((ss << " " << args), ...) 
<< "\n" << actual_val << " "; + + return ss.str(); + } +}; + +template struct RefType {}; + +template <> struct RefType { using type = float; }; + +template <> struct RefType { using type = double; }; + +template <> struct RefType { using type = long double; }; + +template using RefType_t = typename RefType::type; +// Wraps hipOccupancyMaxPotentialBlockSize and returns the suggested (grid_size, block_size) as a tuple. +template auto GetOccupancyMaxPotentialBlockSize(F kernel) { + int grid_size = 0, block_size = 0; + HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&grid_size, &block_size, kernel, 0, 0)); + return std::make_tuple(grid_size, block_size); +} +// Returns the share of device 0's total global memory given by cmd_options.accuracy_max_memory, which is applied as a percentage. +inline size_t GetMaxAllowedDeviceMemoryUsage() { + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, 0)); + return props.totalGlobalMem * (cmd_options.accuracy_max_memory * 0.01f); +} + +inline uint64_t GetTestIterationCount() { return cmd_options.accuracy_iterations; } + +template using kernel_sig = void (*)(T*, const size_t, Ts*...); + +template using ref_sig = T (*)(Ts...); +// Compiles program_source with hipRTC, counts the occurrences of "error:" in the compile log, and requires that compilation failed with HIPRTC_ERROR_COMPILATION and that the count equals error_num. +template void NegativeTestRTCWrapper(const char* program_source) { + hiprtcProgram program{}; + + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "math_test_rtc.cc", 0, nullptr, nullptr)); +#if HT_AMD + std::string args = std::string("-ferror-limit=200"); + const char* options[] = {args.c_str()}; + hiprtcResult result{hiprtcCompileProgram(program, 1, options)}; +#else + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; +#endif + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{error_num}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, 
HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/math/math_log_negative_kernels.cc b/catch/unit/math/math_log_negative_kernels.cc new file mode 100644 index 0000000000..732fc62a08 --- /dev/null +++ b/catch/unit/math/math_log_negative_kernels.cc @@ -0,0 +1,39 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +/* Class type passed to the log functions below in place of a floating-point argument. */ +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +/* Defines four kernels per function: the double variant func_name and the float variant (func_name with an f suffix) are each called with a pointer and with a Dummy argument. */ +#define NEGATIVE_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +NEGATIVE_KERNELS_SHELL(log) +NEGATIVE_KERNELS_SHELL(log2) +NEGATIVE_KERNELS_SHELL(log10) +NEGATIVE_KERNELS_SHELL(log1p) +NEGATIVE_KERNELS_SHELL(logb) +NEGATIVE_KERNELS_SHELL(ilogb) diff --git a/catch/unit/math/math_log_negative_kernels_rtc.hh b/catch/unit/math/math_log_negative_kernels_rtc.hh new file mode 100644 index 0000000000..fd1cbdfcaf --- /dev/null +++ b/catch/unit/math/math_log_negative_kernels_rtc.hh @@ -0,0 +1,96 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math log negative Test Cases that are using RTC. +*/ + +static constexpr auto kLog{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log_kernel_v1(double* x) { double result = log(x); } + __global__ void log_kernel_v2(Dummy x) { double result = log(x); } + __global__ void logf_kernel_v1(float* x) { float result = logf(x); } + __global__ void logf_kernel_v2(Dummy x) { float result = logf(x); } +)"}; + +static constexpr auto kLog2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log2_kernel_v1(double* x) { double result = log2(x); } + __global__ void log2_kernel_v2(Dummy x) { double result = log2(x); } + __global__ void log2f_kernel_v1(float* x) { float result = log2f(x); } + __global__ void log2f_kernel_v2(Dummy x) { float result = log2f(x); } +)"}; + +static constexpr auto kLog10{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log10_kernel_v1(double* x) { double result = log10(x); } + __global__ void log10_kernel_v2(Dummy x) { double result = log10(x); } + __global__ void log10f_kernel_v1(float* x) { float result = log10f(x); } + __global__ void log10f_kernel_v2(Dummy x) { float result = log10f(x); } +)"}; + +static constexpr auto kLog1p{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log1p_kernel_v1(double* x) { double result = log1p(x); } + __global__ void log1p_kernel_v2(Dummy x) { double result = log1p(x); } + __global__ void log1pf_kernel_v1(float* x) { float result = log1pf(x); } + __global__ void log1pf_kernel_v2(Dummy x) { float 
result = log1pf(x); } +)"}; + +static constexpr auto kLogb{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void logb_kernel_v1(double* x) { double result = logb(x); } + __global__ void logb_kernel_v2(Dummy x) { double result = logb(x); } + __global__ void logbf_kernel_v1(float* x) { float result = logbf(x); } + __global__ void logbf_kernel_v2(Dummy x) { float result = logbf(x); } +)"}; + +static constexpr auto kIlogb{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ilogb_kernel_v1(double* x) { double result = ilogb(x); } + __global__ void ilogb_kernel_v2(Dummy x) { double result = ilogb(x); } + __global__ void ilogbf_kernel_v1(float* x) { float result = ilogbf(x); } + __global__ void ilogbf_kernel_v2(Dummy x) { float result = ilogbf(x); } +)"}; diff --git a/catch/unit/math/math_pow_negative_kernels.cc b/catch/unit/math/math_pow_negative_kernels.cc new file mode 100644 index 0000000000..c338e744a9 --- /dev/null +++ b/catch/unit/math/math_pow_negative_kernels.cc @@ -0,0 +1,92 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include <hip/hip_runtime.h> + +/* +Class type with a user-provided __device__ constructor and destructor. The kernels below +pass it to the math functions as an argument of an unrelated type. +*/ +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* +Expands to four kernels for a one-argument function: the double variant (func_name) and +the float variant (func_name##f), each called once with a pointer argument (v1) and once +with a Dummy argument (v2). +NOTE(review): presumably every one of these calls is meant to be rejected by the compiler, +confirm against the test that builds this file. +*/ +#define NEGATIVE_KERNELS_SHELL_EXP(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +/* +Expands to eight kernels for a function called as (value, int): v1/v2 replace the first +argument with a pointer/Dummy, v3/v4 replace the second argument with an int pointer/Dummy. +The same four variants are generated for the float function func_name##f. +*/ +#define NEGATIVE_KERNELS_SHELL_INT_2ND(func_name) \ + __global__ void func_name##_kernel_v1(double* x, int e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v2(Dummy x, int e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v3(double x, int* e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v4(double x, Dummy e) { double result = func_name(x, e); } \ + __global__ void func_name##f_kernel_v1(float* x, int e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v2(Dummy x, int e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v3(float x, int* e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy e) { float result = func_name##f(x, e); } + + +NEGATIVE_KERNELS_SHELL_EXP(exp) +NEGATIVE_KERNELS_SHELL_EXP(exp2) +NEGATIVE_KERNELS_SHELL_EXP(exp10) +NEGATIVE_KERNELS_SHELL_EXP(expm1) + +/* +frexp/frexpf: v1 and v2 pass a pointer or a Dummy as x; v3 to v10 declare nptr as a pointer +to char, short, long, long long, float, double, Dummy and const int respectively. +*/ +__global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, 
nptr); } +__global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); } +__global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v9(float x, Dummy* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); } + +NEGATIVE_KERNELS_SHELL_INT_2ND(ldexp) + +/* +pow/powf: v1 and v2 replace the first argument, v3 and v4 the second argument, with a +pointer or a Dummy. +*/ +__global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); } +__global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); } +__global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); } +__global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); } +__global__ void powf_kernel_v1(float* x, 
float e) { float result = powf(x, e); } +__global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); } +__global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); } +__global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); } + +NEGATIVE_KERNELS_SHELL_INT_2ND(powi) +NEGATIVE_KERNELS_SHELL_INT_2ND(scalbn) + +/* +scalbln/scalblnf: same four variants as above, with a long int second argument. +*/ +__global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); } +__global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); } diff --git a/catch/unit/math/math_pow_negative_kernels_rtc.hh b/catch/unit/math/math_pow_negative_kernels_rtc.hh new file mode 100644 index 0000000000..7c48640bec --- /dev/null +++ b/catch/unit/math/math_pow_negative_kernels_rtc.hh @@ -0,0 +1,150 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math pow negative Test Cases that are using RTC. +Each constant holds one source string with the kernels for a single function pair (double +and float variant). Every string carries its own Dummy declaration so that no string +depends on another one and the Dummy kernels fail on the math call itself rather than on an +undeclared type name. +*/ + +static constexpr auto kExp{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp_kernel_v1(double* x) { double result = exp(x); } + __global__ void exp_kernel_v2(Dummy x) { double result = exp(x); } + __global__ void expf_kernel_v1(float* x) { float result = expf(x); } + __global__ void expf_kernel_v2(Dummy x) { float result = expf(x); } +)"}; + +static constexpr auto kExp2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp2_kernel_v1(double* x) { double result = exp2(x); } + __global__ void exp2_kernel_v2(Dummy x) { double result = exp2(x); } + __global__ void exp2f_kernel_v1(float* x) { float result = exp2f(x); } + __global__ void exp2f_kernel_v2(Dummy x) { float result = exp2f(x); } +)"}; + +static constexpr auto kExp10{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp10_kernel_v1(double* x) { double result = exp10(x); } + __global__ void exp10_kernel_v2(Dummy x) { double result = exp10(x); } + __global__ void exp10f_kernel_v1(float* x) { float result = exp10f(x); } + __global__ void exp10f_kernel_v2(Dummy x) { float result = exp10f(x); } +)"}; + +static constexpr auto kExpm1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void expm1_kernel_v1(double* x) { double result = expm1(x); } 
+ __global__ void expm1_kernel_v2(Dummy x) { double result = expm1(x); } + __global__ void expm1f_kernel_v1(float* x) { float result = expm1f(x); } + __global__ void expm1f_kernel_v2(Dummy x) { float result = expm1f(x); } +)"}; + +static constexpr auto kFrexp{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); } + __global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v9(float x, Dummy* nptr) 
{ float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); } +)"}; + +static constexpr auto kLdexp{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ldexp_kernel_v1(double* x, int e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v2(Dummy x, int e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v3(double x, int* e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v4(double x, Dummy e) { double result = ldexp(x, e); } + __global__ void ldexpf_kernel_v1(float* x, int e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v2(Dummy x, int e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v3(float x, int* e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v4(float x, Dummy e) { float result = ldexpf(x, e); } +)"}; + +static constexpr auto kPow{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); } + __global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); } + __global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); } + __global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); } + __global__ void powf_kernel_v1(float* x, float e) { float result = powf(x, e); } + __global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); } + __global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); } + __global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); } +)"}; + +static constexpr auto kPowi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void powi_kernel_v1(double* x, int e) { double result = powi(x, e); } + __global__ void powi_kernel_v2(Dummy x, int e) { double result = powi(x, e); } + __global__ void powi_kernel_v3(double x, int* e) { double result = powi(x, e); } + __global__ void powi_kernel_v4(double x, Dummy e) { double result = powi(x, e); } + __global__ void powif_kernel_v1(float* x, int e) { float result = 
powif(x, e); } + __global__ void powif_kernel_v2(Dummy x, int e) { float result = powif(x, e); } + __global__ void powif_kernel_v3(float x, int* e) { float result = powif(x, e); } + __global__ void powif_kernel_v4(float x, Dummy e) { float result = powif(x, e); } +)"}; + +static constexpr auto kScalbn{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void scalbn_kernel_v1(double* x, int e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v2(Dummy x, int e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v3(double x, int* e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v4(double x, Dummy e) { double result = scalbn(x, e); } + __global__ void scalbnf_kernel_v1(float* x, int e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v2(Dummy x, int e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v3(float x, int* e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v4(float x, Dummy e) { float result = scalbnf(x, e); } +)"}; + +static constexpr auto kScalbln{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); } + __global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); } +)"}; diff --git a/catch/unit/math/math_remainder_negative_kernels.cc b/catch/unit/math/math_remainder_negative_kernels.cc new file mode 100644 index 0000000000..2ebd26516e --- /dev/null +++ 
b/catch/unit/math/math_remainder_negative_kernels.cc @@ -0,0 +1,113 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include <hip/hip_runtime.h> + +/* +Class type with a user-provided __device__ constructor and destructor. The kernels below +pass it to the math functions as an argument of an unrelated type. +*/ +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* +Expands to eight kernels for a two-argument function. For the double variant (func_name) +and the float variant (func_name##f): v1/v2 pass a pointer as the first/second argument, +v3/v4 pass a Dummy as the first/second argument. +NOTE(review): presumably every one of these calls is meant to be rejected by the compiler, +confirm against the test that builds this file. +*/ +#define NEGATIVE_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { auto result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { auto result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { auto result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { auto result = func_name(x, y); } \ + __global__ void func_name##f_kernel_v1(float* x, float y) { auto result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(float x, float* y) { auto result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(Dummy x, float y) { auto result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy y) { auto result = func_name##f(x, y); } + +NEGATIVE_KERNELS_SHELL(fmod) +NEGATIVE_KERNELS_SHELL(remainder) + +/* +remquo: v1 to v4 replace x or y with a pointer or a Dummy; v5 to v12 declare quo as a +pointer to char, short, long, long long, float, double, Dummy and const int respectively. +*/ +__global__ void remquo_kernel_v1(double* x, double y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v2(Dummy x, double y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v3(double x, double* y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v4(double x, Dummy y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v5(double x, double y, char* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v6(double x, double y, short* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v7(double x, double y, long* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v8(double x, double y, long long* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v9(double x, double y, float* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v10(double x, double y, double* quo) { + 
auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v11(double x, double y, Dummy* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v12(double x, double y, const int* quo) { + auto result = remquo(x, y, quo); +} + +/* +remquof: same twelve variants as remquo, with float arguments. +*/ +__global__ void remquof_kernel_v1(float* x, float y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v2(Dummy x, float y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v3(float x, float* y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v4(float x, Dummy y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v5(float x, float y, char* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v6(float x, float y, short* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v7(float x, float y, long* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v8(float x, float y, long long* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v9(float x, float y, float* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v10(float x, float y, double* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v11(float x, float y, Dummy* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v12(float x, float y, const int* quo) { + auto result = remquof(x, y, quo); +} + +/* +modf: v1 and v2 pass a pointer or a Dummy as x; v3 to v10 declare iptr as a pointer to int, +char, short, long, long long, float, Dummy and const double respectively. +*/ +__global__ void modf_kernel_v1(double* x, double* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v2(Dummy x, double* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v3(double x, int* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v4(double x, char* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v5(double x, short* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v6(double x, 
long* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v7(double x, long long* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v8(double x, float* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v9(double x, Dummy* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v10(double x, const double* iptr) { auto result = modf(x, iptr); } + +/* +modff: v1 and v2 pass a pointer or a Dummy as x; v3 to v10 declare iptr as a pointer to int, +char, short, long, long long, double, Dummy and const float respectively. +*/ +__global__ void modff_kernel_v1(float* x, float* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v2(Dummy x, float* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v3(float x, int* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v4(float x, char* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v5(float x, short* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v6(float x, long* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v7(float x, long long* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v8(float x, double* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v9(float x, Dummy* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v10(float x, const float* iptr) { auto result = modff(x, iptr); } + +NEGATIVE_KERNELS_SHELL(fdim) diff --git a/catch/unit/math/math_remainder_rounding_negative_kernels_rtc.hh b/catch/unit/math/math_remainder_rounding_negative_kernels_rtc.hh new file mode 100644 index 0000000000..e67a6d0092 --- /dev/null +++ b/catch/unit/math/math_remainder_rounding_negative_kernels_rtc.hh @@ -0,0 +1,276 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math remainder and rounding negative Test Cases that are using RTC. +Each constant holds one source string that declares its own Dummy class followed by the +kernels for a single function pair (double and float variant). The one-argument functions +(kTrunc to kLlround) get a pointer variant (v1) and a Dummy variant (v2) per function. 
+*/ + +static constexpr auto kTrunc{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void trunc_kernel_v1(double* x) { auto result = trunc(x); } + __global__ void trunc_kernel_v2(Dummy x) { auto result = trunc(x); } + __global__ void truncf_kernel_v1(float* x) { auto result = truncf(x); } + __global__ void truncf_kernel_v2(Dummy x) { auto result = truncf(x); } +)"}; + +static constexpr auto kRound{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void round_kernel_v1(double* x) { auto result = round(x); } + __global__ void round_kernel_v2(Dummy x) { auto result = round(x); } + __global__ void roundf_kernel_v1(float* x) { auto result = roundf(x); } + __global__ void roundf_kernel_v2(Dummy x) { auto result = roundf(x); } +)"}; + +static constexpr auto kRint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rint_kernel_v1(double* x) { auto result = rint(x); } + __global__ void rint_kernel_v2(Dummy x) { auto result = rint(x); } + __global__ void rintf_kernel_v1(float* x) { auto result = rintf(x); } + __global__ void rintf_kernel_v2(Dummy x) { auto result = rintf(x); } +)"}; + +static constexpr auto kNearbyint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void nearbyint_kernel_v1(double* x) { auto result = nearbyint(x); } + __global__ void nearbyint_kernel_v2(Dummy x) { auto result = nearbyint(x); } + __global__ void nearbyintf_kernel_v1(float* x) { auto result = nearbyintf(x); } + __global__ void nearbyintf_kernel_v2(Dummy x) { auto result = nearbyintf(x); } +)"}; + +static constexpr auto kCeil{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ceil_kernel_v1(double* x) { auto result = ceil(x); } + __global__ void ceil_kernel_v2(Dummy x) { auto result = ceil(x); } + __global__ void ceilf_kernel_v1(float* x) { auto result = 
ceilf(x); } + __global__ void ceilf_kernel_v2(Dummy x) { auto result = ceilf(x); } +)"}; + +static constexpr auto kFloor{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void floor_kernel_v1(double* x) { auto result = floor(x); } + __global__ void floor_kernel_v2(Dummy x) { auto result = floor(x); } + __global__ void floorf_kernel_v1(float* x) { auto result = floorf(x); } + __global__ void floorf_kernel_v2(Dummy x) { auto result = floorf(x); } +)"}; + +static constexpr auto kLrint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void lrint_kernel_v1(double* x) { auto result = lrint(x); } + __global__ void lrint_kernel_v2(Dummy x) { auto result = lrint(x); } + __global__ void lrintf_kernel_v1(float* x) { auto result = lrintf(x); } + __global__ void lrintf_kernel_v2(Dummy x) { auto result = lrintf(x); } +)"}; + +static constexpr auto kLround{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void lround_kernel_v1(double* x) { auto result = lround(x); } + __global__ void lround_kernel_v2(Dummy x) { auto result = lround(x); } + __global__ void lroundf_kernel_v1(float* x) { auto result = lroundf(x); } + __global__ void lroundf_kernel_v2(Dummy x) { auto result = lroundf(x); } +)"}; + +static constexpr auto kLlrint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void llrint_kernel_v1(double* x) { auto result = llrint(x); } + __global__ void llrint_kernel_v2(Dummy x) { auto result = llrint(x); } + __global__ void llrintf_kernel_v1(float* x) { auto result = llrintf(x); } + __global__ void llrintf_kernel_v2(Dummy x) { auto result = llrintf(x); } +)"}; + +static constexpr auto kLlround{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void llround_kernel_v1(double* x) { auto result = llround(x); } + __global__ void llround_kernel_v2(Dummy x) 
{ auto result = llround(x); } + __global__ void llroundf_kernel_v1(float* x) { auto result = llroundf(x); } + __global__ void llroundf_kernel_v2(Dummy x) { auto result = llroundf(x); } +)"}; + +/* +Two-argument functions (kFmod, kRemainder, kFdim): v1/v2 pass a pointer as the first/second +argument, v3/v4 pass a Dummy as the first/second argument. +*/ +static constexpr auto kFmod{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void fmod_kernel_v1(double* x, double y) { auto result = fmod(x, y); } + __global__ void fmod_kernel_v2(double x, double* y) { auto result = fmod(x, y); } + __global__ void fmod_kernel_v3(Dummy x, double y) { auto result = fmod(x, y); } + __global__ void fmod_kernel_v4(double x, Dummy y) { auto result = fmod(x, y); } + __global__ void fmodf_kernel_v1(float* x, float y) { auto result = fmodf(x, y); } + __global__ void fmodf_kernel_v2(float x, float* y) { auto result = fmodf(x, y); } + __global__ void fmodf_kernel_v3(Dummy x, float y) { auto result = fmodf(x, y); } + __global__ void fmodf_kernel_v4(float x, Dummy y) { auto result = fmodf(x, y); } +)"}; + +static constexpr auto kRemainder{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void remainder_kernel_v1(double* x, double y) { auto result = remainder(x, y); } + __global__ void remainder_kernel_v2(double x, double* y) { auto result = remainder(x, y); } + __global__ void remainder_kernel_v3(Dummy x, double y) { auto result = remainder(x, y); } + __global__ void remainder_kernel_v4(double x, Dummy y) { auto result = remainder(x, y); } + __global__ void remainderf_kernel_v1(float* x, float y) { auto result = remainderf(x, y); } + __global__ void remainderf_kernel_v2(float x, float* y) { auto result = remainderf(x, y); } + __global__ void remainderf_kernel_v3(Dummy x, float y) { auto result = remainderf(x, y); } + __global__ void remainderf_kernel_v4(float x, Dummy y) { auto result = remainderf(x, y); } +)"}; + +/* +remquo/remquof: v1 to v4 replace x or y with a pointer or a Dummy; v5 to v12 declare quo as +a pointer to char, short, long, long long, float, double, Dummy and const int respectively. +*/ +static constexpr auto kRemquo{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void 
remquo_kernel_v1(double* x, double y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v2(Dummy x, double y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v3(double x, double* y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v4(double x, Dummy y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v5(double x, double y, char* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v6(double x, double y, short* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v7(double x, double y, long* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v8(double x, double y, long long* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v9(double x, double y, float* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v10(double x, double y, double* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v11(double x, double y, Dummy* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v12(double x, double y, const int* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquof_kernel_v1(float* x, float y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v2(Dummy x, float y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v3(float x, float* y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v4(float x, Dummy y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v5(float x, float y, char* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v6(float x, float y, short* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v7(float x, float y, long* quo) { auto result = remquof(x, y, quo); } + __global__ void 
remquof_kernel_v8(float x, float y, long long* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v9(float x, float y, float* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v10(float x, float y, double* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v11(float x, float y, Dummy* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v12(float x, float y, const int* quo) { + auto result = remquof(x, y, quo); + } +)"}; + +/* +modf/modff: v1 and v2 pass a pointer or a Dummy as x; v3 to v10 declare iptr with a pointee +type other than the plain floating-point type of the variant (integer types, the other +floating-point type, Dummy, and a const-qualified pointee). +*/ +static constexpr auto kModf{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void modf_kernel_v1(double* x, double* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v2(Dummy x, double* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v3(double x, int* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v4(double x, char* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v5(double x, short* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v6(double x, long* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v7(double x, long long* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v8(double x, float* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v9(double x, Dummy* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v10(double x, const double* iptr) { auto result = modf(x, iptr); } + __global__ void modff_kernel_v1(float* x, float* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v2(Dummy x, float* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v3(float x, int* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v4(float x, char* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v5(float x, short* iptr) { auto result = modff(x, 
iptr); } + __global__ void modff_kernel_v6(float x, long* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v7(float x, long long* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v8(float x, double* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v9(float x, Dummy* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v10(float x, const float* iptr) { auto result = modff(x, iptr); } +)"}; + +static constexpr auto kFdim{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void fdim_kernel_v1(double* x, double y) { auto result = fdim(x, y); } + __global__ void fdim_kernel_v2(double x, double* y) { auto result = fdim(x, y); } + __global__ void fdim_kernel_v3(Dummy x, double y) { auto result = fdim(x, y); } + __global__ void fdim_kernel_v4(double x, Dummy y) { auto result = fdim(x, y); } + __global__ void fdimf_kernel_v1(float* x, float y) { auto result = fdimf(x, y); } + __global__ void fdimf_kernel_v2(float x, float* y) { auto result = fdimf(x, y); } + __global__ void fdimf_kernel_v3(Dummy x, float y) { auto result = fdimf(x, y); } + __global__ void fdimf_kernel_v4(float x, Dummy y) { auto result = fdimf(x, y); } +)"}; diff --git a/catch/unit/math/math_root_negative_kernels_1Dand2D.cc b/catch/unit/math/math_root_negative_kernels_1Dand2D.cc new file mode 100644 index 0000000000..688eaa95be --- /dev/null +++ b/catch/unit/math/math_root_negative_kernels_1Dand2D.cc @@ -0,0 +1,107 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } \ + __global__ void func_name##f_kernel_v1(float* x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(float x, float* y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(Dummy x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy y) { float result = func_name##f(x, y); } + +#define NEGATIVE_KERNELS_SHELL_ARRAY_ARG(func_name) \ + __global__ void func_name##_kernel_v1(int* dim, const double* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v2(Dummy dim, const double* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v3(int dim, const int* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v4(int dim, const char* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v5(int dim, const short* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void 
func_name##_kernel_v6(int dim, const long* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v7(int dim, const long long* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v8(int dim, const float* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v9(int dim, const Dummy* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##f_kernel_v1(int* dim, const float* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v2(Dummy dim, const float* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v3(int dim, const int* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v4(int dim, const char* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v5(int dim, const short* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v6(int dim, const long* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v7(int dim, const long long* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v8(int dim, const double* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v9(int dim, const Dummy* a) { \ + double result = func_name##f(dim, a); \ + } + +NEGATIVE_KERNELS_SHELL_ONE_ARG(sqrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(rsqrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(cbrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(rcbrt) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(hypot) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(rhypot) +NEGATIVE_KERNELS_SHELL_ARRAY_ARG(norm) +NEGATIVE_KERNELS_SHELL_ARRAY_ARG(rnorm) diff --git a/catch/unit/math/math_root_negative_kernels_3Dand4D.cc b/catch/unit/math/math_root_negative_kernels_3Dand4D.cc new file mode 100644 index 0000000000..be8d206af6 --- 
/dev/null +++ b/catch/unit/math/math_root_negative_kernels_3Dand4D.cc @@ -0,0 +1,119 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_THREE_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v2(double x, double* y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v3(double x, double y, double* z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v4(Dummy x, double y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v5(double x, Dummy y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v6(double x, double y, Dummy z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v1(float* x, float y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v2(float x, float* y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v3(float x, float y, float* z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v4(Dummy x, float y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v5(float x, Dummy y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v6(float x, float y, Dummy z) { \ + float result = func_name##f(x, y, z); \ + } + +#define NEGATIVE_KERNELS_SHELL_FOUR_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v2(double x, double* y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v3(double x, double y, double* z, double 
w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v4(double x, double y, double z, double* w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v5(Dummy x, double y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v6(double x, Dummy y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v7(double x, double y, Dummy z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v8(double x, double y, double z, Dummy w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v1(float* x, float y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v2(float x, float* y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v3(float x, float y, float* z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v4(float x, float y, float z, float* w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v5(Dummy x, float y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v6(float x, Dummy y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v7(float x, float y, Dummy z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v8(float x, float y, float z, Dummy w) { \ + float result = func_name##f(x, y, z, w); \ + } + +NEGATIVE_KERNELS_SHELL_THREE_ARGS(norm3d) +NEGATIVE_KERNELS_SHELL_THREE_ARGS(rnorm3d) +NEGATIVE_KERNELS_SHELL_FOUR_ARGS(norm4d) +NEGATIVE_KERNELS_SHELL_FOUR_ARGS(rnorm4d) diff --git 
a/catch/unit/math/math_root_negative_kernels_rtc.hh b/catch/unit/math/math_root_negative_kernels_rtc.hh new file mode 100644 index 0000000000..53507ee23c --- /dev/null +++ b/catch/unit/math/math_root_negative_kernels_rtc.hh @@ -0,0 +1,428 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math root negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kSqrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sqrt_kernel_v1(double* x) { double result = sqrt(x); } + __global__ void sqrt_kernel_v2(Dummy x) { double result = sqrt(x); } + __global__ void sqrtf_kernel_v1(float* x) { float result = sqrtf(x); } + __global__ void sqrtf_kernel_v2(Dummy x) { float result = sqrtf(x); } +)"}; + +static constexpr auto kRsqrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rsqrt_kernel_v1(double* x) { double result = rsqrt(x); } + __global__ void rsqrt_kernel_v2(Dummy x) { double result = rsqrt(x); } + __global__ void rsqrtf_kernel_v1(float* x) { float result = rsqrtf(x); } + __global__ void rsqrtf_kernel_v2(Dummy x) { float result = rsqrtf(x); } +)"}; + +static constexpr auto kCbrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cbrt_kernel_v1(double* x) { double result = cbrt(x); } + __global__ void cbrt_kernel_v2(Dummy x) { double result = cbrt(x); } + __global__ void cbrtf_kernel_v1(float* x) { float result = cbrtf(x); } + __global__ void cbrtf_kernel_v2(Dummy x) { float result = cbrtf(x); } +)"}; + +static constexpr auto kRcbrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rcbrt_kernel_v1(double* x) { double result = rcbrt(x); } + __global__ void rcbrt_kernel_v2(Dummy x) { double result = rcbrt(x); } + __global__ void rcbrtf_kernel_v1(float* x) { float result = rcbrtf(x); } + __global__ void rcbrtf_kernel_v2(Dummy x) { float result = rcbrtf(x); } +)"}; + +static constexpr auto kHypot{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void hypot_kernel_v1(double* x, double y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v2(double x, double* y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v3(Dummy x, 
double y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v4(double x, Dummy y) { double result = hypot(x, y); } + __global__ void hypotf_kernel_v1(float* x, float y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v2(float x, float* y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v3(Dummy x, float y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v4(float x, Dummy y) { float result = hypotf(x, y); } +)"}; + +static constexpr auto kRhypot{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rhypot_kernel_v1(double* x, double y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v2(double x, double* y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v3(Dummy x, double y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v4(double x, Dummy y) { double result = rhypot(x, y); } + __global__ void rhypotf_kernel_v1(float* x, float y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v2(float x, float* y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v3(Dummy x, float y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v4(float x, Dummy y) { float result = rhypotf(x, y); } +)"}; + +static constexpr auto kNorm3D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm3d_kernel_v1(double* x, double y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v2(double x, double* y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v3(double x, double y, double* z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v4(Dummy x, double y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v5(double x, Dummy y, double z) { + double result = norm3d(x, y, z); + } + __global__ void 
norm3d_kernel_v6(double x, double y, Dummy z) { + double result = norm3d(x, y, z); + } + __global__ void norm3df_kernel_v1(float* x, float y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v2(float x, float* y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v3(float x, float y, float* z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v4(Dummy x, float y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v5(float x, Dummy y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v6(float x, float y, Dummy z) { + float result = norm3df(x, y, z); + } +)"}; + +static constexpr auto kRnorm3D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm3d_kernel_v1(double* x, double y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v2(double x, double* y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v3(double x, double y, double* z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v4(Dummy x, double y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v5(double x, Dummy y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v6(double x, double y, Dummy z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3df_kernel_v1(float* x, float y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v2(float x, float* y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v3(float x, float y, float* z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v4(Dummy x, float y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v5(float x, Dummy y, float z) { + float result = 
rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v6(float x, float y, Dummy z) { + float result = rnorm3df(x, y, z); + } +)"}; + +static constexpr auto kNorm4D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm4d_kernel_v1(double* x, double y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v2(double x, double* y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v3(double x, double y, double* z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v4(double x, double y, double z, double* w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v5(Dummy x, double y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v6(double x, Dummy y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v7(double x, double y, Dummy z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v8(double x, double y, double z, Dummy w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4df_kernel_v1(float* x, float y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v2(float x, float* y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v3(float x, float y, float* z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v4(float x, float y, float z, float* w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v5(Dummy x, float y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v6(float x, Dummy y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v7(float x, float y, Dummy z, float w) { + float result = 
norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v8(float x, float y, float z, Dummy w) { + float result = norm4df(x, y, z, w); + } +)"}; + +static constexpr auto kRnorm4D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm4d_kernel_v1(double* x, double y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v2(double x, double* y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v3(double x, double y, double* z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v4(double x, double y, double z, double* w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v5(Dummy x, double y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v6(double x, Dummy y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v7(double x, double y, Dummy z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v8(double x, double y, double z, Dummy w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4df_kernel_v1(float* x, float y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v2(float x, float* y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v3(float x, float y, float* z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v4(float x, float y, float z, float* w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v5(Dummy x, float y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v6(float x, Dummy y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v7(float x, float y, 
Dummy z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v8(float x, float y, float z, Dummy w) { + float result = rnorm4df(x, y, z, w); + } +)"}; + +static constexpr auto kNorm{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm_kernel_v1(int* dim, const double* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v2(Dummy dim, const double* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v3(int dim, const int* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v4(int dim, const char* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v5(int dim, const short* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v6(int dim, const long* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v7(int dim, const long long* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v8(int dim, const float* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v9(int dim, const Dummy* a) { + double result = norm(dim, a); + } + __global__ void normf_kernel_v1(int* dim, const float* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v2(Dummy dim, const float* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v3(int dim, const int* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v4(int dim, const char* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v5(int dim, const short* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v6(int dim, const long* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v7(int dim, const long long* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v8(int dim, const double* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v9(int dim, const 
Dummy* a) { + double result = normf(dim, a); + } +)"}; + +static constexpr auto kRnorm{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm_kernel_v1(int* dim, const double* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v2(Dummy dim, const double* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v3(int dim, const int* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v4(int dim, const char* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v5(int dim, const short* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v6(int dim, const long* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v7(int dim, const long long* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v8(int dim, const float* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v9(int dim, const Dummy* a) { + double result = rnorm(dim, a); + } + __global__ void rnormf_kernel_v1(int* dim, const float* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v2(Dummy dim, const float* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v3(int dim, const int* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v4(int dim, const char* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v5(int dim, const short* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v6(int dim, const long* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v7(int dim, const long long* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v8(int dim, const double* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v9(int dim, const Dummy* a) { + double result = rnormf(dim, a); + } +)"}; diff --git 
a/catch/unit/math/math_rounding_negative_kernels.cc b/catch/unit/math/math_rounding_negative_kernels.cc new file mode 100644 index 0000000000..857f50d5dd --- /dev/null +++ b/catch/unit/math/math_rounding_negative_kernels.cc @@ -0,0 +1,43 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { auto result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { auto result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { auto result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { auto result = func_name##f(x); } + +NEGATIVE_KERNELS_SHELL(trunc) +NEGATIVE_KERNELS_SHELL(round) +NEGATIVE_KERNELS_SHELL(rint) +NEGATIVE_KERNELS_SHELL(nearbyint) +NEGATIVE_KERNELS_SHELL(ceil) +NEGATIVE_KERNELS_SHELL(floor) +NEGATIVE_KERNELS_SHELL(lrint) +NEGATIVE_KERNELS_SHELL(lround) +NEGATIVE_KERNELS_SHELL(llrint) +NEGATIVE_KERNELS_SHELL(llround) diff --git a/catch/unit/math/math_special_func_kernels.cc b/catch/unit/math/math_special_func_kernels.cc new file mode 100644 index 0000000000..7dec752ddc --- /dev/null +++ b/catch/unit/math/math_special_func_kernels.cc @@ -0,0 +1,60 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(int* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(int x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(int x, Dummy y) { double result = func_name(x, y); } \ + __global__ void func_name##f_kernel_v1(int* x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(int x, float* y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(Dummy x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(int x, Dummy y) { float result = func_name##f(x, y); } + +NEGATIVE_KERNELS_SHELL_ONE_ARG(erf) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfc) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfinv) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfcinv) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfcx) +NEGATIVE_KERNELS_SHELL_ONE_ARG(normcdf) +NEGATIVE_KERNELS_SHELL_ONE_ARG(normcdfinv) +NEGATIVE_KERNELS_SHELL_ONE_ARG(lgamma) +NEGATIVE_KERNELS_SHELL_ONE_ARG(tgamma) +NEGATIVE_KERNELS_SHELL_ONE_ARG(j0) 
+NEGATIVE_KERNELS_SHELL_ONE_ARG(j1) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(jn) +NEGATIVE_KERNELS_SHELL_ONE_ARG(y0) +NEGATIVE_KERNELS_SHELL_ONE_ARG(y1) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(yn) +NEGATIVE_KERNELS_SHELL_ONE_ARG(cyl_bessel_i0) +NEGATIVE_KERNELS_SHELL_ONE_ARG(cyl_bessel_i1) diff --git a/catch/unit/math/math_special_func_kernels_rtc.hh b/catch/unit/math/math_special_func_kernels_rtc.hh new file mode 100644 index 0000000000..e829db2f3a --- /dev/null +++ b/catch/unit/math/math_special_func_kernels_rtc.hh @@ -0,0 +1,236 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math special function negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kErf{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erf_kernel_v1(double* x) { double result = erf(x); } + __global__ void erf_kernel_v2(Dummy x) { double result = erf(x); } + __global__ void erff_kernel_v1(float* x) { float result = erff(x); } + __global__ void erff_kernel_v2(Dummy x) { float result = erff(x); } +)"}; + +static constexpr auto kErfc{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfc_kernel_v1(double* x) { double result = erfc(x); } + __global__ void erfc_kernel_v2(Dummy x) { double result = erfc(x); } + __global__ void erfcf_kernel_v1(float* x) { float result = erfcf(x); } + __global__ void erfcf_kernel_v2(Dummy x) { float result = erfcf(x); } +)"}; + +static constexpr auto kErfinv{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfinv_kernel_v1(double* x) { double result = erfinv(x); } + __global__ void erfinv_kernel_v2(Dummy x) { double result = erfinv(x); } + __global__ void erfinvf_kernel_v1(float* x) { float result = erfinvf(x); } + __global__ void erfinvf_kernel_v2(Dummy x) { float result = erfinvf(x); } +)"}; + +static constexpr auto kErfcinv{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfcinv_kernel_v1(double* x) { double result = erfcinv(x); } + __global__ void erfcinv_kernel_v2(Dummy x) { double result = erfcinv(x); } + __global__ void erfcinvf_kernel_v1(float* x) { float result = erfcinvf(x); } + __global__ void erfcinvf_kernel_v2(Dummy x) { float result = erfcinvf(x); } +)"}; + +static constexpr auto kErfcx{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfcx_kernel_v1(double* x) { double result = erfcx(x); } + __global__ void erfcx_kernel_v2(Dummy x) { double result = erfcx(x); } + __global__ void erfcxf_kernel_v1(float* x) { float 
result = erfcxf(x); } + __global__ void erfcxf_kernel_v2(Dummy x) { float result = erfcxf(x); } +)"}; + +static constexpr auto kNormcdf{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void normcdf_kernel_v1(double* x) { double result = normcdf(x); } + __global__ void normcdf_kernel_v2(Dummy x) { double result = normcdf(x); } + __global__ void normcdff_kernel_v1(float* x) { float result = normcdff(x); } + __global__ void normcdff_kernel_v2(Dummy x) { float result = normcdff(x); } +)"}; + +static constexpr auto kNormcdfinv{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void normcdfinv_kernel_v1(double* x) { double result = normcdfinv(x); } + __global__ void normcdfinv_kernel_v2(Dummy x) { double result = normcdfinv(x); } + __global__ void normcdfinvf_kernel_v1(float* x) { float result = normcdfinvf(x); } + __global__ void normcdfinvf_kernel_v2(Dummy x) { float result = normcdfinvf(x); } +)"}; + +static constexpr auto kLgamma{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void lgamma_kernel_v1(double* x) { double result = lgamma(x); } + __global__ void lgamma_kernel_v2(Dummy x) { double result = lgamma(x); } + __global__ void lgammaf_kernel_v1(float* x) { float result = lgammaf(x); } + __global__ void lgammaf_kernel_v2(Dummy x) { float result = lgammaf(x); } +)"}; + +static constexpr auto kTgamma{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tgamma_kernel_v1(double* x) { double result = tgamma(x); } + __global__ void tgamma_kernel_v2(Dummy x) { double result = tgamma(x); } + __global__ void tgammaf_kernel_v1(float* x) { float result = tgammaf(x); } + __global__ void tgammaf_kernel_v2(Dummy x) { float result = tgammaf(x); } +)"}; + +static constexpr auto kJ0{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void 
j0_kernel_v1(double* x) { double result = j0(x); } + __global__ void j0_kernel_v2(Dummy x) { double result = j0(x); } + __global__ void j0f_kernel_v1(float* x) { float result = j0f(x); } + __global__ void j0f_kernel_v2(Dummy x) { float result = j0f(x); } +)"}; + +static constexpr auto kJ1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void j1_kernel_v1(double* x) { double result = j1(x); } + __global__ void j1_kernel_v2(Dummy x) { double result = j1(x); } + __global__ void j1f_kernel_v1(float* x) { float result = j1f(x); } + __global__ void j1f_kernel_v2(Dummy x) { float result = j1f(x); } +)"}; + +static constexpr auto kJn{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void jn_kernel_v1(int* x, double y) { double result = jn(x, y); } + __global__ void jn_kernel_v2(int x, double* y) { double result = jn(x, y); } + __global__ void jn_kernel_v3(Dummy x, double y) { double result = jn(x, y); } + __global__ void jn_kernel_v4(int x, Dummy y) { double result = jn(x, y); } + __global__ void jnf_kernel_v1(int* x, float y) { float result = jnf(x, y); } + __global__ void jnf_kernel_v2(int x, float* y) { float result = jnf(x, y); } + __global__ void jnf_kernel_v3(Dummy x, float y) { float result = jnf(x, y); } + __global__ void jnf_kernel_v4(int x, Dummy y) { float result = jnf(x, y); } +)"}; + +static constexpr auto kY0{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void y0_kernel_v1(double* x) { double result = y0(x); } + __global__ void y0_kernel_v2(Dummy x) { double result = y0(x); } + __global__ void y0f_kernel_v1(float* x) { float result = y0f(x); } + __global__ void y0f_kernel_v2(Dummy x) { float result = y0f(x); } +)"}; + +static constexpr auto kY1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void y1_kernel_v1(double* x) { double result = y1(x); } + __global__ void 
y1_kernel_v2(Dummy x) { double result = y1(x); } + __global__ void y1f_kernel_v1(float* x) { float result = y1f(x); } + __global__ void y1f_kernel_v2(Dummy x) { float result = y1f(x); } +)"}; + +static constexpr auto kYn{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void yn_kernel_v1(int* x, double y) { double result = yn(x, y); } + __global__ void yn_kernel_v2(int x, double* y) { double result = yn(x, y); } + __global__ void yn_kernel_v3(Dummy x, double y) { double result = yn(x, y); } + __global__ void yn_kernel_v4(int x, Dummy y) { double result = yn(x, y); } + __global__ void ynf_kernel_v1(int* x, float y) { float result = ynf(x, y); } + __global__ void ynf_kernel_v2(int x, float* y) { float result = ynf(x, y); } + __global__ void ynf_kernel_v3(Dummy x, float y) { float result = ynf(x, y); } + __global__ void ynf_kernel_v4(int x, Dummy y) { float result = ynf(x, y); } +)"}; + +static constexpr auto kCylBesselI0{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cyl_bessel_i0_kernel_v1(double* x) { double result = cyl_bessel_i0(x); } + __global__ void cyl_bessel_i0_kernel_v2(Dummy x) { double result = cyl_bessel_i0(x); } + __global__ void cyl_bessel_i0f_kernel_v1(float* x) { float result = cyl_bessel_i0f(x); } + __global__ void cyl_bessel_i0f_kernel_v2(Dummy x) { float result = cyl_bessel_i0f(x); } +)"}; + +static constexpr auto kCylBesselI1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cyl_bessel_i1_kernel_v1(double* x) { double result = cyl_bessel_i1(x); } + __global__ void cyl_bessel_i1_kernel_v2(Dummy x) { double result = cyl_bessel_i1(x); } + __global__ void cyl_bessel_i1f_kernel_v1(float* x) { float result = cyl_bessel_i1f(x); } + __global__ void cyl_bessel_i1f_kernel_v2(Dummy x) { float result = cyl_bessel_i1f(x); } +)"}; diff --git a/catch/unit/math/math_special_values.hh 
b/catch/unit/math/math_special_values.hh new file mode 100644 index 0000000000..d68a246aca --- /dev/null +++ b/catch/unit/math/math_special_values.hh @@ -0,0 +1,294 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Disclaimer: +// This code is based on the work found in OpenCL-CTS authored by The Khronos Group. +// The original code can be found at https://github.com/KhronosGroup/OpenCL-CTS. +// We acknowledge the contributions of The Khronos Group to the development of this code. + +#pragma once + +#include +#include + +/*----------------------------------------------------------------------------- + HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type + float, double, long double respectively. Arguments: + + sm -- sign of number, + int -- integer part of mantissa (without `0x' prefix), + fract -- fractional part of mantissa (without decimal point and `L' or + `LL' suffixes), + se -- sign of exponent, + exp -- absolute value of (binary) exponent. + + Example: + + double yhi = HEX_DBL(+, 1, 5555555555555, -, 2); // 0x1.5555555555555p-2 + + Note: + + We have to pass signs as separate arguments because gcc passes negative + integer values (e.g. `-2') into a macro as two separate tokens, so + `HEX_FLT(1, 0, -2)' produces result `0x1.0p- 2' (note a space between minus + and two) which is not a correct floating point literal. 
+-----------------------------------------------------------------------------*/ +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +// If compiler does not support hex floating point literals: +#define HEX_FLT(sm, int, fract, se, exp) \ + sm ldexpf((float)(0x##int##fract##UL), \ + se exp + ilogbf((float)0x##int) - ilogbf((float)(0x##int##fract##UL))) +#define HEX_DBL(sm, int, fract, se, exp) \ + sm ldexp((double)(0x##int##fract##ULL), \ + se exp + ilogb((double)0x##int) - ilogb((double)(0x##int##fract##ULL))) +#define HEX_LDBL(sm, int, fract, se, exp) \ + sm ldexpl((long double)(0x##int##fract##ULL), \ + se exp + ilogbl((long double)0x##int) - ilogbl((long double)(0x##int##fract##ULL))) +#else +// If compiler supports hex floating point literals: just concatenate all the +// parts into a literal. +#define HEX_FLT(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##F +#define HEX_DBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp +#define HEX_LDBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##L +#endif + +inline constexpr std::array kSpecialValuesDouble{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_DBL(-, 1, 0000000000001, +, 64), + HEX_DBL(-, 1, 0, +, 64), + HEX_DBL(-, 1, fffffffffffff, +, 63), + HEX_DBL(-, 1, 0000000000001, +, 63), + HEX_DBL(-, 1, 0, +, 63), + HEX_DBL(-, 1, fffffffffffff, +, 62), + HEX_DBL(-, 1, 000002, +, 32), + HEX_DBL(-, 1, 0, +, 32), + HEX_DBL(-, 1, fffffffffffff, +, 31), + HEX_DBL(-, 1, 0000000000001, +, 31), + HEX_DBL(-, 1, 0, +, 31), + HEX_DBL(-, 1, fffffffffffff, +, 30), + -1000.0, + -100.0, + -4.0, + -3.5, + -3.0, + HEX_DBL(-, 1, 8000000000001, +, 1), + -2.5, + HEX_DBL(-, 1, 7ffffffffffff, +, 1), + -2.0, + HEX_DBL(-, 1, 8000000000001, +, 0), + -1.5, + HEX_DBL(-, 1, 7ffffffffffff, +, 0), + HEX_DBL(-, 1, 0000000000001, +, 0), + -1.0, + HEX_DBL(-, 1, fffffffffffff, -, 1), + HEX_DBL(-, 1, 0000000000001, -, 1), + -0.5, + HEX_DBL(-, 1, 
fffffffffffff, -, 2), + HEX_DBL(-, 1, 0000000000001, -, 2), + -0.25, + HEX_DBL(-, 1, fffffffffffff, -, 3), + HEX_DBL(-, 1, 0000000000001, -, 1022), + -std::numeric_limits::min(), + HEX_DBL(-, 0, fffffffffffff, -, 1022), + HEX_DBL(-, 0, 0000000000fff, -, 1022), + HEX_DBL(-, 0, 00000000000fe, -, 1022), + HEX_DBL(-, 0, 000000000000e, -, 1022), + HEX_DBL(-, 0, 000000000000c, -, 1022), + HEX_DBL(-, 0, 000000000000a, -, 1022), + HEX_DBL(-, 0, 0000000000008, -, 1022), + HEX_DBL(-, 0, 0000000000007, -, 1022), + HEX_DBL(-, 0, 0000000000006, -, 1022), + HEX_DBL(-, 0, 0000000000005, -, 1022), + HEX_DBL(-, 0, 0000000000004, -, 1022), + HEX_DBL(-, 0, 0000000000003, -, 1022), + HEX_DBL(-, 0, 0000000000002, -, 1022), + HEX_DBL(-, 0, 0000000000001, -, 1022), + -0.0, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_DBL(+, 1, 0000000000001, +, 64), + HEX_DBL(+, 1, 0, +, 64), + HEX_DBL(+, 1, fffffffffffff, +, 63), + HEX_DBL(+, 1, 0000000000001, +, 63), + HEX_DBL(+, 1, 0, +, 63), + HEX_DBL(+, 1, fffffffffffff, +, 62), + HEX_DBL(+, 1, 000002, +, 32), + HEX_DBL(+, 1, 0, +, 32), + HEX_DBL(+, 1, fffffffffffff, +, 31), + HEX_DBL(+, 1, 0000000000001, +, 31), + HEX_DBL(+, 1, 0, +, 31), + HEX_DBL(+, 1, fffffffffffff, +, 30), + +1000.0, + +100.0, + +4.0, + +3.5, + +3.0, + HEX_DBL(+, 1, 8000000000001, +, 1), + +2.5, + HEX_DBL(+, 1, 7ffffffffffff, +, 1), + +2.0, + HEX_DBL(+, 1, 8000000000001, +, 0), + +1.5, + HEX_DBL(+, 1, 7ffffffffffff, +, 0), + HEX_DBL(+, 1, 0000000000001, +, 0), + +1.0, + HEX_DBL(+, 1, fffffffffffff, -, 1), + HEX_DBL(+, 1, 0000000000001, -, 1), + +0.5, + HEX_DBL(+, 1, fffffffffffff, -, 2), + HEX_DBL(+, 1, 0000000000001, -, 2), + +0.25, + HEX_DBL(+, 1, fffffffffffff, -, 3), + HEX_DBL(+, 1, 0000000000001, -, 1022), + +std::numeric_limits::min(), + HEX_DBL(+, 0, fffffffffffff, -, 1022), + HEX_DBL(+, 0, 0000000000fff, -, 1022), + HEX_DBL(+, 0, 00000000000fe, -, 1022), + HEX_DBL(+, 0, 000000000000e, -, 1022), + HEX_DBL(+, 
0, 000000000000c, -, 1022), + HEX_DBL(+, 0, 000000000000a, -, 1022), + HEX_DBL(+, 0, 0000000000008, -, 1022), + HEX_DBL(+, 0, 0000000000007, -, 1022), + HEX_DBL(+, 0, 0000000000006, -, 1022), + HEX_DBL(+, 0, 0000000000005, -, 1022), + HEX_DBL(+, 0, 0000000000004, -, 1022), + HEX_DBL(+, 0, 0000000000003, -, 1022), + HEX_DBL(+, 0, 0000000000002, -, 1022), + HEX_DBL(+, 0, 0000000000001, -, 1022), + +0.0, +}; + +inline constexpr std::array kSpecialValuesFloat{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_FLT(-, 1, 000002, +, 64), + HEX_FLT(-, 1, 0, +, 64), + HEX_FLT(-, 1, fffffe, +, 63), + HEX_FLT(-, 1, 000002, +, 63), + HEX_FLT(-, 1, 0, +, 63), + HEX_FLT(-, 1, fffffe, +, 62), + HEX_FLT(-, 1, 000002, +, 32), + HEX_FLT(-, 1, 0, +, 32), + HEX_FLT(-, 1, fffffe, +, 31), + HEX_FLT(-, 1, 000002, +, 31), + HEX_FLT(-, 1, 0, +, 31), + HEX_FLT(-, 1, fffffe, +, 30), + -1000.f, + -100.f, + -4.0f, + -3.5f, + -3.0f, + HEX_FLT(-, 1, 800002, +, 1), + -2.5f, + HEX_FLT(-, 1, 7ffffe, +, 1), + -2.0f, + HEX_FLT(-, 1, 800002, +, 0), + -1.5f, + HEX_FLT(-, 1, 7ffffe, +, 0), + HEX_FLT(-, 1, 000002, +, 0), + -1.0f, + HEX_FLT(-, 1, fffffe, -, 1), + HEX_FLT(-, 1, 000002, -, 1), + -0.5f, + HEX_FLT(-, 1, fffffe, -, 2), + HEX_FLT(-, 1, 000002, -, 2), + -0.25f, + HEX_FLT(-, 1, fffffe, -, 3), + HEX_FLT(-, 1, 000002, -, 126), + -std::numeric_limits::min(), + HEX_FLT(-, 0, fffffe, -, 126), + HEX_FLT(-, 0, 000ffe, -, 126), + HEX_FLT(-, 0, 0000fe, -, 126), + HEX_FLT(-, 0, 00000e, -, 126), + HEX_FLT(-, 0, 00000c, -, 126), + HEX_FLT(-, 0, 00000a, -, 126), + HEX_FLT(-, 0, 000008, -, 126), + HEX_FLT(-, 0, 000006, -, 126), + HEX_FLT(-, 0, 000004, -, 126), + HEX_FLT(-, 0, 000002, -, 126), + -0.0f, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_FLT(+, 1, 000002, +, 64), + HEX_FLT(+, 1, 0, +, 64), + HEX_FLT(+, 1, fffffe, +, 63), + HEX_FLT(+, 1, 000002, +, 63), + HEX_FLT(+, 1, 0, +, 63), + 
HEX_FLT(+, 1, fffffe, +, 62), + HEX_FLT(+, 1, 000002, +, 32), + HEX_FLT(+, 1, 0, +, 32), + HEX_FLT(+, 1, fffffe, +, 31), + HEX_FLT(+, 1, 000002, +, 31), + HEX_FLT(+, 1, 0, +, 31), + HEX_FLT(+, 1, fffffe, +, 30), + +1000.f, + +100.f, + +4.0f, + +3.5f, + +3.0f, + HEX_FLT(+, 1, 800002, +, 1), + 2.5f, + HEX_FLT(+, 1, 7ffffe, +, 1), + +2.0f, + HEX_FLT(+, 1, 800002, +, 0), + 1.5f, + HEX_FLT(+, 1, 7ffffe, +, 0), + HEX_FLT(+, 1, 000002, +, 0), + +1.0f, + HEX_FLT(+, 1, fffffe, -, 1), + HEX_FLT(+, 1, 000002, -, 1), + +0.5f, + HEX_FLT(+, 1, fffffe, -, 2), + HEX_FLT(+, 1, 000002, -, 2), + +0.25f, + HEX_FLT(+, 1, fffffe, -, 3), + HEX_FLT(+, 1, 000002, -, 126), + +std::numeric_limits::min(), + HEX_FLT(+, 0, fffffe, -, 126), + HEX_FLT(+, 0, 000ffe, -, 126), + HEX_FLT(+, 0, 0000fe, -, 126), + HEX_FLT(+, 0, 00000e, -, 126), + HEX_FLT(+, 0, 00000c, -, 126), + HEX_FLT(+, 0, 00000a, -, 126), + HEX_FLT(+, 0, 000008, -, 126), + HEX_FLT(+, 0, 000006, -, 126), + HEX_FLT(+, 0, 000004, -, 126), + HEX_FLT(+, 0, 000002, -, 126), + +0.0f, +}; + +inline constexpr std::array kSpecialValuesInt{ + 0, 1, 2, 3, 126, 127, 128, 1022, 1023, 1024, 0x02000001, 0x04000001, 1465264071, 1488522147, + std::numeric_limits::max(), -1, -2, -3, -126, -127, -128, -1022, -1023, -1024, -0x02000001, + -0x04000001, -1465264071, -1488522147, std::numeric_limits::min(), -std::numeric_limits::max() +}; + +template struct SpecialVals { + const T* const data; + const size_t size; +}; + +inline constexpr auto kSpecialValRegistry = + std::make_tuple(SpecialVals{kSpecialValuesFloat.data(), kSpecialValuesFloat.size()}, + SpecialVals{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()}, + SpecialVals{kSpecialValuesInt.data(), kSpecialValuesInt.size()}); diff --git a/catch/unit/math/misc_funcs.cc b/catch/unit/math/misc_funcs.cc new file mode 100644 index 0000000000..35e21fb26e --- /dev/null +++ b/catch/unit/math/misc_funcs.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "misc_negative_kernels_rtc.hh" + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +MATH_UNARY_WITHIN_ULP_TEST_DEF(fabs, std::fabs, 0, 0) +TEST_CASE("Unit_Device_fabs_fabsf_Negative_RTC") { NegativeTestRTCWrapper<4>(kFabs); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(copysign, std::copysign, 0, 0) +TEST_CASE("Unit_Device_copysign_copysignf_Negative_RTC") { NegativeTestRTCWrapper<8>(kCopySign); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fmax, std::fmax, 0, 0) +TEST_CASE("Unit_Device_fmax_fmaxf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmax); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fmin, std::fmin, 0, 0) +TEST_CASE("Unit_Device_fmin_fminf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmin); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(nextafter, std::nextafter, 0, 0) +TEST_CASE("Unit_Device_nextafter_nextafterf_Negative_RTC") { + NegativeTestRTCWrapper<8>(kNextAfter); +} + +MATH_TERNARY_WITHIN_ULP_TEST_DEF(fma, std::fma, 0, 0) +TEST_CASE("Unit_Device_fma_fmaf_Negative_RTC") { NegativeTestRTCWrapper<12>(kFma); } + +__global__ void fdividef_kernel(float* const ys, const size_t num_xs, float* const x1s, + float* const x2s) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + ys[i] = fdividef(x1s[i], x2s[i]); + } +} + +TEST_CASE("Unit_Device_fdividef_Accuracy_Positive") { + double (*ref)(double, double) = [](double x1, double x2) { return x1 / x2; }; + BinaryFloatingPointTest(fdividef_kernel, ref, ULPValidatorBuilderFactory(0)); +} + +TEST_CASE("Unit_Device_fdividef_Negative_RTC") { NegativeTestRTCWrapper<4>(kFdividef); } + +#define MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(kern_name, ref_func) \ + template \ + __global__ void kern_name##_kernel(bool* const ys, const size_t num_xs, T* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ 
+ ys[i] = kern_name(xs[i]); \ + } \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - float") { \ + bool (*ref)(double) = ref_func; \ + UnarySinglePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - double") { \ + bool (*ref)(long double) = ref_func; \ + UnaryDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(isfinite, std::isfinite) +TEST_CASE("Unit_Device_isfinite_Negative_RTC") { NegativeTestRTCWrapper<4>(kIsFinite); } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(isinf, std::isinf) +TEST_CASE("Unit_Device_isinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kIsInf); } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(isnan, std::isnan) +TEST_CASE("Unit_Device_isnan_Negative_RTC") { NegativeTestRTCWrapper<4>(kIsNan); } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(signbit, std::signbit) +TEST_CASE("Unit_Device_signbit_Negative_RTC") { NegativeTestRTCWrapper<4>(kSignBit); } \ No newline at end of file diff --git a/catch/unit/math/misc_negative_kernels.cc b/catch/unit/math/misc_negative_kernels.cc new file mode 100644 index 0000000000..761bd9cebf --- /dev/null +++ b/catch/unit/math/misc_negative_kernels.cc @@ -0,0 +1,87 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define MISC_UNARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +#define MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v3(double* x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v4(Dummy x) { bool result = func_name(x); } + +#define MISC_BINARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##f_kernel_v1(float* x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(Dummy x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(float x, float* y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy y) { float result = func_name##f(x, y); } \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(Dummy x, double y) { double result = 
func_name(x, y); } \ + __global__ void func_name##_kernel_v3(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } + +/*Expecting 4 errors*/ +MISC_UNARY_NEGATIVE_KERNELS(fabs) + +/*Expecting 8 errors per macro invocation - 40 total*/ +MISC_BINARY_NEGATIVE_KERNELS(copysign) +MISC_BINARY_NEGATIVE_KERNELS(fmax) +MISC_BINARY_NEGATIVE_KERNELS(fmin) +MISC_BINARY_NEGATIVE_KERNELS(nextafter) +MISC_BINARY_NEGATIVE_KERNELS(fma) + +/*Expecting 4 errors - every kernel passes exactly one invalid argument (pointer or Dummy) to fdividef*/ +__global__ void fdividef_kernel_v1(float* x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdividef(x, y); } + +/*Expecting 4 errors per macro invocation - 16 total*/ +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(isfinite) +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(isinf) +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(isnan) +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(signbit) + +/*Expecting 12 errors - one invalid argument per kernel; float kernels call fmaf, double kernels call fma*/ +__global__ void fmaf_kernel_v1(float* x, float y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v2(Dummy x, float y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v3(float x, float* y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v4(float x, Dummy y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v5(float x, float y, float* z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v6(float x, float y, Dummy z) { float result = fmaf(x, y, z); } +__global__ void fma_kernel_v1(double* x, double y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v2(Dummy x, double y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v3(double x, double* y, double z) { double result =
fma(x, y, z); } +__global__ void fma_kernel_v4(double x, Dummy y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v5(double x, double y, double* z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v6(double x, double y, Dummy z) { double result = fma(x, y, z); } \ No newline at end of file diff --git a/catch/unit/math/misc_negative_kernels_rtc.hh b/catch/unit/math/misc_negative_kernels_rtc.hh new file mode 100644 index 0000000000..66521da090 --- /dev/null +++ b/catch/unit/math/misc_negative_kernels_rtc.hh @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + + +static constexpr auto kFabs{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fabsf_kernel_v1(float* x) { float result = fabsf(x); } +__global__ void fabsf_kernel_v2(Dummy x) { float result = fabsf(x); } +__global__ void fabs_kernel_v1(double* x) { double result = fabs(x); } +__global__ void fabs_kernel_v2(Dummy x) { double result = fabs(x); } +)"}; + +static constexpr auto kCopySign{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void copysignf_kernel_v1(float* x, float y) { float result = copysignf(x, y); } +__global__ void copysignf_kernel_v2(Dummy x, float y) { float result = copysignf(x, y); } +__global__ void copysignf_kernel_v3(float x, float* y) { float result = copysignf(x, y); } +__global__ void copysignf_kernel_v4(float x, Dummy y) { float result = copysignf(x, y); } +__global__ void copysign_kernel_v1(double* x, double y) { double result = copysign(x, y); } +__global__ void copysign_kernel_v2(Dummy x, double y) { double result = copysign(x, y); } +__global__ void copysign_kernel_v3(double x, double* y) { double result = copysign(x, y); } +__global__ void copysign_kernel_v4(double x, Dummy y) { double result = copysign(x, y); } +)"}; + +static constexpr auto kFmax{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fmaxf_kernel_v1(float* x, float y) { float result = fmaxf(x, y); } +__global__ void fmaxf_kernel_v2(Dummy x, float y) { float result = fmaxf(x, y); } +__global__ void fmaxf_kernel_v3(float x, float* y) { float result = fmaxf(x, y); } +__global__ void fmaxf_kernel_v4(float x, Dummy y) { float result = fmaxf(x, y); } +__global__ void fmax_kernel_v1(double* x, double y) { double result = fmax(x, y); } +__global__ void fmax_kernel_v2(Dummy x, double y) { double result = fmax(x, y); } +__global__ void fmax_kernel_v3(double x, double* y) { double result = fmax(x, y); } +__global__ void 
fmax_kernel_v4(double x, Dummy y) { double result = fmax(x, y); } +)"}; + +static constexpr auto kFmin{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fminf_kernel_v1(float* x, float y) { float result = fminf(x, y); } +__global__ void fminf_kernel_v2(Dummy x, float y) { float result = fminf(x, y); } +__global__ void fminf_kernel_v3(float x, float* y) { float result = fminf(x, y); } +__global__ void fminf_kernel_v4(float x, Dummy y) { float result = fminf(x, y); } +__global__ void fmin_kernel_v1(double* x, double y) { double result = fmin(x, y); } +__global__ void fmin_kernel_v2(Dummy x, double y) { double result = fmin(x, y); } +__global__ void fmin_kernel_v3(double x, double* y) { double result = fmin(x, y); } +__global__ void fmin_kernel_v4(double x, Dummy y) { double result = fmin(x, y); } +)"}; + +static constexpr auto kNextAfter{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void nextafterf_kernel_v1(float* x, float y) { float result = nextafterf(x, y); } +__global__ void nextafterf_kernel_v2(Dummy x, float y) { float result = nextafterf(x, y); } +__global__ void nextafterf_kernel_v3(float x, float* y) { float result = nextafterf(x, y); } +__global__ void nextafterf_kernel_v4(float x, Dummy y) { float result = nextafterf(x, y); } +__global__ void nextafter_kernel_v1(double* x, double y) { double result = nextafter(x, y); } +__global__ void nextafter_kernel_v2(Dummy x, double y) { double result = nextafter(x, y); } +__global__ void nextafter_kernel_v3(double x, double* y) { double result = nextafter(x, y); } +__global__ void nextafter_kernel_v4(double x, Dummy y) { double result = nextafter(x, y); } +)"}; + +static constexpr auto kFma{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fmaf_kernel_v1(float* x, float y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v2(Dummy x, float y, float z) 
{ float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v3(float x, float* y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v4(float x, Dummy y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v5(float x, float y, float* z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v6(float x, float y, Dummy z) { float result = fmaf(x, y, z); } +__global__ void fma_kernel_v1(double* x, double y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v2(Dummy x, double y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v3(double x, double* y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v4(double x, Dummy y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v5(double x, double y, double* z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v6(double x, double y, Dummy z) { double result = fma(x, y, z); } +)"}; + +static constexpr auto kFdividef{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fdividef_kernel_v1(float* x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdividef(x, y); } +)"}; + +static constexpr auto kIsFinite{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void isfinite_kernel_v1(float* x) { bool result = isfinite(x); } +__global__ void isfinite_kernel_v2(Dummy x) { bool result = isfinite(x); } +__global__ void isfinite_kernel_v3(double* x) { bool result = isfinite(x); } +__global__ void isfinite_kernel_v4(Dummy x) { bool result = isfinite(x); } +)"}; + +static constexpr auto kIsInf{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} 
+}; +__global__ void isinf_kernel_v1(float* x) { bool result = isinf(x); } +__global__ void isinf_kernel_v2(Dummy x) { bool result = isinf(x); } +__global__ void isinf_kernel_v3(double* x) { bool result = isinf(x); } +__global__ void isinf_kernel_v4(Dummy x) { bool result = isinf(x); } +)"}; + +static constexpr auto kIsNan{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void isnan_kernel_v1(float* x) { bool result = isnan(x); } +__global__ void isnan_kernel_v2(Dummy x) { bool result = isnan(x); } +__global__ void isnan_kernel_v3(double* x) { bool result = isnan(x); } +__global__ void isnan_kernel_v4(Dummy x) { bool result = isnan(x); } +)"}; + +static constexpr auto kSignBit{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void signbit_kernel_v1(float* x) { bool result = signbit(x); } +__global__ void signbit_kernel_v2(Dummy x) { bool result = signbit(x); } +__global__ void signbit_kernel_v3(double* x) { bool result = signbit(x); } +__global__ void signbit_kernel_v4(Dummy x) { bool result = signbit(x); } +)"}; diff --git a/catch/unit/math/pow_common.hh b/catch/unit/math/pow_common.hh new file mode 100644 index 0000000000..95402c72d1 --- /dev/null +++ b/catch/unit/math/pow_common.hh @@ -0,0 +1,134 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_POW_INT_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T1* const x1s, \ + T2* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } \ + } + +template +using kernel_pow_int_sig = void (*)(T1*, const size_t, T1*, T2*); + +template using ref_pow_int_sig = T1 (*)(T1, T2); + +template +void PowIntFloatingPointBruteForceTest(kernel_pow_int_sig kernel, + ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) * 2 + sizeof(T2)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T1)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < 
num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s] { + const auto generator1 = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(std::numeric_limits::lowest(), + std::numeric_limits::max()); + return static_cast(unif_dist(rng)); + }; + const auto generator2 = [] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_int_distribution unif_dist(std::numeric_limits::lowest(), + std::numeric_limits::max()); + return unif_dist(rng); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator1); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator2); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr()); + } +} + +template +void PowIntFloatingPointSpecialValuesTest(kernel_pow_int_sig kernel, + ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values1 = std::get>(kSpecialValRegistry); + const auto values2 = std::get>(kSpecialValRegistry); + + const auto size = values1.size * values2.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(T1)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(T2)}; + + for (auto i = 0u; i < values1.size; ++i) { + for (auto j = 0u; j < values2.size; ++j) { + x1s.ptr()[i * values2.size + j] = values1.data[i]; + x2s.ptr()[i * values2.size + j] = static_cast(values2.data[j]); + } + } + + MathTest math_test(kernel, 
size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr()); +} + +template +void PowIntFloatingPointTest(kernel_pow_int_sig kernel, ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + PowIntFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { PowIntFloatingPointBruteForceTest(kernel, ref_func, validator_builder); } +} \ No newline at end of file diff --git a/catch/unit/math/pow_funcs.cc b/catch/unit/math/pow_funcs.cc new file mode 100644 index 0000000000..1722a26db5 --- /dev/null +++ b/catch/unit/math/pow_funcs.cc @@ -0,0 +1,455 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "pow_common.hh" +#include "math_pow_negative_kernels_rtc.hh" + +/** + * @addtogroup PowMathFuncs PowMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `expf(x)` for all possible inputs and `exp(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::exp(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp, 2, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for expf and exp. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp_expf_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp2f(x)` for all possible inputs and `exp2(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::exp2(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp2, 2, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for exp2f and exp2. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp2_exp2f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp2); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `expm1f(x)` for all possible inputs and `expm1(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::expm1(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(expm1, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for expm1f and expm1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_expm1_expm1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExpm1); } + +MATH_UNARY_KERNEL_DEF(exp10) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp10f(x)` for all possible inputs. The maximum ulp error + * is 2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10f_Accuracy_Positive") { + auto exp10_ref = [](double arg) -> double { return std::pow(10, arg); }; + double (*ref)(double) = exp10_ref; + UnarySinglePrecisionTest(exp10_kernel, ref, ULPValidatorBuilderFactory(2)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp10(x)` against a table of difficult values, + * followed by a large number of randomly generated values. 
The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10_Accuracy_Positive") { + auto exp10_ref = [](long double arg) -> long double { return std::pow(10, arg); }; + long double (*ref)(long double) = exp10_ref; + UnaryDoublePrecisionTest(exp10_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for exp10f and exp10. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10_exp10f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp10); } + +template +__global__ void frexp_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i].first = frexpf(xs[i], &ys[i].second); + } else if constexpr (std::is_same_v) { + ys[i].first = frexp(xs[i], &ys[i].second); + } + } +} + +template std::pair frexp_ref(T arg) { + int exp_v; + T res = std::frexp(arg, &exp_v); + return {res, exp_v}; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `frexpf(x, exp)` for all possible inputs. The results are + * compared against reference function `double std::frexp(double, int*)`. The maximum ulp error is + * 0. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexpf_Accuracy_Positive") { + UnarySinglePrecisionTest( + frexp_kernel, frexp_ref, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0), + EqValidatorBuilderFactory())); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `frexp(x, exp)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double std::frexp(long double, int*)`. The maximum ulp + * error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexp_Accuracy_Positive") { + UnaryDoublePrecisionTest( + frexp_kernel, frexp_ref, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0), + EqValidatorBuilderFactory())); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for frexpf and frexp. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexp_frexpf_Negative_RTC") { NegativeTestRTCWrapper<20>(kFrexp); } + + +/********** Binary Functions **********/ + +MATH_BINARY_KERNEL_DEF(pow) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `powf(x, y)` and `pow(x, y)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::pow(T, T)`. The maximum ulp error + * for single precision is 4 and for double precision is 2. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_pow_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto pow_ref = [](RT arg1, RT arg2) -> RT { + if (std::isinf(arg1) && arg2 < 0) return 0; + return std::pow(arg1, arg2); + }; + RT (*ref)(RT, RT) = pow_ref; + const auto ulp = std::is_same_v ? 4 : 2; + BinaryFloatingPointTest(pow_kernel, ref, ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for powf and pow. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_pow_powf_Negative_RTC") { NegativeTestRTCWrapper<8>(kPow); } + +MATH_POW_INT_KERNEL_DEF(ldexp) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ldexpf(x, exp)` and `ldexp(x, exp)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::ldexp(T, int)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_ldexp_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, int) = std::ldexp; + PowIntFloatingPointTest(ldexp_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for ldexpf and ldexp. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ldexp_ldexpf_Negative_RTC") { NegativeTestRTCWrapper<8>(kLdexp); } + +MATH_POW_INT_KERNEL_DEF(powi) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `powif(x, exp)` and `powi(x, exp)` against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::pow(T, T)`. The maximum ulp error + * for single precision is 4 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_powi_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto pow_ref = [](RT arg1, int arg2) -> RT { + if (std::isinf(arg1) && arg2 < 0) return 0; + return std::pow(arg1, static_cast(arg2)); + }; + RT (*ref)(RT, int) = pow_ref; + const auto ulp = std::is_same_v ? 4 : 2; + PowIntFloatingPointTest(powi_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for powif and powi. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_powi_powif_Negative_RTC") { NegativeTestRTCWrapper<8>(kPowi); } + +MATH_POW_INT_KERNEL_DEF(scalbn) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `scalbnf(x, n)` and `scalbn(x, n)` against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::scalbn(T, int)`. The maximum ulp error is 0. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_scalbn_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, int) = std::scalbn; + PowIntFloatingPointTest(scalbn_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for scalbnf and scalbn. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_scalbn_scalbnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbn); } + +MATH_POW_INT_KERNEL_DEF(scalbln) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `scalblnf(x, l)` and `scalbln(x, l)` against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::scalbln(T, long int)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_scalbln_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, long int) = std::scalbln; + PowIntFloatingPointTest(scalbln_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for scalblnf and scalbln. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_scalbln_scalblnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbln); } diff --git a/catch/unit/math/quaternary_common.hh b/catch/unit/math/quaternary_common.hh new file mode 100644 index 0000000000..a9a8cc6778 --- /dev/null +++ b/catch/unit/math/quaternary_common.hh @@ -0,0 +1,246 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_QUATERNARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, T* const x1s, T* const x2s, \ + T* const x3s, T* const x4s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i], x3s[i], x4s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i], x4s[i]); \ + } \ + } \ + } + +inline constexpr std::array kSpecialValuesReducedDouble{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_DBL(-, 1, 0000000000001, +, 64), + HEX_DBL(-, 1, fffffffffffff, +, 63), + HEX_DBL(-, 1, fffffffffffff, +, 62), + HEX_DBL(-, 1, 0, +, 32), + HEX_DBL(-, 1, 0000000000001, +, 31), + HEX_DBL(-, 1, fffffffffffff, +, 30), + -1000.0, + -3.5, + HEX_DBL(-, 1, 8000000000001, +, 1), + -2.5, + HEX_DBL(-, 1, 8000000000001, +, 0), + -1.5, + -0.5, + -0.25, + HEX_DBL(-, 1, fffffffffffff, -, 3), + -std::numeric_limits::min(), + HEX_DBL(-, 0, fffffffffffff, -, 1022), + HEX_DBL(-, 0, 0000000000001, -, 1022), + -0.0, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_DBL(+, 1, 0, +, 64), + HEX_DBL(+, 1, 0000000000001, +, 63), + HEX_DBL(+, 1, 000002, +, 32), + HEX_DBL(+, 1, fffffffffffff, +, 31), + HEX_DBL(+, 1, 0, +, 31), + HEX_DBL(+, 1, fffffffffffff, +, 30), + +100.0, + +3.0, + HEX_DBL(+, 1, 7ffffffffffff, +, 1), + +2.0, + HEX_DBL(+, 1, 7ffffffffffff, +, 0), + +1.0, + HEX_DBL(+, 1, fffffffffffff, -, 2), + +std::numeric_limits::min(), + HEX_DBL(+, 0, 0000000000fff, -, 1022), + HEX_DBL(+, 0, 0000000000007, -, 1022), + +0.0, +}; + +inline constexpr std::array 
kSpecialValuesReducedFloat{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_FLT(-, 1, 000002, +, 64), + HEX_FLT(-, 1, fffffe, +, 63), + HEX_FLT(-, 1, fffffe, +, 62), + HEX_FLT(-, 1, 0, +, 32), + HEX_FLT(-, 1, fffffe, +, 31), + HEX_FLT(-, 1, fffffe, +, 30), + -1000.f, + -3.5f, + HEX_FLT(-, 1, 800002, +, 1), + -2.5f, + HEX_FLT(-, 1, 800002, +, 0), + -1.5f, + -0.5f, + -0.25f, + HEX_FLT(-, 1, fffffe, -, 3), + -std::numeric_limits::min(), + HEX_FLT(-, 0, fffffe, -, 126), + HEX_FLT(-, 0, 000002, -, 126), + -0.0f, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_FLT(+, 1, 0, +, 64), + HEX_FLT(+, 1, 000002, +, 63), + HEX_FLT(+, 1, 000002, +, 32), + HEX_FLT(+, 1, 000002, +, 31), + HEX_FLT(+, 1, fffffe, +, 30), + +100.f, + +4.0f, + HEX_FLT(+, 1, 7ffffe, +, 1), + +2.0f, + HEX_FLT(+, 1, 7ffffe, +, 0), + +1.0f, + HEX_FLT(+, 1, fffffe, -, 2), + +std::numeric_limits::min(), + HEX_FLT(+, 0, 000ffe, -, 126), + HEX_FLT(+, 0, 000006, -, 126), + +0.0f, +}; + +inline constexpr auto kSpecialValReducedRegistry = std::make_tuple( + SpecialVals{kSpecialValuesReducedFloat.data(), kSpecialValuesReducedFloat.size()}, + SpecialVals{kSpecialValuesReducedDouble.data(), kSpecialValuesReducedDouble.size()}); + +template +void QuaternaryFloatingPointBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 4 + sizeof(T)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + 
LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x4s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s, &x3s, &x4s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x3s.ptr() + base_idx, x3s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x4s.ptr() + base_idx, x4s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr(), x3s.ptr(), x4s.ptr()); + } +} + +template +void QuaternaryFloatingPointSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValReducedRegistry); + + const auto size = values.size * values.size * values.size * values.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard 
x3s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x4s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + + for (auto i = 0u; i < values.size; ++i) { + for (auto j = 0u; j < values.size; ++j) { + for (auto k = 0u; k < values.size; ++k) { + for (auto l = 0u; l < values.size; ++l) { + x1s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[i]; + x2s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[j]; + x3s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[k]; + x4s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[l]; + } + } + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr(), x3s.ptr(), x4s.ptr()); +} + +template +void QuaternaryFloatingPointTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + QuaternaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { + QuaternaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); + } +} + + +#define MATH_QUATERNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_QUATERNARY_KERNEL_DEF(kern_name) \ + \ + TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \ + using RT = RefType_t; \ + RT (*ref)(RT, RT, RT, RT) = ref_func; \ + const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \ + \ + QuaternaryFloatingPointTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(ulp)); \ + } diff --git a/catch/unit/math/remainder_and_rounding_funcs.cc b/catch/unit/math/remainder_and_rounding_funcs.cc new file mode 100644 index 0000000000..635a68a459 --- /dev/null +++ b/catch/unit/math/remainder_and_rounding_funcs.cc @@ -0,0 +1,153 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "math_remainder_rounding_negative_kernels_rtc.hh" + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fmod, std::fmod, 0, 0) +TEST_CASE("Unit_Device_fmod_fmodf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmod); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(remainder, std::remainder, 0, 0) +TEST_CASE("Unit_Device_remainder_remainder_Negative_RTC") { NegativeTestRTCWrapper<8>(kRemainder); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fdim, std::fdim, 0, 0) +TEST_CASE("Unit_Device_fdim_fdimf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFdim); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(trunc, std::trunc, 0, 0) +TEST_CASE("Unit_Device_trunc_truncf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTrunc); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(round, std::round, 0, 0) +TEST_CASE("Unit_Device_round_roundf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRound); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(rint, std::rint, 0, 0) +TEST_CASE("Unit_Device_rint_rintf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRint); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(nearbyint, std::nearbyint, 0, 0) +TEST_CASE("Unit_Device_nearbyint_nearbyintf_Negative_RTC") { + NegativeTestRTCWrapper<4>(kNearbyint); +} + +MATH_UNARY_WITHIN_ULP_TEST_DEF(ceil, std::ceil, 0, 0) +TEST_CASE("Unit_Device_ceil_ceilf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCeil); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(floor, std::floor, 0, 0) +TEST_CASE("Unit_Device_floor_floorf_Negative_RTC") { NegativeTestRTCWrapper<4>(kFloor); } + + +#define LONG_CONVERSION_FUNCTION_TEST_DEF(kern_name, ref_func, lt) \ + MATH_UNARY_KERNEL_DEF(kern_name) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - float") { \ + lt (*ref)(double) = ref_func; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, \ + EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::lowest()), \ + static_cast(std::numeric_limits::max())); \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - double") { \ + lt 
(*ref)(long double) = ref_func; \ + UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, ref, \ + EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::lowest()), \ + static_cast(std::numeric_limits::max())); \ + } + +LONG_CONVERSION_FUNCTION_TEST_DEF(lrint, std::lrint, long) +TEST_CASE("Unit_Device_lrint_lrintf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLrint); } + +LONG_CONVERSION_FUNCTION_TEST_DEF(lround, std::lround, long) +TEST_CASE("Unit_Device_lround_lroundf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLround); } + +LONG_CONVERSION_FUNCTION_TEST_DEF(llrint, std::llrint, long long) +TEST_CASE("Unit_Device_llrint_llrintf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLlrint); } + +LONG_CONVERSION_FUNCTION_TEST_DEF(llround, std::llround, long long) +TEST_CASE("Unit_Device_llround_llroundf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLlround); } + + +template +__global__ void remquo_kernel(std::pair* const ys, const size_t num_xs, T* const x1s, + T* const x2s) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i].first = remquof(x1s[i], x2s[i], &ys[i].second); + } else if constexpr (std::is_same_v) { + ys[i].first = remquo(x1s[i], x2s[i], &ys[i].second); + } + } +} + +template std::pair remquo_wrapper(T x1, T x2) { + std::pair ret; + ret.first = std::remquo(x1, x2, &ret.second); + return ret; +} + +TEMPLATE_TEST_CASE("Unit_Device_remquo_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + std::pair (*ref)(RT, RT) = remquo_wrapper; + const auto ulp_builder = ULPValidatorBuilderFactory(0); + const auto eq_builder = EqValidatorBuilderFactory(); + + BinaryFloatingPointTest(remquo_kernel, ref, + PairValidatorBuilderFactory(ulp_builder, eq_builder)); +} + +TEST_CASE("Unit_Device_remquo_remquof_Negative_RTC") { NegativeTestRTCWrapper<24>(kRemquo); } + +template +__global__ void modf_kernel(std::pair* const ys, 
const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i].first = modff(xs[i], &ys[i].second); + } else if constexpr (std::is_same_v) { + ys[i].first = modf(xs[i], &ys[i].second); + } + } +} + +template std::pair modf_wrapper(T x) { + std::pair ret; + ret.first = std::modf(x, &ret.second); + return ret; +} + +TEST_CASE("Unit_Device_modf_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + modf_kernel, modf_wrapper, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0))); +} + +TEST_CASE("Unit_Device_modf_Accuracy_Positive - double") { + UnaryDoublePrecisionTest( + modf_kernel, modf_wrapper, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0))); +} + +TEST_CASE("Unit_Device_modf_modff_Negative_RTC") { NegativeTestRTCWrapper<20>(kModf); } diff --git a/catch/unit/math/root_funcs.cc b/catch/unit/math/root_funcs.cc new file mode 100644 index 0000000000..1638ca8b04 --- /dev/null +++ b/catch/unit/math/root_funcs.cc @@ -0,0 +1,604 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" +#include "quaternary_common.hh" +#include "math_root_negative_kernels_rtc.hh" + +/** + * @addtogroup RootMathFuncs RootMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +MATH_UNARY_KERNEL_DEF(sqrt) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `sqrtf(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_sqrtf_Accuracy_Positive") { + float (*ref)(float) = std::sqrt; + UnarySinglePrecisionTest(sqrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `sqrt(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `double std::sqrt(double)`. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_sqrt_Accuracy_Positive") { + double (*ref)(double) = std::sqrt; + UnaryDoublePrecisionTest(sqrt_kernel, ref, ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for sqrtf and sqrt.
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_sqrt_sqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSqrt); } + +MATH_UNARY_KERNEL_DEF(rsqrt) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rsqrtf(x)` for all possible inputs. The maximum ulp error + * is 2. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rsqrtf_Accuracy_Positive") { + auto rsqrt_ref = [](double arg) -> double { return 1. / std::sqrt(arg); }; + double (*ref)(double) = rsqrt_ref; + UnarySinglePrecisionTest(rsqrt_kernel, ref, ULPValidatorBuilderFactory(2)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rsqrt(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rsqrt_Accuracy_Positive") { + auto rsqrt_ref = [](long double arg) -> long double { return 1.L / std::sqrt(arg); }; + long double (*ref)(long double) = rsqrt_ref; + UnaryDoublePrecisionTest(rsqrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for rsqrtf and rsqrt. 
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rsqrt_rsqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRsqrt); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cbrtf(x)` for all possible inputs and `cbrt(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::cbrt(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_TEST_DEF(cbrt, std::cbrt, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for cbrtf and cbrt. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cbrt_cbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCbrt); } + +MATH_UNARY_KERNEL_DEF(rcbrt) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rcbrtf(x)` for all possible inputs. The maximum ulp error + * is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rcbrtf_Accuracy_Positive") { + auto rcbrt_ref = [](double arg) -> double { return 1. / std::cbrt(arg); }; + double (*ref)(double) = rcbrt_ref; + UnarySinglePrecisionTest(rcbrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rcbrt(x)` against a table of difficult values, + * followed by a large number of randomly generated values. 
The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rcbrt_Accuracy_Positive") { + auto rcbrt_ref = [](long double arg) -> long double { return 1. / std::cbrt(arg); }; + long double (*ref)(long double) = rcbrt_ref; + UnaryDoublePrecisionTest(rcbrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for rcbrtf and rcbrt. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rcbrt_rcbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRcbrt); } + +/********** Binary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hypotf(x, y)` and `hypot(x, y)` against a table of + * difficult values, followed by a large number of randomly generated values. The results are + * compared against reference function `T std::hypot(T, T)`. The maximum ulp error for single + * precision is 3 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_WITHIN_ULP_TEST_DEF(hypot, std::hypot, 3, 2) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for hypotf and hypot. 
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_hypot_hypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kHypot); } + +MATH_BINARY_KERNEL_DEF(rhypot) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rhypotf(x, y)` and `rhypot(x, y)` against a table of + * difficult values, followed by a large number of randomly generated values. The maximum ulp error + * for single precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_rhypot_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto rhypot_ref = [](RT arg1, RT arg2) -> RT { return 1. / std::hypot(arg1, arg2); }; + RT (*ref)(RT, RT) = rhypot_ref; + const auto ulp = std::is_same_v ? 2 : 1; + BinaryFloatingPointTest(rhypot_kernel, ref, ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for rhypotf and rhypot. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rhypot_rhypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kRhypot); } + +/********** Ternary Functions **********/ + +MATH_TERNARY_KERNEL_DEF(norm3d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `norm3df(x, y, z)` and `norm3d(x, y, z)` against a table of + * difficult values, followed by a large number of randomly generated values. The maximum ulp error + * for single precision is 3 and for double precision is 2.
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_norm3d_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto norm3d_ref = [](RT arg1, RT arg2, RT arg3) -> RT { + if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3)) { + return std::numeric_limits::infinity(); + } + return std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3); + }; + RT (*ref)(RT, RT, RT) = norm3d_ref; + const auto ulp = std::is_same_v ? 3 : 2; + TernaryFloatingPointTest(norm3d_kernel, ref, ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for norm3df and norm3d. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_norm3d_norm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kNorm3D); } + +MATH_TERNARY_KERNEL_DEF(rnorm3d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rnorm3df(x, y, z)` and `rnorm3d(x, y, z)` against a table of + * difficult values, followed by a large number of randomly generated values. The maximum ulp error + * for single precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_rnorm3d_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto rnorm3d_ref = [](RT arg1, RT arg2, RT arg3) -> RT { + if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3)) { + return 0; + } + return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3); + }; + RT (*ref)(RT, RT, RT) = rnorm3d_ref; + const auto ulp = std::is_same_v ?
2 : 1; + TernaryFloatingPointTest(rnorm3d_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for rnorm3df and rnorm3d. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rnorm3d_rnorm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kRnorm3D); } + +/********** Quaternary Functions **********/ + +MATH_QUATERNARY_KERNEL_DEF(norm4d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `norm4df(x, y, z, t)` and `norm4d(x, y, z, t)` against a + * table of difficult values, followed by a large number of randomly generated values. The maximum + * ulp error for single precision is 3 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_norm4d_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto norm4d_ref = [](RT arg1, RT arg2, RT arg3, RT arg4) -> RT { + if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3) || std::isinf(arg4)) { + return std::numeric_limits::infinity(); + } + return std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4); + }; + RT (*ref)(RT, RT, RT, RT) = norm4d_ref; + const auto ulp = std::is_same_v ? 3 : 2; + QuaternaryFloatingPointTest(norm4d_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for norm4df and norm4d. 
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_norm4d_norm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kNorm4D); } + +MATH_QUATERNARY_KERNEL_DEF(rnorm4d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rnorm4df(x, y, z, t)` and `rnorm4d(x, y, z, t)` against a + * table of difficult values, followed by a large number of randomly generated values. The maximum + * ulp error for single precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_rnorm4d_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto rnorm4d_ref = [](RT arg1, RT arg2, RT arg3, RT arg4) -> RT { + if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3) || std::isinf(arg4)) { + return 0; + } + return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4); + }; + RT (*ref)(RT, RT, RT, RT) = rnorm4d_ref; + const auto ulp = std::is_same_v ? 2 : 1; + QuaternaryFloatingPointTest(rnorm4d_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for rnorm4df and rnorm4d.
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rnorm4d_rnorm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kRnorm4D); } + +/********** norm Function **********/ + +#define MATH_NORM_KERNEL_DEF(func_name) \ + template __global__ void func_name##_kernel(T* const ys, int dim, T* const x1s) { \ + if constexpr (std::is_same_v) { \ + *ys = func_name##f(dim, x1s); \ + } else if constexpr (std::is_same_v) { \ + *ys = func_name(dim, x1s); \ + } \ + } + +template +void NormSimpleTest(F kernel, RF ref_func, const ValidatorBuilder& validator_builder) { + const auto max_dim = 10000; + + LinearAllocGuard x{LinearAllocs::hipHostMalloc, max_dim * sizeof(T)}; + LinearAllocGuard x_dev{LinearAllocs::hipMalloc, max_dim * sizeof(T)}; + LinearAllocGuard y{LinearAllocs::hipHostMalloc, sizeof(T)}; + LinearAllocGuard y_dev{LinearAllocs::hipMalloc, sizeof(T)}; + + std::fill_n(x.ptr(), max_dim, 1); + HIP_CHECK(hipMemcpy(x_dev.ptr(), x.ptr(), max_dim * sizeof(T), hipMemcpyHostToDevice)); + + for (uint64_t i = 1u; i < max_dim; i++) { + kernel<<<1, 1>>>(y_dev.ptr(), i, x_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(y.ptr(), y_dev.ptr(), sizeof(T), hipMemcpyDeviceToHost)); + const auto actual_val = *y.ptr(); + const auto ref_val = static_cast(ref_func(i, x.ptr())); + const auto validator = validator_builder(ref_val); + + if (!validator->match(actual_val)) { + std::stringstream ss; + ss << std::scientific << std::setprecision(std::numeric_limits::max_digits10 - 1); + ss << "Validation fails for dim: " << i << " " << actual_val << " " << ref_val; + INFO(ss.str()); + REQUIRE(false); + } + } +} + +MATH_NORM_KERNEL_DEF(norm) + +/** + * Test Description + * ------------------------ + * - Sanity test for `normf(dim, arr)` and `norm(dim, arr)`. 
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_norm_Sanity_Positive", "", float, double) { + using RT = RefType_t; + auto norm_ref = [](int dim, TestType* args) -> RT { + RT sum = 0; + for (int i = 0; i < dim; i++) { + if (std::isinf(args[i])) return std::numeric_limits::infinity(); + sum += static_cast(args[i]) * static_cast(args[i]); + } + return std::sqrt(sum); + }; + RT (*ref)(int, TestType*) = norm_ref; + + NormSimpleTest(norm_kernel, ref, ULPValidatorBuilderFactory(10)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for normf and norm. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_norm_normf_Negative_RTC") { NegativeTestRTCWrapper<18>(kNorm); } + +MATH_NORM_KERNEL_DEF(rnorm) + +/** + * Test Description + * ------------------------ + * - Sanity test for `rnormf(dim, arr)` and `rnorm(dim, arr)`. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_rnorm_Sanity_Positive", "", float, double) { + using RT = RefType_t; + auto rnorm_ref = [](int dim, TestType* args) -> RT { + RT sum = 0; + for (int i = 0; i < dim; i++) { + /* An infinite component makes the norm infinite, so its reciprocal is 0. */ + if (std::isinf(args[i])) return 0; + sum += static_cast(args[i]) * static_cast(args[i]); + } + return 1. / std::sqrt(sum); + }; + RT (*ref)(int, TestType*) = rnorm_ref; + + NormSimpleTest(rnorm_kernel, ref, ULPValidatorBuilderFactory(10)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for rnormf and rnorm.
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rnorm_rnormf_Negative_RTC") { NegativeTestRTCWrapper<18>(kRnorm); } diff --git a/catch/unit/math/single_precision_intrinsics.cc b/catch/unit/math/single_precision_intrinsics.cc new file mode 100644 index 0000000000..f0c12a67fc --- /dev/null +++ b/catch/unit/math/single_precision_intrinsics.cc @@ -0,0 +1,530 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +/********** Unary Functions **********/ + +#define MATH_UNARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnarySinglePrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_SP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x) + + +static float __frcp_rn_ref(float x) { return 1.0f / x; } + +MATH_UNARY_SP_KERNEL_DEF(__frcp_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__frcp_rn(x)` for all possible inputs. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__frcp_rn, __frcp_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_SP_KERNEL_DEF(__fsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fsqrt_rn(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. The error bounds are + * IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__fsqrt_rn, static_cast(std::sqrt), + EqValidatorBuilderFactory()); + + +static float __frsqrt_rn_ref(float x) { return 1.0f / std::sqrt(x); } + +MATH_UNARY_SP_KERNEL_DEF(__frsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__frsqrt_rn(x)` for all possible inputs. The results are + * compared against the host expression `1.0f / std::sqrt(x)`. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__frsqrt_rn, __frsqrt_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__expf) { + const int64_t ulp_err = 2 + static_cast(std::floor(std::abs(1.16f * x))); + return ULPValidatorBuilderFactory(ulp_err)(target); +} + +MATH_UNARY_SP_KERNEL_DEF(__expf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__expf(x)` for all possible inputs. The results are + * compared against reference function `double std::exp(double)`. The maximum ulp error is `2 + + * floor(abs(1.16 * x))`. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__expf, static_cast(std::exp)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__exp10f) { + const int64_t ulp_err = 2 + static_cast(std::floor(std::abs(2.95f * x))); + return ULPValidatorBuilderFactory(ulp_err)(target); +} + +MATH_UNARY_SP_KERNEL_DEF(__exp10f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__exp10f(x)` for all possible inputs. The results are + * compared against reference function `double exp10(double)`. The maximum ulp error is `2 + + * floor(abs(2.95 * x))`. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__exp10f, static_cast(exp10)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__logf) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -21.41); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(3)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__logf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__logf(x)` for all possible inputs. The results are + * compared against reference function `double std::log(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-21.41, otherwise, the maximum ulp error is 3. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__logf, static_cast(std::log)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log2f) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -22.0); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(2)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__log2f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__log2f(x)` for all possible inputs. The results are + * compared against reference function `double std::log2(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-22, otherwise, the maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__log2f, static_cast(std::log2)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log10f) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -24.0); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(3)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__log10f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__log10f(x)` for all possible inputs. The results are + * compared against reference function `double std::log10(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-24, otherwise, the maximum ulp error is 3. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__log10f, static_cast(std::log10)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__sinf) { + if (-M_PI <= x && x <= M_PI) { + const auto abs_err = std::pow(2.0, -21.41); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__sinf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sinf(x)` for all possible inputs. The results are + * compared against reference function `double std::sin(double)`. For `x` in [-PI, PI], the maximum + * absolute error is 2^-21.41, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__sinf, static_cast(std::sin)); + + +__device__ float __sincosf_sin(float x) { + float sin, cos; + __sincosf(x, &sin, &cos); + return sin; +} + +MATH_UNARY_SP_KERNEL_DEF(__sincosf_sin); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The + * results in `sptr` are compared against reference function `double std::sin(double)`. For `x` in + * [-PI, PI], the maximum absolute error is 2^-21.41, and larger otherwise. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_sin, static_cast(std::sin), + __sinf_validator_builder); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__cosf) { + if (-M_PI <= x && x <= M_PI) { + const auto abs_err = std::pow(2.0, -21.19); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__cosf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__cosf(x)` for all possible inputs. The results are + * compared against reference function `double std::cos(double)`. For `x` in [-PI, PI], the maximum + * absolute error is 2^-21.19, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__cosf, static_cast(std::cos)); + + +__device__ float __sincosf_cos(float x) { + float sin, cos; + __sincosf(x, &sin, &cos); + return cos; +} + +MATH_UNARY_SP_KERNEL_DEF(__sincosf_cos); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The + * results in `cptr` are compared against reference function `double std::cos(double)`. For `x` in + * [-PI, PI], the maximum absolute error is 2^-21.19, and larger otherwise. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_cos, static_cast(std::cos), + __cosf_validator_builder); + + +/********** Binary Functions **********/ +/* Defines func_name##_kernel: each thread starts at cg::this_grid().thread_rank() and advances by cg::this_grid().size(), storing func_name(x1s[i], x2s[i]) into ys[i] for every i < num_xs. */ +#define MATH_BINARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s, \ + float* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } +/* Declares the test case "Unit_Device_<func_name>_Accuracy_Positive", which forwards func_name##_kernel, the host reference and the validator builder to BinaryFloatingPointTest. */ +#define MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } +/* Same as the _IMPL form, with the validator builder fixed to func_name##_validator_builder. */ +#define MATH_BINARY_SP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) +/* Emits the signature of func_name##_validator_builder(target, x1, x2); the function body is written right after the macro invocation. */ +#define MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2) + +static float __fadd_rn_ref(float x1, float x2) { return x1 + x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fadd_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fadd_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fadd_rn, __fadd_rn_ref, EqValidatorBuilderFactory()); + + +static float __fsub_rn_ref(float x1, float x2) { return x1 - x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fsub_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fsub_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fsub_rn, __fsub_rn_ref, EqValidatorBuilderFactory()); + + +static float __fmul_rn_ref(float x1, float x2) { return x1 * x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fmul_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fmul_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fmul_rn, __fmul_rn_ref, EqValidatorBuilderFactory()); + + +static float __fdiv_rn_ref(float x1, float x2) { return x1 / x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fdiv_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fdiv_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fdiv_rn, __fdiv_rn_ref, EqValidatorBuilderFactory()); + + +MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(__fdividef) { + x1 = 2.0f; + const auto abs_x2 = std::abs(x2); + if (std::pow(x1, -126.0f) <= abs_x2 && abs_x2 <= std::pow(x1, 126.0f)) { + return ULPValidatorBuilderFactory(2)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_BINARY_SP_KERNEL_DEF(__fdividef); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fdividef(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. For `|y|` in [2^-126, 2^126], the + * maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF(__fdividef, __fdiv_rn_ref); + + +/********** Ternary Functions **********/ + +#define MATH_TERNARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s, \ + float* const x2s, float* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_SP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + 
static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2, float x3) + + +MATH_TERNARY_SP_KERNEL_DEF(__fmaf_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fmaf(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_SP_TEST_DEF_IMPL(__fmaf_rn, static_cast(std::fma), + EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/catch/unit/math/single_precision_intrinsics_negative_kernels.cc b/catch/unit/math/single_precision_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..f293894f83 --- /dev/null +++ b/catch/unit/math/single_precision_intrinsics_negative_kernels.cc @@ -0,0 +1,56 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +/* Empty class with only a device-side constructor and destructor; it is passed below where an intrinsic takes a float. */ +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +/* Defines two kernels that call func_name with a float* (v1) or a Dummy (v2) argument. NOTE(review): presumably consumed by a negative compile test that expects each call to be rejected - confirm against the test that builds this file. */ +#define INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x) { float result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { float result = func_name(x); } +/* Defines four kernels that call func_name with a float* or a Dummy in the first (v1, v3) or second (v2, v4) argument position. */ +#define INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x, float y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(float x, float* y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, float y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(float x, Dummy y) { float result = func_name(x, y); } + +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__fsqrt_rn) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__expf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__exp10f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__logf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log2f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log10f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__sinf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__cosf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__tanf) + +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fadd_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fsub_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fmul_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdiv_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdividef) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__powf) \ No newline at end of file diff --git a/catch/unit/math/special_common.hh b/catch/unit/math/special_common.hh new file mode 100644 index 0000000000..4b55a88fee --- /dev/null +++ 
b/catch/unit/math/special_common.hh @@ -0,0 +1,145 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; +/* Defines a templated func_name##_kernel that stores the result for (n[i], xs[i]) into ys[i]; the if-constexpr arms call func_name##f or func_name depending on T. */ +#define MATH_BESSEL_N_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, int* n, T* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(n[i], xs[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(n[i], xs[i]); \ + } \ + } \ + } + +template using kernel_bessel_n_sig = void (*)(T*, const size_t, int*, T*); + +template using ref_bessel_n_sig = T (*)(int, T); +/* Runs GetTestIterationCount() inputs drawn from a uniform distribution over (a, b) through the kernel in batches, with the integer argument fixed to n_input, and validates each batch against ref_func. */ +template +void BesselDoublePrecisionBruteForceTest(kernel_bessel_n_sig kernel, + ref_bessel_n_sig ref_func, + const ValidatorBuilder& validator_builder, int n_input = 0, + const double a = std::numeric_limits::lowest(), + const double b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = std::min( + GetMaxAllowedDeviceMemoryUsage() / (sizeof(double) * 2 + sizeof(int)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(int)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(double)}; + + MathTest math_test(kernel, max_batch_size); + std::fill_n(x1s.ptr(), max_batch_size, n_input); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size 
+ (i < tail); + thread_pool.Post([=, &x2s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(a, b); /* NOTE(review): std::uniform_real_distribution requires b - a to be finite, which the default bounds (lowest, max) do not satisfy - confirm callers pass a narrower range. */ + return static_cast(unif_dist(rng)); + }; + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr()); + } +} +/* Feeds every float from a (inclusive) up to b (exclusive), stepping with std::nextafter, through the kernel in batches of at most max_batch_size, with the integer argument fixed to n_input. */ +template +void BesselSinglePrecisionRangeTest(kernel_bessel_n_sig kernel, + ref_bessel_n_sig ref_func, + const ValidatorBuilder& validator_builder, int n_input, + const float a, const float b) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) * 2 + sizeof(int)); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(int)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + std::fill_n(x1s.ptr(), max_batch_size, n_input); + /* A batch is launched when it is full or when the range is exhausted, so the final partial batch is validated as well. */ + size_t inserted = 0u; + for (float v = a; v != b;) { + x2s.ptr()[inserted++] = v; + v = std::nextafter(v, b); + if (inserted < max_batch_size && v != b) continue; + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, x1s.ptr(), + x2s.ptr()); + inserted = 0u; + } +} +/* Launches kernel<<<1, num_args>>> on the num_args inputs in x and requires every output to match the validator built from the corresponding entry of ref. */ +template +void SpecialSimpleTest(F kernel, const ValidatorBuilder& validator_builder, const T* x, + const T* ref, size_t num_args) { + LinearAllocGuard x_dev{LinearAllocs::hipMalloc, num_args * sizeof(T)}; + LinearAllocGuard y{LinearAllocs::hipHostMalloc, num_args * sizeof(T)}; + LinearAllocGuard y_dev{LinearAllocs::hipMalloc, num_args * sizeof(T)}; + + HIP_CHECK(hipMemcpy(x_dev.ptr(), x, num_args * sizeof(T), hipMemcpyHostToDevice)); + + kernel<<<1, num_args>>>(y_dev.ptr(), num_args, x_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + 
HIP_CHECK(hipMemcpy(y.ptr(), y_dev.ptr(), num_args * sizeof(T), hipMemcpyDeviceToHost)); + + for (auto i = 0u; i < num_args; ++i) { + const auto actual_val = y.ptr()[i]; + const auto ref_val = ref[i]; + const auto validator = validator_builder(ref_val); + + if (!validator->match(actual_val)) { + std::stringstream ss; + ss << "Input value(s): " << std::scientific + << std::setprecision(std::numeric_limits::max_digits10 - 1); + ss << x[i] << " " << actual_val << " " << ref_val << "\n"; + INFO(ss.str()); + REQUIRE(false); + } + } +} diff --git a/catch/unit/math/special_funcs.cc b/catch/unit/math/special_funcs.cc new file mode 100644 index 0000000000..5461afadd1 --- /dev/null +++ b/catch/unit/math/special_funcs.cc @@ -0,0 +1,1117 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "special_common.hh" +#include "math_special_func_kernels_rtc.hh" + +#include + + +/** + * @addtogroup SpecialMathFuncs SpecialMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erff(x)` for all possible inputs and `erf(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::erf(T)`. The maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(erf, 2, 2) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erff and erf. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erf_erff_Negative_RTC") { NegativeTestRTCWrapper<4>(kErf); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfcf(x)` for all possible inputs and `erfc(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::erfc(T)`. The maximum ulp error for single + * precision is 4 and for double precision is 5. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(erfc, 4, 5) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfcf and erfc. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfc_erfcf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfc); } + +MATH_UNARY_KERNEL_DEF(erfinv) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfinvf(x)` for all possible inputs. The results are + * compared against reference function `double boost::math::erf_inv(double)`. The maximum ulp error + * is 2. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfinvf_Accuracy_Positive") { + auto erfinv_ref = [](double arg) -> double { + if (arg == 0) return 0; + if (arg == 1) + return std::numeric_limits::infinity(); + else if (arg == -1) + return -std::numeric_limits::infinity(); + else if (arg < -1 || arg > 1) + return std::numeric_limits::quiet_NaN(); + return boost::math::erf_inv(arg); + }; + double (*ref)(double) = erfinv_ref; + UnarySinglePrecisionTest(erfinv_kernel, ref, ULPValidatorBuilderFactory(2)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfinv(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double boost::math::erf_inv(long double)`. The maximum + * ulp error is 5. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfinv_Accuracy_Positive") { + auto erfinv_ref = [](long double arg) -> long double { + if (arg == 0) return 0; + if (arg == 1) + return std::numeric_limits::infinity(); + else if (arg == -1) + return -std::numeric_limits::infinity(); + else if (arg < -1 || arg > 1) + return std::numeric_limits::quiet_NaN(); + return boost::math::erf_inv(arg); + }; + long double (*ref)(long double) = erfinv_ref; + UnaryDoublePrecisionTest(erfinv_kernel, ref, ULPValidatorBuilderFactory(5)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfinvf and erfinv. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfinv_erfinvf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfinv); } + +MATH_UNARY_KERNEL_DEF(erfcinv) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfcinvf(x)` for all possible inputs. The results are + * compared against reference function `double boost::math::erfc_inv(double)`. The maximum ulp error + * is 4. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcinvf_Accuracy_Positive") { + auto erfcinv_ref = [](double arg) -> double { + if (arg == 0) + return std::numeric_limits::infinity(); + else if (arg == 2) + return -std::numeric_limits::infinity(); + else if (arg < 0 || arg > 2) + return std::numeric_limits::quiet_NaN(); + return boost::math::erfc_inv(arg); + }; + double (*ref)(double) = erfcinv_ref; + UnarySinglePrecisionTest(erfcinv_kernel, ref, ULPValidatorBuilderFactory(4)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfcinv(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double boost::math::erfc_inv(long double)`. The maximum + * ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcinv_Accuracy_Positive") { + auto erfcinv_ref = [](long double arg) -> long double { + if (arg == 0) + return std::numeric_limits::infinity(); + else if (arg == 2) + return -std::numeric_limits::infinity(); + else if (arg < 0 || arg > 2) + return std::numeric_limits::quiet_NaN(); + return boost::math::erfc_inv(arg); + }; + long double (*ref)(long double) = erfcinv_ref; + UnaryDoublePrecisionTest(erfcinv_kernel, ref, ULPValidatorBuilderFactory(6)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfcinvf and erfcinv. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcinv_erfcinvf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfcinv); } + +MATH_UNARY_KERNEL_DEF(erfcx) + +/** + * Test Description + * ------------------------ + * - Sanity test for `erfcxf(x)`. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcxf_Sanity_Positive") { + constexpr std::array input{-std::numeric_limits::infinity(), + -1000.f, + -100.f, + -5.f, + -0.5f, + 0., + 0.75f, + 15.f, + 200.f, + 500.f, + std::numeric_limits::infinity()}; + constexpr std::array reference{std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + 1.44009806e11f, + 1.95236027f, + 1.0f, + 5.06937683e-1f, + 3.75296101e-2f, + 2.82091252e-3f, + 1.12837693e-3f, + 0.f}; + SpecialSimpleTest(erfcx_kernel, ULPValidatorBuilderFactory(4), input.data(), + reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `erfcx(x)`. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcx_Sanity_Positive") { + constexpr std::array input{ + -std::numeric_limits::infinity(), -1000., -100., -5., -0.5, 0., 0.75, 15., 200., 500., + std::numeric_limits::infinity()}; + constexpr std::array reference{std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + 1.4400979867466104e11, + 1.9523604891825568, + 1.0, + 5.0693765029314475e-1, + 3.7529606388505762e-2, + 2.8209126572120466e-3, + 1.1283769103507188e-3, + 0.}; + SpecialSimpleTest(erfcx_kernel, ULPValidatorBuilderFactory(4), + input.data(), reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfcxf and erfcx. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcx_erfcxf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfcx); } + +MATH_UNARY_KERNEL_DEF(normcdf) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `normcdff(x)` for all possible inputs. The maximum ulp error + * is 5. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdff_Accuracy_Positive") { + auto normcdf_ref = [](double arg) -> double { return std::erfc(-arg / std::sqrt(2)) / 2; }; + double (*ref)(double) = normcdf_ref; + UnarySinglePrecisionTest(normcdf_kernel, ref, ULPValidatorBuilderFactory(5)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `normcdf(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The maximum ulp error is 5. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdf_Accuracy_Positive") { + auto normcdf_ref = [](long double arg) -> long double { + return std::erfc(-arg / std::sqrt(2.L)) / 2; + }; + long double (*ref)(long double) = normcdf_ref; + UnaryDoublePrecisionTest(normcdf_kernel, ref, ULPValidatorBuilderFactory(5)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for normcdff and normcdf. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdf_normcdff_Negative_RTC") { NegativeTestRTCWrapper<4>(kNormcdf); } + +MATH_UNARY_KERNEL_DEF(normcdfinv) + +/** + * Test Description + * ------------------------ + * - Sanity test for `normcdfinvf(x)`. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdfinvf_Sanity_Positive") { + constexpr std::array input{0.f, 0.1f, 0.25f, 0.4f, 0.5f, 0.6f, 0.75f, 0.9f, 1.f}; + constexpr std::array reference{-std::numeric_limits::infinity(), + -1.28155160f, + -0.674489737f, + -0.253347069f, + 0, + 0.253347158f, + 0.674489737f, + 1.28155148f, + std::numeric_limits::infinity()}; + SpecialSimpleTest(normcdfinv_kernel, ULPValidatorBuilderFactory(5), + input.data(), reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `normcdfinv(x)`. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdfinv_Sanity_Positive") { + constexpr std::array input{0., 0.1, 0.25, 0.4, 0.5, 0.6, 0.75, 0.9, 1.}; + constexpr std::array reference{-std::numeric_limits::infinity(), + -1.2815515655446004, + -0.67448975019608159, + -0.25334710313579972, + 0, + 0.25334710313579972, + 0.67448975019608159, + 1.2815515655446006, + std::numeric_limits::infinity()}; + SpecialSimpleTest(normcdfinv_kernel, ULPValidatorBuilderFactory(5), + input.data(), reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for normcdfinvf and normcdfinv. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdfinv_normcdfinvf_Negative_RTC") { + NegativeTestRTCWrapper<4>(kNormcdfinv); +} + +MATH_UNARY_KERNEL_DEF(tgamma) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `tgammaf(x)` for all possible inputs below 171.7 and that + * are not very small negative numbers, as they lead to overflow for IEEE compatible double. The + * results are compared against reference function `double std::tgamma(double)`. The maximum ulp + * error is 5. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_tgammaf_Accuracy_Limited_Positive") { + double (*ref)(double) = std::tgamma; + UnarySinglePrecisionRangeTest(tgamma_kernel, ref, ULPValidatorBuilderFactory(5), + std::numeric_limits::lowest(), -0.001f); + UnarySinglePrecisionRangeTest(tgamma_kernel, ref, ULPValidatorBuilderFactory(5), 0, + 171.7); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `tgamma(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double std::tgamma(long double)`. The maximum ulp error + * is 10. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_tgamma_Accuracy_Limited_Positive") { + long double (*ref)(long double) = std::tgamma; + UnaryDoublePrecisionTest(tgamma_kernel, ref, ULPValidatorBuilderFactory(10)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for tgammaf and tgamma. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_tgamma_tgammaf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTgamma); } + +MATH_UNARY_KERNEL_DEF(lgamma) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `lgammaf(x)` for all possible inputs. The results are + * compared against reference function `double std::lgamma(double)`. For `x` outside interval + * -11.0001 … -2.2637, the maximum ulp error is 4, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_lgammaf_Accuracy_Limited_Positive") { + double (*ref)(double) = std::lgamma; + UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(6), + std::numeric_limits::lowest(), -11.0001f); + UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(6), + -2.2636f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `lgamma(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against + * reference function `long double std::lgamma(long double)`. For `x` outside interval -11.0001 … + * -2.2637, the maximum ulp error is 4, and larger otherwise. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_lgamma_Accuracy_Limited_Positive") { + long double (*ref)(long double) = std::lgamma; + UnaryDoublePrecisionBruteForceTest(lgamma_kernel, ref, + ULPValidatorBuilderFactory(4), + std::numeric_limits::lowest(), -11.0001); + UnaryDoublePrecisionBruteForceTest(lgamma_kernel, ref, + ULPValidatorBuilderFactory(4), -2.2636, + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for lgammaf and lgamma. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_lgamma_lgammaf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLgamma); } + +MATH_UNARY_KERNEL_DEF(cyl_bessel_i0) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i0f(x)` for all possible inputs in range [0, + * 10000). The results are compared against reference function `double std::cyl_bessel_i(0, + * double)`. The maximum ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i0f_Accuracy_Limited_Positive") { + auto cyl_bessel_i0_ref = [](double arg) -> double { return std::cyl_bessel_i(0, arg); }; + double (*ref)(double) = cyl_bessel_i0_ref; + UnarySinglePrecisionRangeTest(cyl_bessel_i0_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i0(x)` against a table of difficult values, + * followed by a large number of randomly generated values from range [0, 10000). 
The results are + * compared against reference function `long double std::cyl_bessel_i(0, long double)`. The maximum + * ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i0_Accuracy_Limited_Positive") { + auto cyl_bessel_i0_ref = [](long double arg) -> long double { return std::cyl_bessel_i(0, arg); }; + long double (*ref)(long double) = cyl_bessel_i0_ref; + UnaryDoublePrecisionBruteForceTest(cyl_bessel_i0_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for cyl_bessel_i0f and cyl_bessel_i0. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i0_cyl_bessel_i0f_Negative_RTC") { + NegativeTestRTCWrapper<4>(kCylBesselI0); +} + +MATH_UNARY_KERNEL_DEF(cyl_bessel_i1) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i1f(x)` for all possible inputs in range [0, + * 10000). The results are compared against reference function `double std::cyl_bessel_i(1, + * double)`. The maximum ulp error is 6. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i1f_Accuracy_Limited_Positive") { + auto cyl_bessel_i1_ref = [](double arg) -> double { return std::cyl_bessel_i(1, arg); }; + double (*ref)(double) = cyl_bessel_i1_ref; + UnarySinglePrecisionRangeTest(cyl_bessel_i1_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i1(x)` against a table of difficult values, + * followed by a large number of randomly generated values from range [0, 10000). The results are + * compared against reference function `long double std::cyl_bessel_i(1, long double)`. The maximum + * ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i1_Accuracy_Limited_Positive") { + auto cyl_bessel_i1_ref = [](long double arg) -> long double { return std::cyl_bessel_i(1, arg); }; + long double (*ref)(long double) = cyl_bessel_i1_ref; + UnaryDoublePrecisionBruteForceTest(cyl_bessel_i1_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for cyl_bessel_i1f and cyl_bessel_i1. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i1_cyl_bessel_i1f_Negative_RTC") { + NegativeTestRTCWrapper<4>(kCylBesselI1); +} + +/********** Bessel Functions **********/ + +MATH_UNARY_KERNEL_DEF(y0) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y0f(x)` for all possible inputs. 
The results are + * compared against reference function `double y0(double)`. For `x` outside [-8, 8], the maximum + * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y0f_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(double) = y0; +#elif _WIN64 + double (*ref)(double) = _y0; +#endif + UnarySinglePrecisionRangeTest(y0_kernel, ref, ULPValidatorBuilderFactory(9), -8.f, + 8.f); + UnarySinglePrecisionRangeTest(y0_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + 8.f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y0(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double y0l(long double)`. The maximum absolute error is + * 5x10^-12. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y0_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(long double) = y0l; +#elif _WIN64 + long double (*ref)(long double) = _y0l; +#endif + UnaryDoublePrecisionBruteForceTest(y0_kernel, ref, + AbsValidatorBuilderFactory(5.e-12), -8., + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for y0f and y0. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y0_y0f_Negative_RTC") { NegativeTestRTCWrapper<4>(kY0); } + +MATH_UNARY_KERNEL_DEF(y1) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y1f(x)` for all possible inputs. The results are + * compared against reference function `double y1(double)`. For `x` outside [-8, 8], the maximum + * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y1f_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(double) = y1; +#elif _WIN64 + double (*ref)(double) = _y1; +#endif + UnarySinglePrecisionRangeTest(y1_kernel, ref, ULPValidatorBuilderFactory(9), -8.f, + 8.f); + UnarySinglePrecisionRangeTest(y1_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + 8.f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y1(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double y1l(long double)`. The maximum absolute error is + * 5x10^-12. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y1_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(long double) = y1l; +#elif _WIN64 + long double (*ref)(long double) = _y1l; +#endif + UnaryDoublePrecisionBruteForceTest(y1_kernel, ref, + AbsValidatorBuilderFactory(5.e-12), -8., + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for y1f and y1. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y1_y1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kY1); } + +MATH_BESSEL_N_KERNEL_DEF(yn) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ynf(x)` for all possible inputs and n equal to 5, 25 or + * 120. The results are compared against reference function `double yn(int, double)`. For `x` larger + * than n, the maximum absolute error is 2.2x10^-6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ynf_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(int, double) = yn; +#elif _WIN64 + double (*ref)(int, double) = _yn; +#endif + int n = GENERATE(5, 25, 120); + BesselSinglePrecisionRangeTest(yn_kernel, ref, AbsValidatorBuilderFactory(0.0000022), n, n, + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `yn(x)` against a table of difficult values, + * followed by a large number of randomly generated values from range and n equal to 5, 25, or 120. 
+ * The results are compared against reference function `long double ynl(int, long double)`. For `x` + * larger than 1.5n, the maximum absolute error is 5x10^-12. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_yn_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(int, long double) = ynl; +#elif _WIN64 + long double (*ref)(int, long double) = _ynl; +#endif + int n = GENERATE(5, 25, 120); + BesselDoublePrecisionBruteForceTest(yn_kernel, ref, + AbsValidatorBuilderFactory(5.e-12), n, 1.5 * n, + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for ynf and yn. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_yn_ynf_Negative_RTC") { NegativeTestRTCWrapper<8>(kYn); } + +MATH_UNARY_KERNEL_DEF(j0) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `j0f(x)` for all possible inputs. The results are + * compared against reference function `double j0(double)`. For `x` outside [-8, 8], the maximum + * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_j0f_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(double) = j0; +#elif _WIN64 + double (*ref)(double) = _j0; +#endif + UnarySinglePrecisionRangeTest(j0_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + std::numeric_limits::lowest(), -8.f); + UnarySinglePrecisionRangeTest(j0_kernel, ref, ULPValidatorBuilderFactory(9), -8.f, + 8.f); + UnarySinglePrecisionRangeTest(j0_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + 8.f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `j0(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double j0l(long double)`. The maximum absolute error is + * 5x10^-12. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_j0_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(long double) = j0l; +#elif _WIN64 + long double (*ref)(long double) = _j0l; +#endif + UnaryDoublePrecisionBruteForceTest( + j0_kernel, ref, AbsValidatorBuilderFactory(5.e-12), + std::numeric_limits::lowest(), std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for j0f and j0. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_j0_j0f_Negative_RTC") { NegativeTestRTCWrapper<4>(kJ0); } + +MATH_UNARY_KERNEL_DEF(j1) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `j1f(x)` for all possible inputs. The results are + * compared against reference function `double j1(double)`. For `x` outside [-8, 8], the maximum + * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_j1f_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(double) = j1; +#elif _WIN64 + double (*ref)(double) = _j1; +#endif + UnarySinglePrecisionRangeTest(j1_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + std::numeric_limits::lowest(), -8.f); + UnarySinglePrecisionRangeTest(j1_kernel, ref, ULPValidatorBuilderFactory(9), -8.f, + 8.f); + UnarySinglePrecisionRangeTest(j1_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + 8.f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `j1(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double j1l(long double)`. The maximum absolute error is + * 5x10^-12. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_j1_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(long double) = j1l; +#elif _WIN64 + long double (*ref)(long double) = _j1l; +#endif + UnaryDoublePrecisionBruteForceTest( + j1_kernel, ref, AbsValidatorBuilderFactory(5.e-12), + std::numeric_limits::lowest(), std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for j1f and j1. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_j1_j1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kJ1); } + +MATH_BESSEL_N_KERNEL_DEF(jn) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `jnf(x)` for all possible inputs and n equal to 5, 25 or + * 120. The results are compared against reference function `double jn(int, double)`. For `x` larger + * than n, the maximum absolute error is 2.2x10^-6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_jnf_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(int, double) = jn; +#elif _WIN64 + double (*ref)(int, double) = _jn; +#endif + int n = GENERATE(5, 25, 120); + BesselSinglePrecisionRangeTest(jn_kernel, ref, AbsValidatorBuilderFactory(0.0000022), n, n, + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `jn(x)` against a table of difficult values, + * followed by a large number of randomly generated values from range and n equal to 5, 25, or 120. 
+ * The results are compared against reference function `long double jnl(int, long double)`. The + * maximum absolute error is 5x10^-12. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_jn_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(int, long double) = jnl; +#elif _WIN64 + long double (*ref)(int, long double) = _jnl; +#endif + int n = GENERATE(5, 25, 120); + BesselDoublePrecisionBruteForceTest( + jn_kernel, ref, AbsValidatorBuilderFactory(5.e-12), n, + std::numeric_limits::lowest(), std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for jnf and jn. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_jn_jnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kJn); } diff --git a/catch/unit/math/ternary_common.hh b/catch/unit/math/ternary_common.hh new file mode 100644 index 0000000000..fef750bded --- /dev/null +++ b/catch/unit/math/ternary_common.hh @@ -0,0 +1,151 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_TERNARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, T* const x1s, T* const x2s, \ + T* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i], x3s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } \ + } + +template +void TernaryFloatingPointBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 3 + sizeof(T)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = 
thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s, &x3s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + if constexpr (std::is_same_v) { + std::uniform_real_distribution> unif_dist(-FLOAT16_MAX, FLOAT16_MAX); + return static_cast(unif_dist(rng)); + } else { + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + } + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x3s.ptr() + base_idx, x3s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr(), x3s.ptr()); + } +} + +template +void TernaryFloatingPointSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + using SpecialValsType = std::conditional_t, float, TArg>; + const auto values = std::get>(kSpecialValRegistry); + + const auto size = values.size * values.size * values.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + + for (auto i = 0u; i < values.size; ++i) { + for (auto j = 0u; j < values.size; ++j) { + for (auto k = 0u; k < values.size; ++k) { + 
x1s.ptr()[(i * values.size + j) * values.size + k] = values.data[i]; + x2s.ptr()[(i * values.size + j) * values.size + k] = values.data[j]; + x3s.ptr()[(i * values.size + j) * values.size + k] = values.data[k]; + } + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr(), x3s.ptr()); +} + +template +void TernaryFloatingPointTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + TernaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { + TernaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); + } +} + + +#define MATH_TERNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_TERNARY_KERNEL_DEF(kern_name) \ + \ + TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \ + using RT = RefType_t; \ + RT (*ref)(RT, RT, RT) = ref_func; \ + const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \ + \ + TernaryFloatingPointTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(ulp)); \ + } diff --git a/catch/unit/math/thread_pool.hh b/catch/unit/math/thread_pool.hh new file mode 100644 index 0000000000..d45e5e8b1b --- /dev/null +++ b/catch/unit/math/thread_pool.hh @@ -0,0 +1,64 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include <atomic> +#include <thread> + +#include <boost/asio/post.hpp> +#include <boost/asio/thread_pool.hpp> + +// This is a simple wrapper around boost::asio::thread_pool that keeps track of the number of +// currently active tasks using an atomic counter. A requested thread count of zero (which std::thread::hardware_concurrency() is allowed to report) is raised to one so that posted tasks always have a worker and thread_count() is never zero. +class ThreadPool { + public: + ThreadPool(size_t thread_count = std::thread::hardware_concurrency()) + : thread_count_(thread_count != 0 ? thread_count : 1) {} + + ~ThreadPool() { thread_pool_.join(); } + + // Submits a task to the thread pool and increments the number of active tasks. The task is + // wrapped in a lambda that decrements the number of active tasks upon completion. + template <typename T> void Post(T&& task) { + ++active_tasks_; + auto&& task_wrapper = [task, this] { + task(); + --active_tasks_; + }; + boost::asio::post(thread_pool_, task_wrapper); + } + + // Busy waits for the number of active tasks to reach zero. The acquire load pairs with the decrement performed by each finished task, so everything those tasks wrote is visible to the caller once Wait() returns.
+ void Wait() const { + while (active_tasks_.load(std::memory_order_acquire)) + ; + } + + size_t thread_count() const { return thread_count_; } + + private: + const size_t thread_count_; + boost::asio::thread_pool thread_pool_{thread_count_}; + std::atomic<size_t> active_tasks_{0}; +}; + +inline ThreadPool thread_pool{}; diff --git a/catch/unit/math/trig_double_precision_negative_kernels.cc b/catch/unit/math/trig_double_precision_negative_kernels.cc new file mode 100644 index 0000000000..2008837fd4 --- /dev/null +++ b/catch/unit/math/trig_double_precision_negative_kernels.cc @@ -0,0 +1,108 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define TRIG_DP_UNARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +/*Expecting 2 errors per macro invocation - 26 total*/ +TRIG_DP_UNARY_NEGATIVE_KERNELS(sin) +TRIG_DP_UNARY_NEGATIVE_KERNELS(cos) +TRIG_DP_UNARY_NEGATIVE_KERNELS(tan) +TRIG_DP_UNARY_NEGATIVE_KERNELS(asin) +TRIG_DP_UNARY_NEGATIVE_KERNELS(acos) +TRIG_DP_UNARY_NEGATIVE_KERNELS(atan) +TRIG_DP_UNARY_NEGATIVE_KERNELS(sinh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(cosh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(tanh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(asinh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(atanh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(sinpi) +TRIG_DP_UNARY_NEGATIVE_KERNELS(cospi) + +/*Expecting 4 errors*/ +__global__ void atan2_kernel_v1(double* x, double y) { double result = atan2(x, y); } +__global__ void atan2_kernel_v2(double x, double* y) { double result = atan2(x, y); } +__global__ void atan2_kernel_v3(Dummy x, double y) { double result = atan2(x, y); } +__global__ void atan2_kernel_v4(double x, Dummy y) { double result = atan2(x, y); } + +/*Expecting 18 errors*/ +__global__ void sincos_kernel_v1(double* x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v2(Dummy x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v3(double x, char* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v4(double x, short* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v5(double x, int* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v6(double x, long* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v7(double x, long long* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v8(double 
x, float* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v9(double x, Dummy* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v10(double x, const double* sptr, double* cptr) { + sincos(x, sptr, cptr); +} +__global__ void sincos_kernel_v11(double x, double* sptr, char* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v12(double x, double* sptr, short* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v13(double x, double* sptr, int* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v14(double x, double* sptr, long* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v15(double x, double* sptr, long long* cptr) { + sincos(x, sptr, cptr); +} +__global__ void sincos_kernel_v16(double x, double* sptr, float* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v17(double x, double* sptr, Dummy* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v18(double x, double* sptr, const double* cptr) { + sincos(x, sptr, cptr); +} + +/*Expecting 18 errors: like the sincos kernels above, every kernel passes exactly one wrongly typed argument to the double-precision sincospi; the float flavour (sincospif) is exercised in the single-precision file*/ +__global__ void sincospi_kernel_v1(double* x, double* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v2(Dummy x, double* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v3(double x, char* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v4(double x, short* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v5(double x, int* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v6(double x, long* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v7(double x, long long* sptr, double* cptr) { + sincospi(x, sptr, cptr); +} +__global__ void sincospi_kernel_v8(double x, float* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v9(double x, Dummy* sptr, double* cptr) { sincospi(x, sptr, cptr);
} +__global__ void sincospi_kernel_v10(double x, const double* sptr, double* cptr) { + sincospi(x, sptr, cptr); +} +__global__ void sincospi_kernel_v11(double x, double* sptr, char* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v12(double x, double* sptr, short* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v13(double x, double* sptr, int* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v14(double x, double* sptr, long* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v15(double x, double* sptr, long long* cptr) { + sincospi(x, sptr, cptr); +} +__global__ void sincospi_kernel_v16(double x, double* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v17(double x, double* sptr, Dummy* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v18(double x, double* sptr, const double* cptr) { + sincospi(x, sptr, cptr); +} \ No newline at end of file diff --git a/catch/unit/math/trig_funcs.cc b/catch/unit/math/trig_funcs.cc new file mode 100644 index 0000000000..9671b94ab9 --- /dev/null +++ b/catch/unit/math/trig_funcs.cc @@ -0,0 +1,137 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "trig_negative_kernels_rtc.hh" + +#include "unary_common.hh" +#include "binary_common.hh" + +#include + + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sin, std::sin, 2, 2); +TEST_CASE("Unit_Device_sin_sinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSin); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cos, std::cos, 2, 2) +TEST_CASE("Unit_Device_cos_cosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCos); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(tan, std::tan, 4, 2) +TEST_CASE("Unit_Device_tan_tanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTan); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(asin, std::asin, 2, 2) +TEST_CASE("Unit_Device_asin_asinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAsin); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(acos, std::acos, 2, 2) +TEST_CASE("Unit_Device_acos_acosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAcos); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(atan, std::atan, 2, 2) +TEST_CASE("Unit_Device_atan_atanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAtan); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sinh, std::sinh, 3, 2) +TEST_CASE("Unit_Device_sinh_sinhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSinh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cosh, std::cosh, 2, 1) +TEST_CASE("Unit_Device_cosh_coshf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCosh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(tanh, std::tanh, 2, 1) +TEST_CASE("Unit_Device_tanh_tanhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTanh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(asinh, std::asinh, 3, 2) +TEST_CASE("Unit_Device_asinh_asinhf_Negative_RTC") { 
NegativeTestRTCWrapper<4>(kAsinh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(acosh, std::acosh, 4, 2) +TEST_CASE("Unit_Device_acosh_acoshf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAcosh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(atanh, std::atanh, 3, 2) +TEST_CASE("Unit_Device_atanh_atanhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAtanh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sinpi, boost::math::sin_pi, 2, 2); +TEST_CASE("Unit_Device_sinpi_sinpif_Negative_RTC") { NegativeTestRTCWrapper<4>(kSinpi); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cospi, boost::math::cos_pi, 2, 2); +TEST_CASE("Unit_Device_cospi_cospif_Negative_RTC") { NegativeTestRTCWrapper<4>(kCospi); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(atan2, std::atan2, 3, 2); +TEST_CASE("Unit_Device_atan2_atan2f_Negative_RTC") { NegativeTestRTCWrapper<8>(kAtan2); } + + +template +__global__ void sincos_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + sincosf(xs[i], &ys[i].first, &ys[i].second); + } else if constexpr (std::is_same_v) { + sincos(xs[i], &ys[i].first, &ys[i].second); + } + } +} + +template std::pair sincos(T x) { return {std::sin(x), std::cos(x)}; } + +TEST_CASE("Unit_Device_sincos_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + sincos_kernel, sincos, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2))); +} + +TEST_CASE("Unit_Device_sincos_Accuracy_Positive - double") { + const auto validator_builder = + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2)); + UnaryDoublePrecisionTest(sincos_kernel, sincos, validator_builder); +} + +TEST_CASE("Unit_Device_sincos_sincosf_Negative_RTC") { NegativeTestRTCWrapper<36>(kSincos); } + + +template +__global__ void sincospi_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = 
cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + sincospif(xs[i], &ys[i].first, &ys[i].second); + } else if constexpr (std::is_same_v) { + sincospi(xs[i], &ys[i].first, &ys[i].second); + } + } +} + +template std::pair sincospi(T x) { + return {boost::math::sin_pi(x), boost::math::cos_pi(x)}; +} + +TEST_CASE("Unit_Device_sincospi_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + sincospi_kernel, sincospi, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2))); +} + +TEST_CASE("Unit_Device_sincospi_Accuracy_Positive - double") { + const auto validator_builder = + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2)); + UnaryDoublePrecisionTest(sincospi_kernel, sincospi, validator_builder); +} + +TEST_CASE("Unit_Device_sincospi_sincospif_Negative_RTC") { NegativeTestRTCWrapper<36>(kSincospi); } \ No newline at end of file diff --git a/catch/unit/math/trig_negative_kernels_rtc.hh b/catch/unit/math/trig_negative_kernels_rtc.hh new file mode 100644 index 0000000000..1c855d755e --- /dev/null +++ b/catch/unit/math/trig_negative_kernels_rtc.hh @@ -0,0 +1,320 @@ +// #define TRIG_UNARY_NEGATIVE_KERNELS(func_name) +// class Dummy { +// public: +// __device__ Dummy() {} +// __device__ ~Dummy() {} +// }; +// __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } +// __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } +// __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } +// __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +static constexpr auto kSin{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinf_kernel_v1(float* x) { float result = sinf(x); } + __global__ void sinf_kernel_v2(Dummy x) { float result = sinf(x); } + __global__ void sin_kernel_v1(double* x) { double result = sin(x); } + __global__ void 
sin_kernel_v2(Dummy x) { double result = sin(x); } + )"}; + +static constexpr auto kCos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cosf_kernel_v1(float* x) { float result = cosf(x); } + __global__ void cosf_kernel_v2(Dummy x) { float result = cosf(x); } + __global__ void cos_kernel_v1(double* x) { double result = cos(x); } + __global__ void cos_kernel_v2(Dummy x) { double result = cos(x); } + )"}; + +static constexpr auto kTan{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tanf_kernel_v1(float* x) { float result = tanf(x); } + __global__ void tanf_kernel_v2(Dummy x) { float result = tanf(x); } + __global__ void tan_kernel_v1(double* x) { double result = tan(x); } + __global__ void tan_kernel_v2(Dummy x) { double result = tan(x); } + )"}; + +static constexpr auto kAsin{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void asinf_kernel_v1(float* x) { float result = asinf(x); } + __global__ void asinf_kernel_v2(Dummy x) { float result = asinf(x); } + __global__ void asin_kernel_v1(double* x) { double result = asin(x); } + __global__ void asin_kernel_v2(Dummy x) { double result = asin(x); } + )"}; + +static constexpr auto kAcos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void acosf_kernel_v1(float* x) { float result = acosf(x); } + __global__ void acosf_kernel_v2(Dummy x) { float result = acosf(x); } + __global__ void acos_kernel_v1(double* x) { double result = acos(x); } + __global__ void acos_kernel_v2(Dummy x) { double result = acos(x); } + )"}; + +static constexpr auto kAtan{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atanf_kernel_v1(float* x) { float result = atanf(x); } + __global__ void atanf_kernel_v2(Dummy x) { float result = atanf(x); } + __global__ void atan_kernel_v1(double* x) { double 
result = atan(x); } + __global__ void atan_kernel_v2(Dummy x) { double result = atan(x); } + )"}; + +static constexpr auto kSinh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinhf_kernel_v1(float* x) { float result = sinhf(x); } + __global__ void sinhf_kernel_v2(Dummy x) { float result = sinhf(x); } + __global__ void sinh_kernel_v1(double* x) { double result = sinh(x); } + __global__ void sinh_kernel_v2(Dummy x) { double result = sinh(x); } + )"}; + +static constexpr auto kCosh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void coshf_kernel_v1(float* x) { float result = coshf(x); } + __global__ void coshf_kernel_v2(Dummy x) { float result = coshf(x); } + __global__ void cosh_kernel_v1(double* x) { double result = cosh(x); } + __global__ void cosh_kernel_v2(Dummy x) { double result = cosh(x); } + )"}; + +static constexpr auto kTanh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tanhf_kernel_v1(float* x) { float result = tanhf(x); } + __global__ void tanhf_kernel_v2(Dummy x) { float result = tanhf(x); } + __global__ void tanh_kernel_v1(double* x) { double result = tanh(x); } + __global__ void tanh_kernel_v2(Dummy x) { double result = tanh(x); } + )"}; + +static constexpr auto kAsinh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void asinhf_kernel_v1(float* x) { float result = asinhf(x); } + __global__ void asinhf_kernel_v2(Dummy x) { float result = asinhf(x); } + __global__ void asinh_kernel_v1(double* x) { double result = asinh(x); } + __global__ void asinh_kernel_v2(Dummy x) { double result = asinh(x); } + )"}; + +static constexpr auto kAcosh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void acoshf_kernel_v1(float* x) { float result = acoshf(x); } + __global__ void acoshf_kernel_v2(Dummy x) { float 
result = acoshf(x); } + __global__ void acosh_kernel_v1(double* x) { double result = acosh(x); } + __global__ void acosh_kernel_v2(Dummy x) { double result = acosh(x); } + )"}; + +static constexpr auto kAtanh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atanhf_kernel_v1(float* x) { float result = atanhf(x); } + __global__ void atanhf_kernel_v2(Dummy x) { float result = atanhf(x); } + __global__ void atanh_kernel_v1(double* x) { double result = atanh(x); } + __global__ void atanh_kernel_v2(Dummy x) { double result = atanh(x); } + )"}; + +static constexpr auto kSinpi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinpif_kernel_v1(float* x) { float result = sinpif(x); } + __global__ void sinpif_kernel_v2(Dummy x) { float result = sinpif(x); } + __global__ void sinpi_kernel_v1(double* x) { double result = sinpi(x); } + __global__ void sinpi_kernel_v2(Dummy x) { double result = sinpi(x); } + )"}; + +static constexpr auto kCospi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cospif_kernel_v1(float* x) { float result = cospif(x); } + __global__ void cospif_kernel_v2(Dummy x) { float result = cospif(x); } + __global__ void cospi_kernel_v1(double* x) { double result = cospi(x); } + __global__ void cospi_kernel_v2(Dummy x) { double result = cospi(x); } + )"}; + +static constexpr auto kAtan2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atan2f_kernel_v1(float* x, float y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v2(float x, float* y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v3(Dummy x, float y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v4(float x, Dummy y) { float result = atan2f(x, y); } + __global__ void atan2_kernel_v1(double* x, double y) { double result = atan2(x, y); } + __global__ 
void atan2_kernel_v2(double x, double* y) { double result = atan2(x, y); } + __global__ void atan2_kernel_v3(Dummy x, double y) { double result = atan2(x, y); } + __global__ void atan2_kernel_v4(double x, Dummy y) { double result = atan2(x, y); } + )"}; + +static constexpr auto kSincos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sincosf_kernel_v1(float* x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v2(Dummy x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v3(float x, char* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v4(float x, short* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v5(float x, int* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v6(float x, long* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v7(float x, long long* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v8(float x, double* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v9(float x, Dummy* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v10(float x, const float* sptr, float* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincosf_kernel_v11(float x, float* sptr, char* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v12(float x, float* sptr, short* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v13(float x, float* sptr, int* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v14(float x, float* sptr, long* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v15(float x, float* sptr, long long* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincosf_kernel_v16(float x, float* sptr, double* cptr) { sincosf(x, sptr, cptr); } + __global__ void 
sincosf_kernel_v17(float x, float* sptr, Dummy* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v18(float x, float* sptr, const float* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincos_kernel_v1(double* x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v2(Dummy x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v3(double x, char* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v4(double x, short* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v5(double x, int* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v6(double x, long* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v7(double x, long long* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v8(double x, float* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v9(double x, Dummy* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v10(double x, const double* sptr, double* cptr) { + sincos(x, sptr, cptr); + } + __global__ void sincos_kernel_v11(double x, double* sptr, char* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v12(double x, double* sptr, short* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v13(double x, double* sptr, int* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v14(double x, double* sptr, long* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v15(double x, double* sptr, long long* cptr) { + sincos(x, sptr, cptr); + } + __global__ void sincos_kernel_v16(double x, double* sptr, float* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v17(double x, double* sptr, Dummy* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v18(double x, double* sptr, const double* cptr) { + sincos(x, sptr, 
cptr); + } + )"}; + +static constexpr auto kSincospi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sincospif_kernel_v1(float* x, float* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v3(float x, char* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v4(float x, short* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v5(float x, int* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v6(float x, long* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v7(float x, long long* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v8(float x, double* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v9(float x, Dummy* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v10(float x, const float* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v11(float x, float* sptr, char* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v12(float x, float* sptr, short* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v13(float x, float* sptr, int* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v14(float x, float* sptr, long* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v15(float x, float* sptr, long long* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v16(float x, float* sptr, double* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v17(float x, float* sptr, Dummy* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v18(float x, float* sptr, 
const float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospi_kernel_v1(double* x, double* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v2(Dummy x, double* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v3(double x, char* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v4(double x, short* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v5(double x, int* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v6(double x, long* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v7(double x, long long* sptr, double* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v8(double x, float* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v9(double x, Dummy* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v10(double x, const double* sptr, double* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v11(double x, double* sptr, char* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v12(double x, double* sptr, short* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v13(double x, double* sptr, int* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v14(double x, double* sptr, long* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v15(double x, double* sptr, long long* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v16(double x, double* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v17(double x, double* sptr, Dummy* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v18(double x, double* sptr, const double* cptr) { + sincospi(x, sptr, cptr); + } + )"};  // 36 ill-typed calls in total: 18 against sincospif (float operands) followed by 18 against sincospi (double operands) \ No newline at end of file diff --git
a/catch/unit/math/trig_single_precision_negative_kernels.cc b/catch/unit/math/trig_single_precision_negative_kernels.cc new file mode 100644 index 0000000000..5e66d386be --- /dev/null +++ b/catch/unit/math/trig_single_precision_negative_kernels.cc @@ -0,0 +1,118 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define TRIG_SP_UNARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +/*Expecting 2 errors per macro invocation - 26 total*/ +TRIG_SP_UNARY_NEGATIVE_KERNELS(sin) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cos) +TRIG_SP_UNARY_NEGATIVE_KERNELS(tan) +TRIG_SP_UNARY_NEGATIVE_KERNELS(asin) +TRIG_SP_UNARY_NEGATIVE_KERNELS(acos) +TRIG_SP_UNARY_NEGATIVE_KERNELS(atan) +TRIG_SP_UNARY_NEGATIVE_KERNELS(sinh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cosh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(tanh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(asinh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(atanh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(sinpi) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cospi) + +/*Expecting 4 errors*/ +__global__ void atan2f_kernel_v1(float* x, float y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v2(float x, float* y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v3(Dummy x, float y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v4(float x, Dummy y) { float result = atan2f(x, y); } + +/*Expecting 18 errors*/ +__global__ void sincosf_kernel_v1(float* x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v2(Dummy x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v3(float x, char* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v4(float x, short* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v5(float x, int* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v6(float x, long* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v7(float x, long long* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void 
sincosf_kernel_v8(float x, double* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v9(float x, Dummy* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v10(float x, const float* sptr, float* cptr) { + sincosf(x, sptr, cptr); +} +__global__ void sincosf_kernel_v11(float x, float* sptr, char* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v12(float x, float* sptr, short* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v13(float x, float* sptr, int* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v14(float x, float* sptr, long* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v15(float x, float* sptr, long long* cptr) { + sincosf(x, sptr, cptr); +} +__global__ void sincosf_kernel_v16(float x, float* sptr, double* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v17(float x, float* sptr, Dummy* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v18(float x, float* sptr, const float* cptr) { + sincosf(x, sptr, cptr); +} + +/*Expecting 18 errors*/ +__global__ void sincospif_kernel_v1(float* x, float* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v3(float x, char* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v4(float x, short* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v5(float x, int* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v6(float x, long* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v7(float x, long long* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v8(float x, double* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v9(float x, Dummy* 
sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v10(float x, const float* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v11(float x, float* sptr, char* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v12(float x, float* sptr, short* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v13(float x, float* sptr, int* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v14(float x, float* sptr, long* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v15(float x, float* sptr, long long* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v16(float x, float* sptr, double* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v17(float x, float* sptr, Dummy* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v18(float x, float* sptr, const float* cptr) { + sincospif(x, sptr, cptr); +} \ No newline at end of file diff --git a/catch/unit/math/unary_common.hh b/catch/unit/math/unary_common.hh new file mode 100644 index 0000000000..180b963f4a --- /dev/null +++ b/catch/unit/math/unary_common.hh @@ -0,0 +1,243 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_UNARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(RT* const ys, const size_t num_xs, T* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(xs[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(xs[i]); \ + } \ + } \ + } + +template +void UnaryHalfPrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + + for (uint64_t v = 0u; v < stop;) { + batch_size = std::min(max_batch_size, stop - v); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < 
tail); + + thread_pool.Post([=, &values] { + auto t = v; + uint16_t val; + for (auto j = 0u; j < sub_batch_size; ++j) { + val = static_cast(t++); + values.ptr()[base_idx + j] = *reinterpret_cast(&val); + } + }); + + v += sub_batch_size; + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void UnarySinglePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)), stop); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + + for (uint64_t v = 0u; v < stop;) { + batch_size = std::min(max_batch_size, stop - v); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + + thread_pool.Post([=, &values] { + auto t = v; + uint32_t val; + for (auto j = 0u; j < sub_batch_size; ++j) { + val = static_cast(t++); + values.ptr()[base_idx + j] = *reinterpret_cast(&val); + } + }); + + v += sub_batch_size; + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void UnarySinglePrecisionRangeTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, const float a, + const float b) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto 
max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + + size_t inserted = 0u; + for (float v = a; v != b; v = std::nextafter(v, b)) { + values.ptr()[inserted++] = v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr()); + inserted = 0u; + } +} + +template +void UnaryDoublePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const double a = std::numeric_limits::lowest(), + const double b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(double) + sizeof(T)), num_iterations); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(double)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &values] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(values.ptr() + base_idx, values.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + 
math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void UnaryDoublePrecisionSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + + MathTest math_test(kernel, values.size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, values.size, + values.data); +} + +template +void UnaryHalfPrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Brute force") { UnaryHalfPrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +template +void UnarySinglePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Brute force") { UnarySinglePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +template +void UnaryDoublePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + UnaryDoublePrecisionSpecialValuesTest(kernel, ref, validator_builder); + } + + SECTION("Brute force") { UnaryDoublePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +#define MATH_UNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_UNARY_KERNEL_DEF(kern_name) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - float") { \ + double (*ref)(double) = ref_func; \ + UnarySinglePrecisionTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(sp_ulp)); \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - double") { \ + long double (*ref)(long double) = ref_func; \ + UnaryDoublePrecisionTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(dp_ulp)); \ + } + +#define MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(func_name, sp_ulp, dp_ulp) \ + MATH_UNARY_WITHIN_ULP_TEST_DEF(func_name, std::func_name, 
sp_ulp, dp_ulp) diff --git a/catch/unit/math/validators.hh b/catch/unit/math/validators.hh new file mode 100644 index 0000000000..e8bb220b3c --- /dev/null +++ b/catch/unit/math/validators.hh @@ -0,0 +1,152 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +// Define a new MatcherBase class with a public 'describe' member function because +// Catch::MatcherBase::describe is protected and thus can't be used via a pointer to +// Catch::MatcherBase. +template class MatcherBase : public Catch::MatcherBase { + public: + virtual std::string describe() const = 0; + virtual ~MatcherBase() = default; +}; + +template class ValidatorBase : public MatcherBase { + public: + template + ValidatorBase(T target, Ts&&... 
args) : matcher_{std::forward(args)...}, target_{target} {} + + bool match(const T& val) const override { + if (std::isnan(target_)) { + return std::isnan(val); + } + + return matcher_.match(val); + } + + std::string describe() const override { + if (std::isnan(target_)) { + return "is not NaN"; + } + + return matcher_.describe(); + } + + private: + Matcher matcher_; + T target_; + bool nan = false; +}; + +template auto ULPValidatorBuilderFactory(int64_t ulps) { + return [=](T target, auto&&...) { + return std::make_unique>( + target, Catch::WithinULP(target, ulps)); + }; +}; + +template auto AbsValidatorBuilderFactory(double margin) { + return [=](T target, auto&&...) { + return std::make_unique>( + target, Catch::WithinAbs(target, margin)); + }; +} + +template auto RelValidatorBuilderFactory(T margin) { + return [=](T target, auto&&...) { + return std::make_unique>( + target, Catch::WithinRel(target, margin)); + }; +} + +template class EqValidator : public MatcherBase { + public: + EqValidator(T target) : target_{target} {} + + bool match(const T& val) const override { + if (std::isnan(target_)) { + return std::isnan(val); + } + + return target_ == val; + } + + std::string describe() const override { + std::stringstream ss; + ss << " is not equal to " << target_; + return ss.str(); + } + + private: + T target_; +}; + +template auto EqValidatorBuilderFactory() { + return [](T val, auto&&...) 
{ return std::make_unique>(val); }; +} + +template +class PairValidator : public MatcherBase> { + public: + PairValidator(const std::pair& target, const VBF& vbf, const VBS& vbs) + : first_matcher_{vbf(target.first)}, second_matcher_{vbs(target.second)} {} + + bool match(const std::pair& val) const override { + return first_matcher_->match(val.first) && second_matcher_->match(val.second); + } + + std::string describe() const override { + return "<" + first_matcher_->describe() + ", " + second_matcher_->describe() + ">"; + } + + private: + decltype(std::declval()(std::declval())) first_matcher_; + decltype(std::declval()(std::declval())) second_matcher_; +}; + +template +auto PairValidatorBuilderFactory(const ValidatorBuilder& vb) { + return [=](const std::pair& t, auto&&...) { + return std::make_unique>(t, vb, vb); + }; +} + +template +auto PairValidatorBuilderFactory(const VBF& vbf, const VBS& vbs) { + return [=](const std::pair& t, auto&&...) { + return std::make_unique>(t, vbf, vbs); + }; +} + +template class NopValidator : public MatcherBase { + public: + bool match(const T&) const override { return true; } + + std::string describe() const override { return ""; } +}; + +template auto NopValidatorBuilderFactory() { + return [](auto&&...) 
{ return std::make_unique>(); }; +} diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index fda74f5b2e..025e369b50 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -93,7 +93,13 @@ if(HIP_PLATFORM MATCHES "amd") hipMemAddressFree.cc hipMemAddressReserve.cc hipMemRelease.cc - hipMemGetAllocationPropertiesFromHandle.cc) + hipMemGetAllocationPropertiesFromHandle.cc + hipArray.cc) + if(UNIX) + # Should be compiled for NVIDIA as well after EXSWHTEC-346 is addressed + # For windows build error occurs undefined symbol: hipPointerSetAttribute + set(TEST_SRC ${TEST_SRC} hipPointerSetAttribute.cc) + endif() else() set(TEST_SRC ${TEST_SRC} hipGetSymbolSizeAddress.cc) endif() @@ -161,10 +167,18 @@ set(TEST_SRC hipStreamAttachMemAsync.cc hipMemRangeGetAttributes_old.cc hipMemGetAddressRange.cc - hipArrayGetDescriptor.cc hipMallocMipmappedArray.cc hipFreeMipmappedArray.cc) +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + ${TEST_SRC} + # Below 3 tests should be compiled for NVIDIA as well after EXSWHTEC-349 is addressed + hipArrayGetInfo.cc + hipArrayGetDescriptor.cc + hipArray3DGetDescriptor.cc) +endif() + set(NOT_FOR_MI200_AND_ABOVE_TEST hipMallocArray.cc hipArrayCreate.cc) # tests not for MI200+ set(MI200_AND_ABOVE_TARGETS gfx90a gfx940 gfx941 gfx942) function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL) @@ -179,7 +193,6 @@ function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL) endif() # CMAKE_MATCH_COUNT endforeach() # OFFLOAD_ARCH_LIST endfunction() # CheckAcceptedArchs - if(HIP_PLATFORM MATCHES "amd") if (DEFINED OFFLOAD_ARCH_STR) CheckRejectedArchs(${OFFLOAD_ARCH_STR}) @@ -213,4 +226,4 @@ if(HIP_PLATFORM MATCHES "amd") TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) add_dependencies(build_tests hipHostRegisterPerf) -endif() +endif() \ No newline at end of file diff --git a/catch/unit/memory/hipArray3DGetDescriptor.cc b/catch/unit/memory/hipArray3DGetDescriptor.cc new file mode 100644 index 
0000000000..31a2881dd8 --- /dev/null +++ b/catch/unit/memory/hipArray3DGetDescriptor.cc @@ -0,0 +1,88 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipArray3DGetDescriptor hipArray3DGetDescriptor + * @{ + * @ingroup MemoryTest + * `hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, hipArray* array)` - + * Gets a 3D array descriptor. + */ + +#include +#include + +/** + * Test Description + * ------------------------ + * - Basic sanity test for `hipArray3DGetDescriptor`. 
+ * Test source + * ------------------------ + * - unit/memory/hipArray3DGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArray3DGetDescriptor_Positive_Basic") { + DrvArrayAllocGuard array(make_hipExtent(1024, 4, 2)); + + HIP_ARRAY3D_DESCRIPTOR desc; + HIP_CHECK(hipArray3DGetDescriptor(&desc, array.ptr())); + + using vec_info = vector_info; + REQUIRE(desc.Format == vec_info::format); + REQUIRE(desc.NumChannels == vec_info::size); + REQUIRE(desc.Width == 1024 / sizeof(float)); + REQUIRE(desc.Height == 4); + REQUIRE(desc.Depth == 2); + REQUIRE(desc.Flags == 0); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipArray3DGetDescriptor`. + * Test source + * ------------------------ + * - unit/memory/hipArray3DGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArray3DGetDescriptor_Negative_Parameters") { + DrvArrayAllocGuard array(make_hipExtent(1024, 4, 2)); + + HIP_ARRAY3D_DESCRIPTOR desc; + + SECTION("desc is nullptr") { + HIP_CHECK_ERROR(hipArray3DGetDescriptor(nullptr, array.ptr()), hipErrorInvalidValue); + } + + SECTION("array is nullptr") { + HIP_CHECK_ERROR(hipArray3DGetDescriptor(&desc, nullptr), hipErrorInvalidHandle); + } + + SECTION("array is freed") { + HIP_CHECK(hipArrayDestroy(array.ptr())); + HIP_CHECK_ERROR(hipArray3DGetDescriptor(&desc, array.ptr()), hipErrorInvalidHandle); + } +} \ No newline at end of file diff --git a/catch/unit/memory/hipArrayGetDescriptor.cc b/catch/unit/memory/hipArrayGetDescriptor.cc index c977590987..cac8809cb7 100644 --- a/catch/unit/memory/hipArrayGetDescriptor.cc +++ b/catch/unit/memory/hipArrayGetDescriptor.cc @@ -16,12 +16,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include -#include + #include + #include #include +#include +#include +#include + static bool testPassed1D = false; static bool testPassed2D = false; static constexpr auto NUM_ELM{1024}; @@ -459,3 +463,83 @@ TEST_CASE("Unit_hipArrayGetDescriptor_Negative_Scenarios") { #endif } +/** + * @addtogroup hipArrayGetDescriptor hipArrayGetDescriptor + * @{ + * @ingroup MemoryTest + * `hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, hipArray* array)` - + * Gets a 1D or 2D array descriptor. + */ + +/** + * Test Description + * ------------------------ + * - Basic sanity test for `hipArrayGetDescriptor`. + * Test source + * ------------------------ + * - unit/memory/hipArrayGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetDescriptor_Positive_Basic") { + HIP_ARRAY_DESCRIPTOR expected_desc{}; + using vec_info = vector_info; + expected_desc.Format = vec_info::format; + expected_desc.NumChannels = vec_info::size; + expected_desc.Width = 1024 / sizeof(float); + expected_desc.Height = 4; + + hipArray_t ptr; + HIP_CHECK(hipArrayCreate(&ptr, &expected_desc)); + + HIP_ARRAY_DESCRIPTOR desc; + HIP_CHECK(hipArrayGetDescriptor(&desc, ptr)); + + REQUIRE(desc.Format == expected_desc.Format); + REQUIRE(desc.NumChannels == expected_desc.NumChannels); + REQUIRE(desc.Width == expected_desc.Width); + REQUIRE(desc.Height == expected_desc.Height); + + HIP_CHECK(hipArrayDestroy(ptr)); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipArrayGetDescriptor`. 
+ * Test source + * ------------------------ + * - unit/memory/hipArrayGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetDescriptor_Negative_Parameters") { + HIP_ARRAY_DESCRIPTOR expected_desc{}; + using vec_info = vector_info; + expected_desc.Format = vec_info::format; + expected_desc.NumChannels = vec_info::size; + expected_desc.Width = 1024 / sizeof(float); + expected_desc.Height = 4; + + hipArray_t ptr; + HIP_CHECK(hipArrayCreate(&ptr, &expected_desc)); + + HIP_ARRAY_DESCRIPTOR desc; + + SECTION("desc is nullptr") { + HIP_CHECK_ERROR(hipArrayGetDescriptor(nullptr, ptr), hipErrorInvalidValue); + } + + SECTION("array is nullptr") { + HIP_CHECK_ERROR(hipArrayGetDescriptor(&desc, nullptr), hipErrorInvalidHandle); + } + + SECTION("array is freed") { + HIP_CHECK(hipArrayDestroy(ptr)); + HIP_CHECK_ERROR(hipArrayGetDescriptor(&desc, ptr), hipErrorInvalidHandle); + } + + static_cast(hipArrayDestroy(ptr)); +} diff --git a/catch/unit/memory/hipArrayGetInfo.cc b/catch/unit/memory/hipArrayGetInfo.cc new file mode 100644 index 0000000000..10ebbfe1e7 --- /dev/null +++ b/catch/unit/memory/hipArrayGetInfo.cc @@ -0,0 +1,94 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipArrayGetInfo hipArrayGetInfo + * @{ + * @ingroup MemoryTest + * `hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, unsigned int* flags, hipArray* + * array)` - Gets info about the specified array. + */ + +#include +#include + +/** + * Test Description + * ------------------------ + * - Basic sanity test for `hipArrayGetInfo`. + * Test source + * ------------------------ + * - unit/memory/hipArrayGetInfo.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetInfo_Positive_Basic") { + ArrayAllocGuard array(make_hipExtent(1024, 4, 2)); + + hipChannelFormatDesc desc; + hipExtent extent; + unsigned int flags = 1; + + HIP_CHECK(hipArrayGetInfo(&desc, &extent, &flags, array.ptr())); + + REQUIRE(extent.width == 1024); + REQUIRE(extent.height == 4); + REQUIRE(extent.depth == 2); + + REQUIRE(flags == 0); + + auto expected_desc = hipCreateChannelDesc(); + REQUIRE(desc.x == expected_desc.x); + REQUIRE(desc.y == expected_desc.y); + REQUIRE(desc.z == expected_desc.z); + REQUIRE(desc.w == expected_desc.w); + REQUIRE(desc.f == expected_desc.f); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipArrayGetInfo`. 
+ * Test source + * ------------------------ + * - unit/memory/hipArrayGetInfo.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetInfo_Negative_Parameters") { + ArrayAllocGuard array(make_hipExtent(1024, 4, 4)); + + hipChannelFormatDesc desc; + hipExtent extent; + unsigned int flags; + + SECTION("array is nullptr") { + HIP_CHECK_ERROR(hipArrayGetInfo(&desc, &extent, &flags, nullptr), hipErrorInvalidHandle); + } + + SECTION("array is freed") { + HIP_CHECK(hipFreeArray(array.ptr())); + HIP_CHECK_ERROR(hipArrayGetInfo(&desc, &extent, &flags, array.ptr()), hipErrorInvalidHandle); + } +} \ No newline at end of file diff --git a/catch/unit/memory/hipHostRegister.cc b/catch/unit/memory/hipHostRegister.cc index cb62532ae7..2110ee9281 100644 --- a/catch/unit/memory/hipHostRegister.cc +++ b/catch/unit/memory/hipHostRegister.cc @@ -32,7 +32,7 @@ THE SOFTWARE. #include #include #include -#include + #include #define OFFSET 128 diff --git a/catch/unit/memory/hipPointerGetAttributes.cc b/catch/unit/memory/hipPointerGetAttributes.cc index 4c34a6edc4..60357678f6 100644 --- a/catch/unit/memory/hipPointerGetAttributes.cc +++ b/catch/unit/memory/hipPointerGetAttributes.cc @@ -30,7 +30,7 @@ Following scenarios are verified for hipPointerGetAttributes API */ #include #include -#include + #ifdef __linux__ #include #endif diff --git a/catch/unit/memory/hipPointerSetAttribute.cc b/catch/unit/memory/hipPointerSetAttribute.cc new file mode 100644 index 0000000000..ebce0855b2 --- /dev/null +++ b/catch/unit/memory/hipPointerSetAttribute.cc @@ -0,0 +1,108 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipPointerSetAttribute hipPointerSetAttribute + * @{ + * @ingroup MemoryTest + * `hipPointerSetAttribute(const void* value, hipPointer_attribute attribute, hipDeviceptr_t ptr)` - + * Set attributes on a previously allocated memory region. + */ + +#include +#include +#include + +/** + * Test Description + * ------------------------ + * - Sets pointer attribute `HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS` and verifies behavior. 
+ * Test source + * ------------------------ + * - unit/memory/hipPointerSetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipPointerSetAttribute_Positive_SyncMemops") { + LinearAllocGuard src(LinearAllocs::hipMalloc, 1024); + LinearAllocGuard dst(LinearAllocs::hipMalloc, 1024); + + StreamGuard stream(Streams::created); + LaunchDelayKernel(std::chrono::milliseconds{100}, stream.stream()); + HIP_CHECK(hipMemcpy(dst.ptr(), src.ptr(), 1024, hipMemcpyDeviceToDevice)); + HIP_CHECK_ERROR(hipStreamQuery(stream.stream()), hipErrorNotReady); + + bool value = true; + HIP_CHECK(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, + reinterpret_cast(src.ptr()))); + HIP_CHECK(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, + reinterpret_cast(dst.ptr()))); + + LaunchDelayKernel(std::chrono::milliseconds{100}, stream.stream()); + HIP_CHECK(hipMemcpy(dst.ptr(), src.ptr(), 1024, hipMemcpyDeviceToDevice)); + HIP_CHECK(hipStreamQuery(stream.stream())); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipPointerSetAttribute`. 
+ * Test source + * ------------------------ + * - unit/memory/hipPointerSetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipPointerSetAttribute_Negative_Parameters") { + LinearAllocGuard mem(LinearAllocs::hipMalloc, 4); + bool value = false; + + SECTION("value is nullptr") { + HIP_CHECK_ERROR(hipPointerSetAttribute(nullptr, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, mem.ptr()), + hipErrorInvalidValue); + } + + SECTION("invalid attribute") { + HIP_CHECK_ERROR( + hipPointerSetAttribute(&value, static_cast(-1), mem.ptr()), + hipErrorInvalidValue); + } + + SECTION("ptr is nullptr") { + HIP_CHECK_ERROR(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, nullptr), + hipErrorInvalidValue); + } + + SECTION("host pointer") { + int mem_host; + HIP_CHECK_ERROR(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, &mem_host), + hipErrorInvalidDevicePointer); + } + + SECTION("freed pointer") { + HIP_CHECK(hipFree(mem.ptr())); + HIP_CHECK_ERROR(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, mem.ptr()), + hipErrorInvalidDevicePointer); + } +} \ No newline at end of file diff --git a/catch/unit/module/CMakeLists.txt b/catch/unit/module/CMakeLists.txt index bda6f79cfc..beb52bb50c 100644 --- a/catch/unit/module/CMakeLists.txt +++ b/catch/unit/module/CMakeLists.txt @@ -29,31 +29,34 @@ set(TEST_SRC hipModuleLaunchKernel.cc hipModuleGetGlobal.cc hipModuleGetTexRef.cc + hipModuleLaunchCooperativeKernel.cc + hipModuleLaunchCooperativeKernelMultiDevice.cc + hipFuncGetAttribute.cc ) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code - COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc + COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc -o get_function_module.code -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} DEPENDS 
${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc) add_custom_target(get_function_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code - COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc + COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc -o launch_kernel_module.code -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc) add_custom_target(launch_kernel_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code - COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc + COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc -o get_global_test_module.code -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc) add_custom_target(get_global_test_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_tex_ref_module.code - COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc + COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc -o get_tex_ref_module.code -I${HIP_PATH}/include/ --rocm-path=${ROCM_PATH} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc) diff --git a/catch/unit/module/hipExtModuleLaunchKernel.cc b/catch/unit/module/hipExtModuleLaunchKernel.cc index 8c77b796d1..3772587453 100644 --- a/catch/unit/module/hipExtModuleLaunchKernel.cc +++ b/catch/unit/module/hipExtModuleLaunchKernel.cc @@ -44,7 +44,7 @@ THE SOFTWARE. 
*/ #include -#include + #include #include #include "hip/hip_ext.h" diff --git a/catch/unit/module/hipFuncGetAttribute.cc b/catch/unit/module/hipFuncGetAttribute.cc new file mode 100644 index 0000000000..c55b5179d3 --- /dev/null +++ b/catch/unit/module/hipFuncGetAttribute.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hip_module_common.hh" + +#include +#include +#include + +static hipModule_t GetModule() { + HIP_CHECK(hipFree(nullptr)); + static const auto mg = ModuleGuard::LoadModule("get_function_module.code"); + return mg.module(); +} + +TEST_CASE("Unit_hipFuncGetAttribute_Positive_Basic") { + hipFunction_t kernel = GetKernel(GetModule(), "GlobalKernel"); + + int value; + + SECTION("binaryVersion") { + HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_BINARY_VERSION, kernel)); +#if HT_NVIDIA + const auto major = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMajor, 0); + const auto minor = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMinor, 0); + REQUIRE(value == major * 10 + minor); +#elif HT_AMD + REQUIRE(value > 0); +#endif + } + + SECTION("cacheModeCA") { + HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, kernel)); + REQUIRE((value == 0 || value == 1)); + } + + SECTION("maxThreadsPerBlock") { + HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel)); + REQUIRE(value == GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0)); + } + + SECTION("numRegs") { + HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_NUM_REGS, kernel)); + REQUIRE(value >= 0); + } + + SECTION("ptxVersion") { + HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_PTX_VERSION, kernel)); + REQUIRE(value > 0); + } + + SECTION("sharedSizeBytes") { + HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel)); + REQUIRE(value <= GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); + } +} + +TEST_CASE("Unit_hipFuncGetAttribute_Negative_Parameters") { + hipFunction_t kernel = GetKernel(GetModule(), "GlobalKernel"); + + int value; + + SECTION("value == nullptr") { + HIP_CHECK_ERROR(hipFuncGetAttribute(nullptr, HIP_FUNC_ATTRIBUTE_BINARY_VERSION, kernel), + hipErrorInvalidValue); + } + + SECTION("invalid attribute") { + HIP_CHECK_ERROR(hipFuncGetAttribute(&value, 
static_cast(-1), kernel), + hipErrorInvalidValue); + } + + SECTION("hfunc == nullptr") { + HIP_CHECK_ERROR(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_BINARY_VERSION, nullptr), + hipErrorInvalidResourceHandle); + } +} \ No newline at end of file diff --git a/catch/unit/module/hipModuleLaunchCooperativeKernel.cc b/catch/unit/module/hipModuleLaunchCooperativeKernel.cc new file mode 100644 index 0000000000..cf92152bce --- /dev/null +++ b/catch/unit/module/hipModuleLaunchCooperativeKernel.cc @@ -0,0 +1,211 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @addtogroup hipModuleLaunchCooperativeKernel hipModuleLaunchCooperativeKernel + * @{ + * @ingroup ModuleTest + * `hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + * unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, + * unsigned int sharedMemBytes, hipStream_t stream, void ** kernelParams)` - + * Launches kernel f with launch parameters and shared memory on stream with arguments passed to + * kernelParams, where thread blocks can cooperate and synchronize as they execute. + */ + +#include +#include +#include + +#include "hip_module_launch_kernel_common.hh" + +/** + * Test Description + * ------------------------ + * - Tests `hipModuleLaunchCooperativeKernel` for a cooperative kernel with no parameters, and for + * a normal kernel with parameters. + * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Positive_Basic") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + SECTION("Cooperative kernel with no arguments") { + hipFunction_t f = GetKernel(mg.module(), "CoopKernel"); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 2, 2, 1, 1, 1, 1, 0, nullptr, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + } + + SECTION("Kernel with arguments using kernelParams") { + hipFunction_t f = GetKernel(mg.module(), "Kernel42"); + + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 1, 0, nullptr, kernel_args)); + + int result = 0; + HIP_CHECK(hipMemcpy(&result, 
result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} + +/** + * Test Description + * ------------------------ + * - Positive parameters test for `hipModuleLaunchCooperativeKernel` (each block dim at its max). + * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Positive_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "NOPKernel"); + + SECTION("blockDim.x == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr)); + } + + SECTION("blockDim.y == maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, y, 1, 0, nullptr, nullptr)); + } + + SECTION("blockDim.z == maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, z, 0, nullptr, nullptr)); + } +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipModuleLaunchCooperativeKernel`. 
+ * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Negative_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "NOPKernel"); + + SECTION("f == nullptr") { + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernel(nullptr, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr), + hipErrorInvalidResourceHandle); + } + + SECTION("gridDim.x == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 0, 1, 1, 1, 1, 1, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("gridDim.y == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 0, 1, 1, 1, 1, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("gridDim.z == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 0, 1, 1, 1, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.x == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 0, 1, 1, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.y == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 0, 1, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.z == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 0, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.x > maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.y > maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 
1, 1, 1, 1, y, 1, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.z > maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, z, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); + const unsigned int dim = std::ceil(std::cbrt(max)) + 1;  // + 1: dim^3 > max even for a cube max + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernel(f, 1, 1, 1, dim, dim, dim, 0, nullptr, nullptr), + hipErrorInvalidValue); + } + +#if HT_AMD // Built for AMD only; disabled elsewhere due to defect EXSWHTEC-351 + SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 1, max, nullptr, nullptr), + hipErrorInvalidValue); + } + + SECTION("Invalid stream") { + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 1, 0, stream, nullptr), + hipErrorInvalidValue); + } +#endif +} \ No newline at end of file diff --git a/catch/unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc b/catch/unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc new file mode 100644 index 0000000000..1deaae02c0 --- /dev/null +++ b/catch/unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc @@ -0,0 +1,227 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipModuleLaunchCooperativeKernelMultiDevice + * hipModuleLaunchCooperativeKernelMultiDevice + * @{ + * @ingroup ModuleTest + * `hipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList, unsigned + * int numDevices, unsigned int flags)` - + * Launches kernels on multiple devices where thread blocks can cooperate and synchronize as they + * execute. + */ + +#include +#include +#include + +#include "hip_module_launch_kernel_common.hh" + +/** + * Test Description + * ------------------------ + * - Tests `hipModuleLaunchCooperativeKernelMultiDevice` for a cooperative kernel with no parameters. 
+ * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernelMultiDevice_Positive_Basic") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "CoopKernel"); + + const auto device_count = HipTest::getDeviceCount(); + + std::vector<hipFunctionLaunchParams> params_list(device_count);  // one launch descriptor per device + + int device = 0; + for (auto& params : params_list) { + params.function = f; + params.gridDimX = 1; + params.gridDimY = 1; + params.gridDimZ = 1; + params.blockDimX = 1; + params.blockDimY = 1; + params.blockDimZ = 1; + params.kernelParams = nullptr; + params.sharedMemBytes = 0; + HIP_CHECK(hipSetDevice(device++)); + HIP_CHECK(hipStreamCreate(&params.hStream));  // stream is created on the device just selected + } + + HIP_CHECK(hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u)); + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamSynchronize(params.hStream)); + } + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.hStream)); + } +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipModuleLaunchCooperativeKernelMultiDevice`. 
+ * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernelMultiDevice_Negative_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "CoopKernel"); + + const auto device_count = HipTest::getDeviceCount(); + + std::vector<hipFunctionLaunchParams> params_list(device_count);  // one launch descriptor per device + + int device = 0; + for (auto& params : params_list) { + params.function = f; + params.gridDimX = 1; + params.gridDimY = 1; + params.gridDimZ = 1; + params.blockDimX = 1; + params.blockDimY = 1; + params.blockDimZ = 1; + params.kernelParams = nullptr; + params.sharedMemBytes = 0; + HIP_CHECK(hipSetDevice(device++)); + HIP_CHECK(hipStreamCreate(&params.hStream));  // stream is created on the device just selected + } + + SECTION("launchParamsList == nullptr") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernelMultiDevice(nullptr, device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), 0, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices > device count") { + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count + 1, 0u), + hipErrorInvalidValue); + } + + SECTION("invalid flags") { + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 999), + hipErrorInvalidValue); + } + + if (device_count > 1) { + SECTION("launchParamsList.func doesn't match across all devices") { + params_list[1].function = GetKernel(mg.module(), "NOPKernel"); + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.gridDim doesn't match across all kernels") { + 
params_list[1].gridDimX = 2; + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.blockDim doesn't match across all kernels") { + params_list[1].blockDimX = 2; + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.sharedMem doesn't match across all kernels") { + params_list[1].sharedMemBytes = 1024; + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + } + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.hStream)); + } +} + +/** + * Test Description + * ------------------------ + * - Tries running `hipModuleLaunchCooperativeKernelMultiDevice` with multiple kernels on the same + * device. + * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "CoopKernel"); + + HIP_CHECK(hipSetDevice(0)); + + std::vector<hipFunctionLaunchParams> params_list(2);  // two launches aimed at the same device + + for (auto& params : params_list) { + params.function = f; + params.gridDimX = 1; + params.gridDimY = 1; + params.gridDimZ = 1; + params.blockDimX = 1; + params.blockDimY = 1; + params.blockDimZ = 1; + params.kernelParams = nullptr; + params.sharedMemBytes = 0; + HIP_CHECK(hipStreamCreate(&params.hStream)); + } + + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), 2, 0u), + hipErrorInvalidValue); + + for (const auto& params : params_list) { + 
HIP_CHECK(hipStreamDestroy(params.hStream)); + } +} \ No newline at end of file diff --git a/catch/unit/module/hip_module_launch_kernel_common.hh b/catch/unit/module/hip_module_launch_kernel_common.hh index eaf1f970c9..e033a8eb88 100644 --- a/catch/unit/module/hip_module_launch_kernel_common.hh +++ b/catch/unit/module/hip_module_launch_kernel_common.hh @@ -88,32 +88,32 @@ template void ModuleLaunchKernelPositiveParamet }; SECTION("gridDimX == maxGridDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimX); + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxGridDimX, 0); LaunchNOPKernel(x, 1, 1, 1, 1, 1); } SECTION("gridDimY == maxGridDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimY); + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxGridDimY, 0); LaunchNOPKernel(1, y, 1, 1, 1, 1); } SECTION("gridDimZ == maxGridDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimZ); + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxGridDimZ, 0); LaunchNOPKernel(1, 1, z, 1, 1, 1); } SECTION("blockDimX == maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); LaunchNOPKernel(1, 1, 1, x, 1, 1); } SECTION("blockDimY == maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); LaunchNOPKernel(1, 1, 1, 1, y, 1); } SECTION("blockDimZ == maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); LaunchNOPKernel(1, 1, 1, 1, 1, z); } } @@ -163,19 +163,19 @@ template void ModuleLaunchKernelNegativeParamet // Disabled on AMD due to defect - EXSWHTEC-158 #if HT_NVIDIA SECTION("gridDimX > maxGridDimX") { 
- const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimX) + 1u; + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxGridDimX, 0) + 1u; HIP_CHECK_ERROR(func(f, x, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), hipErrorInvalidValue); } SECTION("gridDimY > maxGridDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimY) + 1u; + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxGridDimY, 0) + 1u; HIP_CHECK_ERROR(func(f, 1, y, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), hipErrorInvalidValue); } SECTION("gridDimZ > maxGridDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimZ) + 1u; + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxGridDimZ, 0) + 1u; HIP_CHECK_ERROR(func(f, 1, 1, z, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), hipErrorInvalidValue); } @@ -184,19 +184,19 @@ template void ModuleLaunchKernelNegativeParamet // Disabled on AMD due to defect - EXSWHTEC-156 #if HT_NVIDIA SECTION("blockDimX > maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; HIP_CHECK_ERROR(func(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), hipErrorInvalidValue); } SECTION("blockDimY > maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, y, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), hipErrorInvalidValue); } SECTION("blockDimZ > maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, z, 0, nullptr, nullptr, 
nullptr, nullptr, nullptr, 0u), hipErrorInvalidValue); } @@ -205,7 +205,7 @@ template void ModuleLaunchKernelNegativeParamet // Disabled on AMD due to defect - EXSWHTEC-162 #if HT_NVIDIA SECTION("blockDimX * blockDimY * blockDimZ > MaxThreadsPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); const unsigned int dim = std::ceil(std::cbrt(max)) + 1; HIP_CHECK_ERROR( func(f, 1, 1, 1, dim, dim, dim, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), @@ -216,7 +216,7 @@ template void ModuleLaunchKernelNegativeParamet // Disabled on AMD due to defect - EXSWHTEC-159 #if HT_NVIDIA SECTION("sharedMemBytes > max shared memory per block") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, 1, max, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), hipErrorInvalidValue); } @@ -241,8 +241,8 @@ template void ModuleLaunchKernelNegativeParamet void* kernel_args[1] = {&result_ptr}; // clang-format off void *extra[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &result_ptr, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_BUFFER_POINTER, &result_ptr, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END }; // clang-format on diff --git a/catch/unit/module/launch_kernel_module.cc b/catch/unit/module/launch_kernel_module.cc index 01c04b45d6..12821da450 100644 --- a/catch/unit/module/launch_kernel_module.cc +++ b/catch/unit/module/launch_kernel_module.cc @@ -20,6 +20,7 @@ THE SOFTWARE. 
*/ #include +#include extern "C" { __global__ void NOPKernel() {} @@ -34,4 +35,9 @@ __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { } } } + +__global__ void CoopKernel() { + cooperative_groups::grid_group grid = cooperative_groups::this_grid(); + grid.sync(); +} } \ No newline at end of file diff --git a/catch/unit/p2p/hipDeviceGetP2PAttribute.cc b/catch/unit/p2p/hipDeviceGetP2PAttribute.cc index 89207fee7f..5564fdfc31 100644 --- a/catch/unit/p2p/hipDeviceGetP2PAttribute.cc +++ b/catch/unit/p2p/hipDeviceGetP2PAttribute.cc @@ -23,7 +23,7 @@ THE SOFTWARE. #include "hip/hip_runtime_api.h" #include #include -#include + /** * @addtogroup hipDeviceGetP2PAttribute hipDeviceGetP2PAttribute diff --git a/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc b/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc index 2aade21ad3..fcd114634f 100644 --- a/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc +++ b/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc @@ -21,7 +21,7 @@ THE SOFTWARE. #include #include #include -#include + #ifdef __linux__ #include #include diff --git a/catch/unit/printf/printfFlagsNonHost.cc b/catch/unit/printf/printfFlagsNonHost.cc index 75f5ef7c35..1fd7900d52 100644 --- a/catch/unit/printf/printfFlagsNonHost.cc +++ b/catch/unit/printf/printfFlagsNonHost.cc @@ -18,7 +18,7 @@ THE SOFTWARE. #include #include -#include + /** * @addtogroup printf printf diff --git a/catch/unit/printf/printfHost.cc b/catch/unit/printf/printfHost.cc index 3456d63d21..a4afc5268a 100644 --- a/catch/unit/printf/printfHost.cc +++ b/catch/unit/printf/printfHost.cc @@ -19,7 +19,7 @@ THE SOFTWARE. */ #include -#include + // Kernel Function __global__ void run_printf(int *count) { diff --git a/catch/unit/printf/printfNonHost.cc b/catch/unit/printf/printfNonHost.cc index 36bd8e35f6..80989de77a 100644 --- a/catch/unit/printf/printfNonHost.cc +++ b/catch/unit/printf/printfNonHost.cc @@ -19,7 +19,7 @@ THE SOFTWARE. 
*/ #include -#include + #define ITER_COUNT 61681 #define KERNEL_ITERATIONS 15 diff --git a/catch/unit/printf/printfSpecifiersNonHost.cc b/catch/unit/printf/printfSpecifiersNonHost.cc index 7c6559641f..c712e5e435 100644 --- a/catch/unit/printf/printfSpecifiersNonHost.cc +++ b/catch/unit/printf/printfSpecifiersNonHost.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include -#include + /** * @addtogroup printf diff --git a/catch/unit/stream/hipStreamGetDevice.cc b/catch/unit/stream/hipStreamGetDevice.cc index 9f2eef521e..fd89069f2f 100644 --- a/catch/unit/stream/hipStreamGetDevice.cc +++ b/catch/unit/stream/hipStreamGetDevice.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #define NUMBER_OF_THREADS 10 static bool thread_results[NUMBER_OF_THREADS]; @@ -54,8 +54,7 @@ TEST_CASE("Unit_hipStreamGetDevice_Negative") { HIP_CHECK(hipStreamCreate(&stream)); HIP_CHECK_ERROR(hipStreamGetDevice(nullptr, nullptr), hipErrorInvalidValue); - HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), hipErrorInvalidValue); HIP_CHECK_ERROR(hipStreamGetDevice(stream, nullptr), hipErrorInvalidValue); HIP_CHECK(hipStreamDestroy(stream)); } @@ -145,9 +144,7 @@ static bool validateStreamGetDevice() { return true; } -static void thread_Test(int threadNum) { - thread_results[threadNum] = validateStreamGetDevice(); -} +static void thread_Test(int threadNum) { thread_results[threadNum] = validateStreamGetDevice(); } static bool test_hipStreamGetDevice_MThread() { std::vector tests; @@ -158,7 +155,7 @@ static bool test_hipStreamGetDevice_MThread() { tests.push_back(std::thread(thread_Test, idx)); } // Wait for all threads to complete - for (std::thread &t : tests) { + for (std::thread& t : tests) { t.join(); } // Wait for thread @@ -169,9 +166,7 @@ static bool test_hipStreamGetDevice_MThread() { return status; } -TEST_CASE("Unit_hipStreamGetDevice_MThread") { - REQUIRE(true == 
test_hipStreamGetDevice_MThread()); -} +TEST_CASE("Unit_hipStreamGetDevice_MThread") { REQUIRE(true == test_hipStreamGetDevice_MThread()); } /** * Test Description diff --git a/catch/unit/stream_ordered/CMakeLists.txt b/catch/unit/stream_ordered/CMakeLists.txt new file mode 100644 index 0000000000..e9a5e56a8d --- /dev/null +++ b/catch/unit/stream_ordered/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# Common Tests - Test independent of all platforms +set(COMMON_SHARED_SRC helper_multiprocess.cc) + +set(TEST_SRC + hipMemPoolExportImport.cc + hipMemPoolExportImportIPC.cc) + +hip_add_exe_to_target(NAME StreamOrderedTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) diff --git a/catch/unit/stream_ordered/helper_multiprocess.cc b/catch/unit/stream_ordered/helper_multiprocess.cc new file mode 100644 index 0000000000..77797fa71b --- /dev/null +++ b/catch/unit/stream_ordered/helper_multiprocess.cc @@ -0,0 +1,518 @@ +/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "helper_multiprocess.hh" +#include +#include + +int sharedMemoryCreate(const char* name, size_t sz, sharedMemoryInfo* info) { +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + info->size = sz; + info->shmHandle = + CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, (DWORD)sz, name); + if (info->shmHandle == 0) { + return GetLastError(); + } + + info->addr = MapViewOfFile(info->shmHandle, FILE_MAP_ALL_ACCESS, 0, 0, sz); + if (info->addr == NULL) { + return GetLastError(); + } + + return 0; +#else + int status = 0; + + info->size = sz; + + info->shmFd = shm_open(name, O_RDWR | O_CREAT, 0777); + if (info->shmFd < 0) { + return errno; + } + + status = ftruncate(info->shmFd, sz); + if (status != 0) { + return status; + } + + info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0); + if (info->addr == NULL) { + return errno; + } + + return 0; +#endif +} + +int sharedMemoryOpen(const char* name, size_t sz, sharedMemoryInfo* info) { +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + info->size = sz; + + info->shmHandle = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, name); + if (info->shmHandle == 0) { + return GetLastError(); + } + + info->addr = MapViewOfFile(info->shmHandle, FILE_MAP_ALL_ACCESS, 0, 0, sz); + if (info->addr == NULL) { + return GetLastError(); + } + + return 0; +#else + info->size = sz; + + info->shmFd = shm_open(name, 
O_RDWR, 0777); + if (info->shmFd < 0) { + return errno; + } + + info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0); + if (info->addr == MAP_FAILED) { /* mmap() signals failure with MAP_FAILED, never NULL */ + return errno; + } + + return 0; +#endif +} + +void sharedMemoryClose(sharedMemoryInfo* info) { /* Unmaps info->addr and closes the backing handle / file descriptor. */ +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + if (info->addr) { + UnmapViewOfFile(info->addr); + } + if (info->shmHandle) { + CloseHandle(info->shmHandle); + } +#else + if (info->addr && info->addr != MAP_FAILED) { /* skip the unmap when the earlier mmap() never succeeded */ + munmap(info->addr, info->size); + } + if (info->shmFd) { + close(info->shmFd); + } +#endif +} + +int spawnProcess(Process* process, const char* app, char* const* args) { +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + STARTUPINFO si = {}; + memset(&si, 0, sizeof(STARTUPINFO)); + BOOL status; + std::string arg_string; + memset(process, 0, sizeof(*process)); + + while (*args) { + arg_string.append(*args).append(1, ' '); + args++; + } + + status = + CreateProcess(app, LPSTR(arg_string.c_str()), NULL, NULL, FALSE, 0, NULL, NULL, &si, process); + + return status ? 
0 : GetLastError(); +#else + *process = fork(); + if (*process == 0) { + if (0 > execvp(app, args)) { + return errno; + } + } else if (*process < 0) { + return errno; + } + return 0; +#endif +} + +int waitProcess(Process* process) { +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + DWORD exitCode; + WaitForSingleObject(process->hProcess, INFINITE); + GetExitCodeProcess(process->hProcess, &exitCode); + CloseHandle(process->hProcess); + CloseHandle(process->hThread); + return (int)exitCode; +#else + int status = 0; + do { + if (0 > waitpid(*process, &status, 0)) { + return errno; + } + } while (!WIFEXITED(status)); + return WEXITSTATUS(status); +#endif +} + +#if defined(__linux__) +int ipcCreateSocket(ipcHandle*& handle, const char* name, const std::vector& /*processes*/) { /* Allocates an ipcHandle and binds an AF_UNIX datagram socket to `name`; returns 0 on success, -1 on failure. */ + int server_fd; + struct sockaddr_un servaddr; + + handle = new ipcHandle; + memset(handle, 0, sizeof(*handle)); + handle->socket = -1; + handle->socketName = NULL; + + // Creating socket file descriptor (socket() returns -1 on failure; 0 is a valid descriptor) + if ((server_fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) { + perror("IPC failure: Socket creation failed"); + return -1; + } + + unlink(name); + bzero(&servaddr, sizeof(servaddr)); + servaddr.sun_family = AF_UNIX; + + size_t len = strlen(name); + if (len > (sizeof(servaddr.sun_path) - 1)) { + perror("IPC failure: Cannot bind provided name to socket. 
Name too large"); + return -1; + } + + strncpy(servaddr.sun_path, name, len); + + if (bind(server_fd, (struct sockaddr*)&servaddr, SUN_LEN(&servaddr)) < 0) { + perror("IPC failure: Binding socket failed"); + return -1; + } + + handle->socketName = new char[strlen(name) + 1]; + strcpy(handle->socketName, name); + handle->socket = server_fd; + return 0; +} + +int ipcOpenSocket(ipcHandle*& handle) { + int sock = 0; + struct sockaddr_un cliaddr; + + handle = new ipcHandle; + memset(handle, 0, sizeof(*handle)); + + if ((sock = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) { + perror("IPC failure:Socket creation error"); + return -1; + } + + bzero(&cliaddr, sizeof(cliaddr)); + cliaddr.sun_family = AF_UNIX; + char temp[10]; + + // Create unique name for the socket. + sprintf(temp, "%u", getpid()); + + strcpy(cliaddr.sun_path, temp); + if (bind(sock, (struct sockaddr*)&cliaddr, sizeof(cliaddr)) < 0) { + perror("IPC failure: Binding socket failed"); + return -1; + } + + handle->socket = sock; + handle->socketName = new char[strlen(temp) + 1]; + strcpy(handle->socketName, temp); + + return 0; +} + +int ipcCloseSocket(ipcHandle* handle) { + if (!handle) { + return -1; + } + + if (handle->socketName) { + unlink(handle->socketName); + delete[] handle->socketName; + } + close(handle->socket); + delete handle; + return 0; +} + +int ipcRecvShareableHandle(ipcHandle* handle, ShareableHandle* shHandle) { + struct msghdr msg = {}; + memset(&msg, 0, sizeof(struct msghdr)); + struct iovec iov[1]; + + // Union to guarantee alignment requirements for control array + union { + struct cmsghdr cm; + char control[CMSG_SPACE(sizeof(int))]; + } control_un; + + struct cmsghdr* cmptr; + int receivedfd; + char dummy_buffer[1]; + + msg.msg_control = control_un.control; + msg.msg_controllen = sizeof(control_un.control); + + iov[0].iov_base = (void*)dummy_buffer; + iov[0].iov_len = sizeof(dummy_buffer); + + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + if (recvmsg(handle->socket, &msg, 0) <= 0) { + 
perror("IPC failure: Receiving data over socket failed"); + return -1; + } + + if (((cmptr = CMSG_FIRSTHDR(&msg)) != NULL) && (cmptr->cmsg_len == CMSG_LEN(sizeof(int)))) { + if ((cmptr->cmsg_level != SOL_SOCKET) || (cmptr->cmsg_type != SCM_RIGHTS)) { + return -1; + } + + memmove(&receivedfd, CMSG_DATA(cmptr), sizeof(receivedfd)); + *(int*)shHandle = receivedfd; + } else { + return -1; + } + + return 0; +} + +int ipcRecvDataFromClient(ipcHandle* serverHandle, void* data, size_t size) { + ssize_t readResult; + struct sockaddr_un cliaddr; + socklen_t len = sizeof(cliaddr); + + readResult = recvfrom(serverHandle->socket, data, size, 0, (struct sockaddr*)&cliaddr, &len); + if (readResult == -1) { + perror("IPC failure: Receiving data over socket failed"); + return -1; + } + return 0; +} + +int ipcSendDataToServer(ipcHandle* handle, const char* serverName, const void* data, size_t size) { /* Sends `size` bytes from `data` as one datagram to the AF_UNIX socket named `serverName`; returns 0 on success, -1 when sendto() fails. */ + ssize_t sendResult; + struct sockaddr_un serveraddr; + + bzero(&serveraddr, sizeof(serveraddr)); + serveraddr.sun_family = AF_UNIX; + strncpy(serveraddr.sun_path, serverName, sizeof(serveraddr.sun_path) - 1); + + sendResult = + sendto(handle->socket, data, size, 0, (struct sockaddr*)&serveraddr, sizeof(serveraddr)); + if (sendResult <= 0) { + perror("IPC failure: Sending data over socket failed"); + } + + return (sendResult <= 0) ? -1 : 0; /* propagate the failure instead of always reporting success */ +} + +int ipcSendShareableHandle(ipcHandle* handle, const std::vector& shareableHandles, + Process process, int data) { + struct msghdr msg; + struct iovec iov[1]; + + union { + struct cmsghdr cm; + char control[CMSG_SPACE(sizeof(int))]; + } control_un; + + struct cmsghdr* cmptr; + struct sockaddr_un cliaddr; + + // Construct client address to send this SHareable handle to + bzero(&cliaddr, sizeof(cliaddr)); + cliaddr.sun_family = AF_UNIX; + char temp[10]; + sprintf(temp, "%u", process); + strcpy(cliaddr.sun_path, temp); + + // Send corresponding shareable handle to the client + int sendfd = (int)shareableHandles[data]; + + msg.msg_control = control_un.control; + 
msg.msg_controllen = sizeof(control_un.control); + + cmptr = CMSG_FIRSTHDR(&msg); + cmptr->cmsg_len = CMSG_LEN(sizeof(int)); + cmptr->cmsg_level = SOL_SOCKET; + cmptr->cmsg_type = SCM_RIGHTS; + + memmove(CMSG_DATA(cmptr), &sendfd, sizeof(sendfd)); + + msg.msg_name = (void*)&cliaddr; + msg.msg_namelen = sizeof(struct sockaddr_un); + + iov[0].iov_base = (void*)""; + iov[0].iov_len = 1; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + ssize_t sendResult = sendmsg(handle->socket, &msg, 0); + if (sendResult <= 0) { + perror("IPC failure: Sending data over socket failed"); + return -1; + } + + return 0; +} + +int ipcSendShareableHandles(ipcHandle* handle, const std::vector& shareableHandles, + const std::vector& processes) { + // Send all shareable handles to every single process. + for (unsigned int i = 0; i < shareableHandles.size(); i++) { + for (unsigned int j = 0; j < processes.size(); j++) { + checkIpcErrors(ipcSendShareableHandle(handle, shareableHandles, processes[j], i)); + } + } + return 0; +} + +int ipcRecvShareableHandles(ipcHandle* handle, std::vector& shareableHandles) { + for (unsigned int i = 0; i < shareableHandles.size(); i++) { + checkIpcErrors(ipcRecvShareableHandle(handle, &shareableHandles[i])); + } + return 0; +} + +int ipcCloseShareableHandle(ShareableHandle shHandle) { return close(shHandle); } + +#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +// Generic name to build individual Mailslot names by appending process ids. +LPTSTR SlotName = (LPTSTR)TEXT("\\\\.\\mailslot\\sample_mailslot_"); + +int ipcCreateSocket(ipcHandle*& handle, const char*, const std::vector& processes) { + handle = new ipcHandle; + handle->hMailslot.resize(processes.size()); + + // Open Mailslots of all clients and store respective handles. 
+ for (unsigned int i = 0; i < handle->hMailslot.size(); ++i) { + std::basic_string childSlotName(SlotName); + char tempBuf[20]; + _itoa_s(processes[i].dwProcessId, tempBuf, 10); + childSlotName += TEXT(tempBuf); + + HANDLE hFile = + CreateFile(TEXT(childSlotName.c_str()), GENERIC_WRITE, FILE_SHARE_READ, + (LPSECURITY_ATTRIBUTES)NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, (HANDLE)NULL); + if (hFile == INVALID_HANDLE_VALUE) { + printf("IPC failure: Opening Mailslot by CreateFile failed with %lu\n", GetLastError()); + return -1; + } + handle->hMailslot[i] = hFile; + } + return 0; +} + +int ipcOpenSocket(ipcHandle*& handle) { + handle = new ipcHandle; + HANDLE hSlot; + + std::basic_string clientSlotName(SlotName); + char tempBuf[20]; + _itoa_s(GetCurrentProcessId(), tempBuf, 10); + clientSlotName += TEXT(tempBuf); + + hSlot = CreateMailslot((LPSTR)clientSlotName.c_str(), 0, MAILSLOT_WAIT_FOREVER, + (LPSECURITY_ATTRIBUTES)NULL); + if (hSlot == INVALID_HANDLE_VALUE) { + printf("IPC failure: CreateMailslot failed for client with %lu\n", GetLastError()); + return -1; + } + + handle->hMailslot.push_back(hSlot); + return 0; +} + +int ipcSendData(HANDLE mailslot, const void* data, size_t sz) { + BOOL result; + DWORD cbWritten; + + result = WriteFile(mailslot, data, (DWORD)sz, &cbWritten, (LPOVERLAPPED)NULL); + if (!result) { + printf("IPC failure: WriteFile failed with %lu.\n", GetLastError()); + return -1; + } + return 0; +} + +int ipcRecvData(ipcHandle* handle, void* data, size_t sz) { + DWORD cbRead = 0; + + if (!ReadFile(handle->hMailslot[0], data, (DWORD)sz, &cbRead, NULL)) { + printf("IPC failure: ReadFile failed with %lu.\n", GetLastError()); + return -1; + } + + if (sz != (size_t)cbRead) { + printf("IPC failure: ReadFile didn't receive the expected number of bytes\n"); + return -1; + } + + return 0; +} + +int ipcSendShareableHandles(ipcHandle* handle, const std::vector& shareableHandles, + const std::vector& processes) { + // Send all shareable handles to every 
single process. + for (unsigned int i = 0; i < processes.size(); i++) { + HANDLE hProcess = OpenProcess(PROCESS_DUP_HANDLE, FALSE, processes[i].dwProcessId); + if (hProcess == NULL) { /* OpenProcess() reports failure with NULL, not INVALID_HANDLE_VALUE */ + printf("IPC failure: OpenProcess failed (%lu)\n", GetLastError()); + return -1; + } + + for (unsigned int j = 0; j < shareableHandles.size(); j++) { + HANDLE hDup = INVALID_HANDLE_VALUE; + // Duplicate the handle into the target process's space + if (!DuplicateHandle(GetCurrentProcess(), shareableHandles[j], hProcess, &hDup, 0, FALSE, + DUPLICATE_SAME_ACCESS)) { + printf("IPC failure: DuplicateHandle failed (%lu)\n", GetLastError()); + return -1; + } + checkIpcErrors(ipcSendData(handle->hMailslot[i], &hDup, sizeof(hDup))); + } + CloseHandle(hProcess); + } + return 0; +} + +int ipcRecvShareableHandles(ipcHandle* handle, std::vector& shareableHandles) { + for (unsigned int i = 0; i < shareableHandles.size(); i++) { + checkIpcErrors(ipcRecvData(handle, &shareableHandles[i], sizeof(shareableHandles[i]))); + } + return 0; +} + +int ipcCloseSocket(ipcHandle* handle) { + for (unsigned int i = 0; i < handle->hMailslot.size(); i++) { + CloseHandle(handle->hMailslot[i]); + } + delete handle; + return 0; +} + +int ipcCloseShareableHandle(ShareableHandle shHandle) { + CloseHandle(shHandle); + return 0; +} + +#endif diff --git a/catch/unit/stream_ordered/helper_multiprocess.hh b/catch/unit/stream_ordered/helper_multiprocess.hh new file mode 100644 index 0000000000..b9bb4e16ea --- /dev/null +++ b/catch/unit/stream_ordered/helper_multiprocess.hh @@ -0,0 +1,119 @@ +/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef HELPER_MULTIPROCESS_H +#define HELPER_MULTIPROCESS_H + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#else +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif +#include + +typedef struct sharedMemoryInfo_st { + void* addr; + size_t size; +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + HANDLE shmHandle; +#else + int shmFd; +#endif +} sharedMemoryInfo; + +int sharedMemoryCreate(const char* name, size_t sz, sharedMemoryInfo* info); + +int sharedMemoryOpen(const char* name, size_t sz, sharedMemoryInfo* info); + +void sharedMemoryClose(sharedMemoryInfo* info); + + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +typedef PROCESS_INFORMATION Process; +#else +typedef pid_t Process; +#endif + +int spawnProcess(Process* process, const char* app, char* const* args); + +int waitProcess(Process* process); + +#define checkIpcErrors(ipcFuncResult) \ + if (ipcFuncResult == -1) { \ + fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); \ + exit(EXIT_FAILURE); \ + } + +#if defined(__linux__) +struct ipcHandle_st { + int socket; + char* socketName; +}; +typedef int ShareableHandle; +#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +struct ipcHandle_st { + std::vector + hMailslot; // 1 Handle in case of child and `num children` Handles for parent. 
+}; +typedef HANDLE ShareableHandle; +#endif + +typedef struct ipcHandle_st ipcHandle; + +int ipcCreateSocket(ipcHandle*& handle, const char* name, const std::vector& processes); + +int ipcOpenSocket(ipcHandle*& handle); + +int ipcCloseSocket(ipcHandle* handle); + +int ipcRecvShareableHandles(ipcHandle* handle, std::vector& shareableHandles); + +int ipcSendShareableHandles(ipcHandle* handle, const std::vector& shareableHandles, + const std::vector& processes); + +int ipcCloseShareableHandle(ShareableHandle shHandle); + +#endif // HELPER_MULTIPROCESS_H diff --git a/catch/unit/stream_ordered/hipMemPoolExportImport.cc b/catch/unit/stream_ordered/hipMemPoolExportImport.cc new file mode 100644 index 0000000000..94e2954692 --- /dev/null +++ b/catch/unit/stream_ordered/hipMemPoolExportImport.cc @@ -0,0 +1,495 @@ +/* + Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE.
+ */ + +#ifdef _WIN64 +#define NOMINMAX +#endif /* _WIN64 */ + +#include +#include +#include + +/** + * @addtogroup hipMemPoolExportToShareableHandle hipMemPoolExportToShareableHandle + * @{ + * @ingroup StreamOTest + * `hipMemPoolExportToShareableHandle(void* shared_handle, hipMemPool_t mem_pool, + * hipMemAllocationHandleType handle_type, unsigned int flags)` - Exports a memory pool to the + * requested handle type. + */ + +/** + * Test Description + * ------------------------ + * - Basic test to verify exporting/importing a shareable handle on a single device in a single + * process. + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportImport_Functional") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. 
Skip the test case."); + return; + } + + int shareable_handle; + hipMemPoolPtrExportData export_ptr; + void* ptr; + + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + HIP_CHECK(hipSetDevice(0)); + StreamGuard stream(Streams::withFlags, hipStreamNonBlocking); + + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + // Allocate memory in a stream from the pool just created + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, mempool, stream.stream())); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&shareable_handle, mempool, handle_type, 0)); + + memset((void*)&export_ptr, 0, sizeof(hipMemPoolPtrExportData)); + HIP_CHECK(hipMemPoolExportPointer(&export_ptr, reinterpret_cast(ptr))); + + LinearAllocGuard host_ptr(LinearAllocs::hipHostMalloc, kPageSize); + + hipMemPool_t shared_mempool; + int* shared_ptr; + + HIP_CHECK(hipMemPoolImportFromShareableHandle( + &shared_mempool, reinterpret_cast(shareable_handle), handle_type, 0)); + + hipMemAccessFlags access_flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = 0; + HIP_CHECK(hipMemPoolGetAccess(&access_flags, shared_mempool, &location)); + if (access_flags != hipMemAccessFlagsProtReadWrite) { + hipMemAccessDesc desc; + memset(&desc, 0, sizeof(hipMemAccessDesc)); + desc.location.type = hipMemLocationTypeDevice; + desc.location.id = 0; + desc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(shared_mempool, &desc, 1)); + } + + HIP_CHECK( + hipMemPoolImportPointer(reinterpret_cast(&shared_ptr), shared_mempool, &export_ptr)); + + const auto element_count = kPageSize / sizeof(int); + constexpr auto thread_count = 1024; + const 
auto block_count = element_count / thread_count + 1; + int expected_value = 12; + VectorSet<<>>(shared_ptr, expected_value, + element_count); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + // Copy the buffer locally + HIP_CHECK(hipMemcpyAsync(host_ptr.host_ptr(), shared_ptr, kPageSize, hipMemcpyDeviceToHost, + stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + // Check if the content is as expected + ArrayFindIfNot(host_ptr.host_ptr(), expected_value, element_count); + + // Free the memory before the exporter process frees it + HIP_CHECK(hipFreeAsync(shared_ptr, stream.stream())); + + // And wait for all the queued up work to complete + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + HIP_CHECK(hipFreeAsync(ptr, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipMemPoolExportToShareableHandle behavior with invalid arguments: + * -# Invalid shareable handle + * -# Invalid Memory Pool + * -# Invalid flag + * -# Invalid Memory Pool properties + * -# Invalid Memory Handle type + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportToShareableHandle_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. 
Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + SECTION("Invalid shareable handle") { + HIP_CHECK_ERROR( + hipMemPoolExportToShareableHandle(nullptr, mempool, hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool") { + int share_handle; + + HIP_CHECK_ERROR(hipMemPoolExportToShareableHandle(&share_handle, nullptr, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid flag") { + int share_handle; + + HIP_CHECK_ERROR(hipMemPoolExportToShareableHandle(&share_handle, mempool, + hipMemHandleTypePosixFileDescriptor, 1), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool properties") { + int share_handle; + pool_props.handleTypes = hipMemHandleTypeNone; + hipMemPool_t mempool_none; + HIP_CHECK(hipMemPoolCreate(&mempool_none, &pool_props)); + + HIP_CHECK_ERROR(hipMemPoolExportToShareableHandle(&share_handle, mempool_none, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + HIP_CHECK(hipMemPoolDestroy(mempool_none)); + } + + SECTION("Invalid Memory Handle type") { + int share_handle; + + HIP_CHECK_ERROR( + hipMemPoolExportToShareableHandle(&share_handle, mempool, hipMemHandleTypeNone, 0), + hipErrorInvalidValue); + } + + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * End doxygen group hipMemPoolExportToShareableHandle. 
+ * @} + */ + + +/** + * @addtogroup hipMemPoolImportFromShareableHandle hipMemPoolImportFromShareableHandle + * @{ + * @ingroup StreamOTest + * `hipMemPoolImportFromShareableHandle(hipMemPool_t* mem_pool, void* shared_handle, + * hipMemAllocationHandleType handle_type,unsigned int flags)` - Imports a memory pool from a shared + * handle. + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipMemPoolExportImport_Functional + * - @ref Unit_hipMemPoolExportImport_IPC_Functional + * - @ref Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipMemPoolImportFromShareableHandle behavior with invalid arguments: + * -# Invalid shareable handle + * -# Invalid Memory Pool + * -# Invalid flag + * -# Invalid Memory Handle type + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolImportFromShareableHandle_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. 
Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int share_handle; + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + HIP_CHECK(hipMemPoolExportToShareableHandle(&share_handle, mempool, + hipMemHandleTypePosixFileDescriptor, 0)); + + hipMemPool_t shared_mempool = nullptr; + + SECTION("Invalid shareable handle") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(&shared_mempool, nullptr, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(nullptr, &share_handle, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid flag") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(&shared_mempool, &share_handle, + hipMemHandleTypePosixFileDescriptor, 1), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Handle type") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(&shared_mempool, &share_handle, + hipMemHandleTypeNone, 0), + hipErrorInvalidValue); + } +} + +/** + * End doxygen group hipMemPoolImportFromShareableHandle. + * @} + */ + + +/** + * @addtogroup hipMemPoolExportPointer hipMemPoolExportPointer + * @{ + * @ingroup StreamOTest + * `hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* dev_ptr)` - Export data to share a memory pool allocation between processes. 
+ * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipMemPoolExportImport_Functional + * - @ref Unit_hipMemPoolExportImport_IPC_Functional + * - @ref Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipMemPoolExportPointer behavior with invalid arguments: + * -# Invalid exported data + * -# Invalid device pointer + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportPointer_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + void* ptr; + hipMemPoolPtrExportData export_ptr; + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + + StreamGuard stream(Streams::withFlags, hipStreamNonBlocking); + int share_handle; + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = handle_type; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, mempool, stream.stream())); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&share_handle, mempool, handle_type, 0)); + + memset(&export_ptr, 0, sizeof(hipMemPoolPtrExportData)); + + SECTION("Invalid exported data") { + HIP_CHECK_ERROR(hipMemPoolExportPointer(nullptr, reinterpret_cast(ptr)), + hipErrorInvalidValue); + } + + SECTION("Invalid device pointer") { + HIP_CHECK_ERROR(hipMemPoolExportPointer(&export_ptr, 
nullptr), hipErrorInvalidValue); + } + + HIP_CHECK(hipFreeAsync(ptr, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * End doxygen group hipMemPoolExportPointer. + * @} + */ + + +/** + * @addtogroup hipMemPoolImportPointer hipMemPoolImportPointer + * @{ + * @ingroup StreamOTest + * `hipMemPoolImportPointer(void** dev_ptr, hipMemPool_t mem_pool, hipMemPoolPtrExportData* export_data)` - Import a memory pool allocation from another process. + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipMemPoolExportImport_Functional + * - @ref Unit_hipMemPoolExportImport_IPC_Functional + * - @ref Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipMemPoolImportPointer behavior with invalid arguments: + * -# Invalid device ptr + * -# Invalid Memory Pool + * -# Invalid exported data + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolImportPointer_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. 
Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + void* ptr; + hipMemPoolPtrExportData export_ptr; + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + + StreamGuard stream(Streams::withFlags, hipStreamNonBlocking); + int share_handle; + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = handle_type; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + // Allocate memory in a stream from the pool just created + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, mempool, stream.stream())); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&share_handle, mempool, handle_type, 0)); + + memset((void*)&export_ptr, 0, sizeof(hipMemPoolPtrExportData)); + HIP_CHECK(hipMemPoolExportPointer(&export_ptr, reinterpret_cast(ptr))); + + hipMemPool_t shared_mempool; + int* shared_ptr = nullptr; + + HIP_CHECK(hipMemPoolImportFromShareableHandle( + &shared_mempool, reinterpret_cast(share_handle), handle_type, 0)); + + hipMemAccessFlags access_flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = 0; + HIP_CHECK(hipMemPoolGetAccess(&access_flags, shared_mempool, &location)); + if (access_flags != hipMemAccessFlagsProtReadWrite) { + hipMemAccessDesc desc; + memset(&desc, 0, sizeof(hipMemAccessDesc)); + desc.location.type = hipMemLocationTypeDevice; + desc.location.id = 0; + desc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(shared_mempool, &desc, 1)); + } + + SECTION("Invalid device ptr") { + HIP_CHECK_ERROR(hipMemPoolImportPointer(nullptr, shared_mempool, &export_ptr), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool") { + HIP_CHECK_ERROR( + hipMemPoolImportPointer(reinterpret_cast(&shared_ptr), nullptr, 
&export_ptr), + hipErrorInvalidValue); + } + + SECTION("Invalid exported data") { + HIP_CHECK_ERROR( + hipMemPoolImportPointer(reinterpret_cast(&shared_ptr), shared_mempool, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipFreeAsync(ptr, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * End doxygen group hipMemPoolImportPointer. + * @} + */ diff --git a/catch/unit/stream_ordered/hipMemPoolExportImportIPC.cc b/catch/unit/stream_ordered/hipMemPoolExportImportIPC.cc new file mode 100644 index 0000000000..1310b6ecf6 --- /dev/null +++ b/catch/unit/stream_ordered/hipMemPoolExportImportIPC.cc @@ -0,0 +1,422 @@ +/* + Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE.
+ */ + +#ifdef _WIN64 +#define NOMINMAX +#endif /* _WIN64 */ + +#include "helper_multiprocess.hh" + +#include +#include +#include + +/** + * @addtogroup hipMemPoolExportToShareableHandle hipMemPoolExportToShareableHandle + * @{ + * @ingroup StreamOTest + * `hipMemPoolExportToShareableHandle(void* shared_handle, hipMemPool_t mem_pool, + * hipMemAllocationHandleType handle_type, unsigned int flags)` - Exports a memory pool to the + * requested handle type. + */ + +#ifdef __linux__ + +static const char shm_name[] = "mempool_test_shm"; +static const char ipc_name[] = "mempool_test_pipe"; + +static constexpr int kMaxDevices = 8; + +typedef struct shmStruct_st { + Process processes[kMaxDevices]; + hipMemPoolPtrExportData exportPtrData[kMaxDevices]; +} shmStruct; + +typedef struct ipcBarrier { + int count; + bool sense; + bool allExit; +} ipcBarrier_t; + +typedef struct ipcDevices { + int count; + int ordinals[kMaxDevices]; +} ipcDevices_t; + +static ipcBarrier_t* g_Barrier{}; +static bool g_procSense; +static int g_processCnt; + +/* + Get device with P2P access to device 0. +*/ +static void get_devices(ipcDevices_t* devices) { + pid_t pid = fork(); + + if (!pid) { + // HIP APIs are called in child process, + // to avoid HIP initialization in main process. 
+ int i, device_count; + HIP_CHECK(hipGetDeviceCount(&device_count)); + + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + devices->count = 0; + exit(EXIT_SUCCESS); + } + + // Device 0 + devices->ordinals[0] = 0; + devices->count = 1; + + if (device_count < 2) { + exit(EXIT_SUCCESS); + } + + int can_peer_access_0i, can_peer_access_i0; + for (i = 1; i < device_count; i++) { + HIP_CHECK(hipDeviceCanAccessPeer(&can_peer_access_0i, 0, i)); + HIP_CHECK(hipDeviceCanAccessPeer(&can_peer_access_i0, i, 0)); + HIP_CHECK( + hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, i)); + + if (can_peer_access_0i * can_peer_access_i0 * mem_pool_support) { + devices->ordinals[i] = i; + INFO("Two-way peer access is available between GPU" << devices->ordinals[0] << " and GPU" + << devices->ordinals[i]); + devices->count += 1; + if (devices->count >= kMaxDevices) break; + } else { + break; + } + } + + exit(EXIT_SUCCESS); + } else { + int status; + waitpid(pid, &status, 0); + HIP_ASSERT(!status); + } +} + +/* + Calling process waits for other processes to signal/complete. 
+*/ +static void process_barrier() { + int newCount = __sync_add_and_fetch(&g_Barrier->count, 1); + + if (newCount == g_processCnt) { + g_Barrier->count = 0; + g_Barrier->sense = !g_procSense; + + } else { + while (g_Barrier->sense == g_procSense) { + if (!g_Barrier->allExit) { + sched_yield(); + } else { + exit(EXIT_FAILURE); + } + } + } + + g_procSense = !g_procSense; +} + +/* Child process(es) import shared memory pool and check if allocated memory can be accessed and + * used*/ +static void child_process(int id) { + volatile shmStruct* shm = NULL; + hipStream_t stream; + sharedMemoryInfo info; + void* ptr; + + LinearAllocGuard host_ptr(LinearAllocs::hipHostMalloc, kPageSize); + + ipcHandle* ipc_child_handle = NULL; + checkIpcErrors(ipcOpenSocket(ipc_child_handle)); + + // wait for parent process to create shared memory + process_barrier(); + + if (sharedMemoryOpen(shm_name, sizeof(shmStruct), &info) != 0) { + INFO("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = reinterpret_cast(info.addr); + shm->processes[id] = getpid(); + + // wait for parent process to send shareable handle + process_barrier(); + + // Receive allocation handle shared by parent. + std::vector sh_handle(1); + checkIpcErrors(ipcRecvShareableHandles(ipc_child_handle, sh_handle)); + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); + + hipMemPool_t pool; + + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + + // Import mem pool from all the devices created in the master + // process using shareable handles received via socket + // and import the pointer to the allocated buffer using + // exportData filled in shared memory by the master process. 
+ HIP_CHECK(hipMemPoolImportFromShareableHandle(&pool, reinterpret_cast(sh_handle[0]), + handle_type, 0)); + + hipMemAccessFlags access_flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = 0; + HIP_CHECK(hipMemPoolGetAccess(&access_flags, pool, &location)); + if (access_flags != hipMemAccessFlagsProtReadWrite) { + hipMemAccessDesc desc; + memset(&desc, 0, sizeof(hipMemAccessDesc)); + desc.location.type = hipMemLocationTypeDevice; + desc.location.id = 0; + desc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(pool, &desc, 1)); + } + + // Import the allocation from memory pool using the opaque export data retrieved through + // the shared memory + HIP_CHECK(hipMemPoolImportPointer(&ptr, pool, + const_cast(&shm->exportPtrData[id]))); + + // Since we have imported allocations shared by the parent with us, we can + // close this ShareableHandle. + checkIpcErrors(ipcCloseShareableHandle(sh_handle[0])); + + // Since we have imported allocations shared by the parent with us, we can + // close the socket + checkIpcErrors(ipcCloseSocket(ipc_child_handle)); + + // Child process accesses the imported buffer + const auto element_count = kPageSize / sizeof(int); + constexpr auto thread_count = 1024; + const auto block_count = element_count / thread_count + 1; + int expected_value = 12 + id; + VectorSet<<>>((int*)ptr, expected_value, element_count); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Copy the buffer locally + HIP_CHECK(hipMemcpyAsync(host_ptr.host_ptr(), ptr, kPageSize, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + INFO("Process " << id << "verifying...\n"); + + // Check if the content is as expected + ArrayFindIfNot(host_ptr.host_ptr(), expected_value, element_count); + + // Free the memory before the exporter process frees it + HIP_CHECK(hipFreeAsync(ptr, stream)); + + // And wait for all the queued up work to complete +
HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +static void parent_process(int dev_count) { + sharedMemoryInfo info; + int i; + volatile shmStruct* shm = NULL; + std::vector ptrs; + std::vector child_processes; + + if (sharedMemoryCreate(shm_name, sizeof(*shm), &info) != 0) { + INFO("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = (volatile shmStruct*)info.addr; + memset((void*)shm, 0, sizeof(*shm)); + + // wait for child processes to insert their pids into shared memory + process_barrier(); + + std::vector shareable_handles(dev_count); + std::vector streams(dev_count); + std::vector pools(dev_count); + + // Now allocate memory for each process and fill the shared + // memory buffer with the export data and get mempool handles to communicate + for (i = 0; i < dev_count; i++) { + void* ptr; + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreateWithFlags(&streams[i], hipStreamNonBlocking)); + // Allocate an explicit pool with IPC capabilities + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = i; + + HIP_CHECK(hipMemPoolCreate(&pools[i], &pool_props)); + + // Query the shareable handle for the pool + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + // Allocate memory in a stream from the pool just created + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, pools[i], streams[i])); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&shareable_handles[i], pools[i], handle_type, 0)); + + // Memset handle to 0 to make sure call to hipMemPoolImportPointer in + // child process will fail if the following call to hipMemPoolExportPointer fails. 
+ memset((void*)&shm->exportPtrData[i], 0, sizeof(hipMemPoolPtrExportData)); + HIP_CHECK( + hipMemPoolExportPointer(const_cast(&shm->exportPtrData[i]), ptr)); + ptrs.push_back(ptr); + child_processes.push_back(static_cast(shm->processes[i])); + } + + ipcHandle* ipc_parent_handle; + checkIpcErrors(ipcCreateSocket(ipc_parent_handle, ipc_name, child_processes)); + + for (i = 0; i < dev_count; i++) { + std::vector current_handle(1, shareable_handles[i]); + std::vector current_process(1, child_processes[i]); + checkIpcErrors(ipcSendShareableHandles(ipc_parent_handle, current_handle, current_process)); + } + + // Close the shareable handles as they are not needed anymore. + for (int i = 0; i < dev_count; i++) { + checkIpcErrors(ipcCloseShareableHandle(shareable_handles[i])); + } + + checkIpcErrors(ipcCloseSocket(ipc_parent_handle)); + + process_barrier(); + + // And wait for them to finish + for (i = 0; i < child_processes.size(); i++) { + if (waitProcess(&child_processes[i]) != EXIT_SUCCESS) { + INFO("Process " << i << " failed!\n"); + exit(EXIT_FAILURE); + } + } + + // Clean up! + for (i = 0; i < dev_count; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipFreeAsync(ptrs[i], streams[i])); + HIP_CHECK(hipStreamSynchronize(streams[i])); + HIP_CHECK(hipMemPoolDestroy(pools[i])); + HIP_CHECK(hipStreamDestroy(streams[i])); + } + + sharedMemoryClose(&info); +} + +/** + * Test Description + * ------------------------ + * - Test to verify exporting/importing a shareable handle on a single device between parent and + * child process using IPC mechanisms - shared memory and sockets. 
+ * Test source + * ------------------------ + * - /unit/stream_ordered/hipMemPoolExportImportIPC.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportImport_IPC_Functional") { + ipcDevices_t* shm_devices; + shm_devices = reinterpret_cast( + mmap(NULL, sizeof(*shm_devices), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + REQUIRE(MAP_FAILED != shm_devices); + // Barrier is used to synchronize created processes + g_Barrier = reinterpret_cast( + mmap(NULL, sizeof(*g_Barrier), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + memset(g_Barrier, 0, sizeof(*g_Barrier)); + + // set local barrier sense flag + g_procSense = 0; + + get_devices(shm_devices); + if (!shm_devices->count) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + // Set device count to 1 + shm_devices->count = 1; + g_processCnt = shm_devices->count + 1; + int index = 0; + + Process process = fork(); + if (process != 0) { + parent_process(shm_devices->count); + } else { + child_process(index); + } +} + +/** + * Test Description + * ------------------------ + * - Test to verify exporting/importing a shareable handle on multiple devices between parent and + * child processes using IPC mechanisms - shared memory and sockets. + * Test source + * ------------------------ + * - /unit/stream_ordered/hipMemPoolExportImportIPC.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional") { + ipcDevices_t* shm_devices; + shm_devices = reinterpret_cast( + mmap(NULL, sizeof(*shm_devices), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + REQUIRE(MAP_FAILED != shm_devices); + // Barrier is used to synchronize processes created.
+ g_Barrier = reinterpret_cast( + mmap(NULL, sizeof(*g_Barrier), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + memset(g_Barrier, 0, sizeof(*g_Barrier)); + + // set local barrier sense flag + g_procSense = 0; + + get_devices(shm_devices); + if (!shm_devices->count) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + g_processCnt = shm_devices->count + 1; + + int index = 0; + + for (int i = 1; i < g_processCnt; i++) { + Process process = fork(); + if (!process) { + index = i; + break; + } + } + + if (index == 0) { + parent_process(shm_devices->count); + } else { + child_process(index - 1); + } +} +#endif diff --git a/catch/unit/surface/CMakeLists.txt b/catch/unit/surface/CMakeLists.txt index d2afb5c702..43c7eee343 100644 --- a/catch/unit/surface/CMakeLists.txt +++ b/catch/unit/surface/CMakeLists.txt @@ -20,13 +20,22 @@ # Common Tests - Test independent of all platforms set(TEST_SRC - hipSurfaceObj1D.cc - hipSurfaceObj2D.cc - hipSurfaceObj3D.cc hipCreateSurfaceObject.cc hipDestroySurfaceObject.cc + surf1D.cc + surf1DLayered.cc + surf2D.cc + surf2DLayered.cc + surf3D.cc + surfCubemap.cc ) +if(HIP_PLATFORM MATCHES "nvidia") # Disabled on AMD due to defect EXSWHTEC-377 +set(TEST_SRC + ${TEST_SRC} + surfCubemapLayered.cc) +endif() + hip_add_exe_to_target(NAME SurfaceTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) \ No newline at end of file diff --git a/catch/unit/surface/hipSurfaceObj1D.cc b/catch/unit/surface/surf1D.cc similarity index 60% rename from catch/unit/surface/hipSurfaceObj1D.cc rename to catch/unit/surface/surf1D.cc index 701a99666d..20286ef483 100644 --- a/catch/unit/surface/hipSurfaceObj1D.cc +++ b/catch/unit/surface/surf1D.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -16,18 +19,22 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include + +/** + * @addtogroup surf1D surf1D + * @{ + * @ingroup SurfaceTest + */ + #include +#include #include #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wunused-parameter" template -__global__ void -surf1DKernelR(hipSurfaceObject_t surfaceObject, - T* outputData, int width) -{ +__global__ void surf1DKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; if (x < width) { @@ -37,10 +44,7 @@ surf1DKernelR(hipSurfaceObject_t surfaceObject, } template -__global__ void -surf1DKernelW(hipSurfaceObject_t surfaceObject, - T* inputData, int width) -{ +__global__ void surf1DKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; if (x < width) { @@ -50,10 +54,8 @@ surf1DKernelW(hipSurfaceObject_t surfaceObject, } template 
-__global__ void -surf1DKernelRW(hipSurfaceObject_t surfaceObject, - hipSurfaceObject_t outputSurfObj, int width) -{ +__global__ void surf1DKernelRW(hipSurfaceObject_t surfaceObject, hipSurfaceObject_t outputSurfObj, + int width) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; if (x < width) { @@ -64,14 +66,11 @@ surf1DKernelRW(hipSurfaceObject_t surfaceObject, #endif } -template -static void runTestR(const int width) -{ +template static void runTestR(const int width) { unsigned int size = width * sizeof(T); - T *hData = (T*) malloc (size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int j = 0; j < width; j++) - { + for (int j = 0; j < width; j++) { initVal(hData[j]); } @@ -91,12 +90,12 @@ static void runTestR(const int width) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - T *hOutputData = nullptr; + T* hOutputData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); memset(hOutputData, 0, size); - dim3 dimBlock (16, 1, 1); - dim3 dimGrid ((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); surf1DKernelR<<>>(surfaceObject, hOutputData, width); @@ -105,8 +104,8 @@ static void runTestR(const int width) for (int j = 0; j < width; j++) { if (!isEqual(hData[j], hOutputData[j])) { - printf("Difference [ %d ]:%s ----%s\n", j, - getString(hData[j]).c_str(), getString(hOutputData[j]).c_str()); + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); REQUIRE(false); } } @@ -115,14 +114,11 @@ static void runTestR(const int width) HIP_CHECK(hipFreeArray(hipArray)); free(hData); HIP_CHECK(hipHostFree(hOutputData)); - REQUIRE(true); } -template -static void runTestW(const int width) -{ +template static void runTestW(const int width) { unsigned int size = width * sizeof(T); - T *hData = nullptr; + 
T* hData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hData, size)); memset(hData, 0, size); @@ -142,27 +138,26 @@ static void runTestW(const int width) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - for (int j = 0; j < width; j++) - { + for (int j = 0; j < width; j++) { initVal(hData[j]); } - dim3 dimBlock (16, 1, 1); - dim3 dimGrid ((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); surf1DKernelW<<>>(surfaceObject, hData, width); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); HIP_CHECK(hipMemcpyFromArray(hOutputData, hipArray, 0, 0, size, hipMemcpyDeviceToHost)); for (int j = 0; j < width; j++) { if (!isEqual(hData[j], hOutputData[j])) { - printf("Difference [ %d ]:%s ----%s\n", j, - getString(hData[j]).c_str(), getString(hOutputData[j]).c_str()); + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); REQUIRE(false); } } @@ -171,18 +166,13 @@ static void runTestW(const int width) HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipHostFree(hData)); free(hOutputData); - REQUIRE(true); } - -template -static void runTestRW(const int width) -{ +template static void runTestRW(const int width) { unsigned int size = width * sizeof(T); - T *hData = (T*) malloc (size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int j = 0; j < width; j++) - { + for (int j = 0; j < width; j++) { initVal(hData[j]); } @@ -210,24 +200,24 @@ static void runTestRW(const int width) resOutDesc.res.array.array = hipOutArray; hipSurfaceObject_t outSurfaceObject = 0; - HIP_CHECK(hipCreateSurfaceObject (&outSurfaceObject, &resOutDesc)); + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); - dim3 dimBlock (16, 1, 1); - dim3 dimGrid ((width + dimBlock.x - 1) / 
dimBlock.x, 1, 1); + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); surf1DKernelRW<<>>(surfaceObject, outSurfaceObject, width); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); HIP_CHECK(hipMemcpyFromArray(hOutputData, hipOutArray, 0, 0, size, hipMemcpyDeviceToHost)); for (int j = 0; j < width; j++) { if (!isEqual(hData[j], hOutputData[j])) { - printf("Difference [ %d ]:%s ----%s\n", j, - getString(hData[j]).c_str(), getString(hOutputData[j]).c_str()); + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); REQUIRE(false); } } @@ -238,83 +228,67 @@ static void runTestRW(const int width) HIP_CHECK(hipFreeArray(hipOutArray)); free(hData); free(hOutputData); - REQUIRE(true); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj1D_type_R", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf1Dread` with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1Dread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj1D_type_R - 31") { - runTestR(31); - } - - SECTION("Unit_hipSurfaceObj1D_type_R - 67") { - runTestR(67); - } - - SECTION("Unit_hipSurfaceObj1D_type_R - 131") { - runTestR(131); - } - - SECTION("Unit_hipSurfaceObj1D_type_R - 263") { - runTestR(263); - } + const int width = GENERATE(31, 67, 131, 263); + runTestR(width); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj1D_type_W", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf1Dwrite` with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1Dwrite_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj1D_type_W - 31") { - runTestW(31); - } - - SECTION("Unit_hipSurfaceObj1D_type_W - 63") { - runTestW(63); - } - - SECTION("Unit_hipSurfaceObj1D_type_W - 131") { - runTestW(131); - } - - SECTION("Unit_hipSurfaceObj1D_type_W - 263") { - runTestW(263); - } + const int width = GENERATE(31, 67, 131, 263); + runTestW(width); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj1D_type_RW", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf1Dread` and `surf1Dwrite` together, with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1D_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj1D_type_RW - 23") { - runTestRW(23); - } - - SECTION("Unit_hipSurfaceObj1D_type_RW - 67") { - runTestRW(67); - } - - SECTION("Unit_hipSurfaceObj1D_type_RW - 131") { - runTestRW(131); - } - - SECTION("Unit_hipSurfaceObj1D_type_RW - 263") { - runTestRW(263); - } + const int width = GENERATE(31, 67, 131, 263); + runTestRW(width); } diff --git a/catch/unit/surface/surf1DLayered.cc b/catch/unit/surface/surf1DLayered.cc new file mode 100644 index 0000000000..3432524527 --- /dev/null +++ b/catch/unit/surface/surf1DLayered.cc @@ -0,0 +1,294 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup surf1DLayered surf1DLayered + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +template +__global__ void surf1DLayeredKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + if (x < width) { + surf1DLayeredread(outputData + x, surfaceObject, x * sizeof(T), 0); + } +#endif +} + +template +__global__ void surf1DLayeredKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + if (x < width) { + surf1DLayeredwrite(inputData[x], surfaceObject, x * sizeof(T), 0); + } +#endif +} + +template +__global__ void surf1DLayeredKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + if (x < width) { + T data; + surf1DLayeredread(&data, surfaceObject, x * sizeof(T), 0); + surf1DLayeredwrite(data, outputSurfObj, x * sizeof(T), 0); + } +#endif +} + +template static void runTestR(const int width) { + unsigned int size = width * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int j = 0; j < width; j++) { + initVal(hData[j]); + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + 
HIP_CHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + + surf1DLayeredKernelR<<>>(surfaceObject, hOutputData, width); + + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int j = 0; j < width; j++) { + if (!isEqual(hData[j], hOutputData[j])) { + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); + REQUIRE(false); + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width) { + unsigned int size = width * sizeof(T); + T* hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + HIP_CHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int j = 0; j < width; j++) { + initVal(hData[j]); + } + + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + + 
surf1DLayeredKernelW<<>>(surfaceObject, hData, width); + + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpyFromArray(hOutputData, hipArray, 0, 0, size, hipMemcpyDeviceToHost)); + + for (int j = 0; j < width; j++) { + if (!isEqual(hData[j], hOutputData[j])) { + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); + REQUIRE(false); + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width) { + unsigned int size = width * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int j = 0; j < width; j++) { + initVal(hData[j]); + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + HIP_CHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + + surf1DLayeredKernelRW<<>>(surfaceObject, outSurfaceObject, width); + + 
HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpyFromArray(hOutputData, hipOutArray, 0, 0, size, hipMemcpyDeviceToHost)); + + for (int j = 0; j < width; j++) { + if (!isEqual(hData[j], hOutputData[j])) { + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); + REQUIRE(false); + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf1DLayeredread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1DLayeredread_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67, 131, 263); + runTestR(width); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf1DLayeredwrite` with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1DLayeredwrite_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67, 131, 263); + runTestW(width); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf1DLayeredread` and `surf1DLayeredwrite` together, with different types + * and dimensions. Test source + * ------------------------ + * - unit/surface/surf1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1DLayered_Positive_ReadWrite", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67, 131, 263); + runTestRW(width); +} diff --git a/catch/unit/surface/hipSurfaceObj2D.cc b/catch/unit/surface/surf2D.cc similarity index 55% rename from catch/unit/surface/hipSurfaceObj2D.cc rename to catch/unit/surface/surf2D.cc index 1fdc0eee9e..ca504b178b 100644 --- a/catch/unit/surface/hipSurfaceObj2D.cc +++ b/catch/unit/surface/surf2D.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -16,8 +19,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include + +/** + * @addtogroup surf2D surf2D + * @{ + * @ingroup SurfaceTest + */ + #include +#include #include #pragma clang diagnostic ignored "-Wunused-variable" @@ -26,10 +36,8 @@ THE SOFTWARE. 
#define LOG_DATA 0 template -__global__ void -surf2DKernelR(hipSurfaceObject_t surfaceObject, - T* outputData, int width, int height) -{ +__global__ void surf2DKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -40,10 +48,8 @@ surf2DKernelR(hipSurfaceObject_t surfaceObject, } template -__global__ void -surf2DKernelW(hipSurfaceObject_t surfaceObject, - T* inputData, int width, int height) -{ +__global__ void surf2DKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -54,10 +60,8 @@ surf2DKernelW(hipSurfaceObject_t surfaceObject, } template -__global__ void -surf2DKernelRW(hipSurfaceObject_t surfaceObject, - hipSurfaceObject_t outputSurfObj, int width, int height) -{ +__global__ void surf2DKernelRW(hipSurfaceObject_t surfaceObject, hipSurfaceObject_t outputSurfObj, + int width, int height) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -69,29 +73,24 @@ surf2DKernelRW(hipSurfaceObject_t surfaceObject, #endif } -template -static void runTestR(const int width, const int height) -{ +template static void runTestR(const int width, const int height) { unsigned int size = width * height * sizeof(T); - T* hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int i = 0; i < height; i++) - { - for (int j = 0; j < width; j++) - { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { initVal(hData[i * width + j]); } } hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); hipArray_t hipArray = nullptr; - HIP_CHECK(hipMallocArray 
(&hipArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); // Need set source pitch, but we don't have any padding here const size_t spitch = width * sizeof(T); - HIP_CHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, - hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); hipResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); @@ -106,8 +105,8 @@ static void runTestR(const int width, const int height) HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); memset(hOutputData, 0, size); - dim3 dimBlock (16, 16, 1); - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, 1); + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); surf2DKernelR<<>>(surfaceObject, hOutputData, width, height); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); @@ -116,8 +115,8 @@ static void runTestR(const int width, const int height) for (int j = 0; j < width; j++) { int index = i * width + j; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d ]:%s ----%s\n", i, j, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } @@ -127,12 +126,9 @@ static void runTestR(const int width, const int height) HIP_CHECK(hipFreeArray(hipArray)); free(hData); HIP_CHECK(hipHostFree(hOutputData)); - REQUIRE(true); } -template -static void runTestW(const int width, const int height) -{ +template static void runTestW(const int width, const int height) { unsigned int size = width * height * sizeof(T); T* hData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hData, size)); @@ -140,13 +136,12 @@ 
static void runTestW(const int width, const int height) hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); hipArray_t hipArray = nullptr; - HIP_CHECK(hipMallocArray (&hipArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); // Need set source pitch, but we don't have any padding here const size_t spitch = width * sizeof(T); - HIP_CHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, - hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); hipResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); @@ -157,32 +152,30 @@ static void runTestW(const int width, const int height) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - for (int i = 0; i < height; i++) - { - for (int j = 0; j < width; j++) - { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { initVal(hData[i * width + j]); } } - dim3 dimBlock (16, 16, 1); - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, 1); + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); surf2DKernelW<<>>(surfaceObject, hData, width, height); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T* hOutputData = (T*) malloc(size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); - HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, - height, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { int index = i * width + j; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d ]:%s ----%s\n", i, j, - 
getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } @@ -192,40 +185,33 @@ static void runTestW(const int width, const int height) HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipHostFree(hData)); free(hOutputData); - REQUIRE(true); } -template -static void runTestRW(const int width, const int height) -{ +template static void runTestRW(const int width, const int height) { unsigned int size = width * height * sizeof(T); - T* hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int i = 0; i < height; i++) - { - for (int j = 0; j < width; j++) - { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { initVal(hData[i * width + j]); } } #if LOG_DATA - printf ("hData: "); - for (int i = 0; i < 32; i++) - { - printf ("%s ", getString(hData[i]).c_str()); + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hData[i]).c_str()); } - printf ("\n"); + printf("\n"); #endif hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); hipArray_t hipArray = nullptr, hipOutArray = nullptr; - HIP_CHECK(hipMallocArray (&hipArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); // Need set source pitch, but we don't have any padding here const size_t spitch = width * sizeof(T); - HIP_CHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, - hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); hipResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); @@ -236,8 +222,7 @@ static void runTestRW(const int width, const int height) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - 
HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); hipResourceDesc resOutDesc; memset(&resOutDesc, 0, sizeof(resOutDesc)); @@ -245,35 +230,34 @@ static void runTestRW(const int width, const int height) resOutDesc.res.array.array = hipOutArray; hipSurfaceObject_t outSurfaceObject = 0; - HIP_CHECK(hipCreateSurfaceObject (&outSurfaceObject, &resOutDesc)); + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); - dim3 dimBlock (16, 16, 1); - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, 1); + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); surf2DKernelRW<<>>(surfaceObject, outSurfaceObject, width, height); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T* hOutputData = (T*) malloc(size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); - HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, - height, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); #if LOG_DATA - printf ("dData: "); - for (int i = 0; i < 32; i++) - { - printf ("%s ", getString(hOutputData[i]).c_str()); + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); } - printf ("\n"); + printf("\n"); #endif for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { int index = i * width + j; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d ]:%s ----%s\n", i, j, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } @@ -285,83 +269,70 
@@ static void runTestRW(const int width, const int height) HIP_CHECK(hipFreeArray(hipOutArray)); free(hData); free(hOutputData); - REQUIRE(true); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj2D_type_R", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf2Dread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2Dread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj2D_type_R - 23, 67") { - runTestR(23, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_R - 67, 23") { - runTestR(67, 23); - } - - SECTION("Unit_hipSurfaceObj2D_type_R - 131, 67") { - runTestR(131, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_R - 263, 131") { - runTestR(263, 131); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj2D_type_W", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf2Dwrite` with 
different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2Dwrite_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj2D_type_W - 23, 67") { - runTestW(23, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_W - 67, 23") { - runTestW(67, 23); - } - - SECTION("Unit_hipSurfaceObj2D_type_W - 131, 67") { - runTestW(131, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_W - 263, 23") { - runTestW(263, 23); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj2D_type_RW", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf2Dread` and `surf2Dwrite` together, with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2D_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj2D_type_RW - 23, 67") { - runTestRW(23, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_RW - 67, 131") { - runTestRW(67, 131); - } - - SECTION("Unit_hipSurfaceObj2D_type_RW - 131, 263") { - runTestRW(131, 263); - } - - SECTION("Unit_hipSurfaceObj2D_type_RW - 263, 67") { - runTestRW(263, 67); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); } diff --git a/catch/unit/surface/surf2DLayered.cc b/catch/unit/surface/surf2DLayered.cc new file mode 100644 index 0000000000..c8f06bdfc2 --- /dev/null +++ b/catch/unit/surface/surf2DLayered.cc @@ -0,0 +1,338 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup surf2DLayered surf2DLayered + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +#define LOG_DATA 0 + +template +__global__ void surf2DLayeredKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surf2DLayeredread(outputData + y * width + x, surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surf2DLayeredKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surf2DLayeredwrite(inputData[y * width + x], surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surf2DLayeredKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width, int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + T data; + surf2DLayeredread(&data, surfaceObject, x * sizeof(T), y, 0); + surf2DLayeredwrite(data, outputSurfObj, x * sizeof(T), y, 0); + } +#endif +} + +template static void runTestR(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = 
(T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surf2DLayeredKernelR<<>>(surfaceObject, hOutputData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = 
nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surf2DLayeredKernelW<<>>(surfaceObject, hData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } +#if LOG_DATA + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", 
getString(hData[i]).c_str()); + } + printf("\n"); +#endif + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surf2DLayeredKernelRW<<>>(surfaceObject, outSurfaceObject, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + +#if LOG_DATA + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); + } + printf("\n"); +#endif + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, 
getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf2DLayeredread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2DLayeredread_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf2DLayeredwrite` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2DLayeredwrite_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf2DLayeredread` and `surf2DLayeredwrite` together, with different types and dimensions. + *
Test source + * ------------------------ + * - unit/surface/surf2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2DLayered_Positive_ReadWrite", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); +} diff --git a/catch/unit/surface/hipSurfaceObj3D.cc b/catch/unit/surface/surf3D.cc similarity index 64% rename from catch/unit/surface/hipSurfaceObj3D.cc rename to catch/unit/surface/surf3D.cc index 7cc3889e6f..d209f09115 100644 --- a/catch/unit/surface/hipSurfaceObj3D.cc +++ b/catch/unit/surface/surf3D.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -16,50 +19,49 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include + +/** + * @addtogroup surf3D surf3D + * @{ + * @ingroup SurfaceTest + */ + #include +#include #include #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wunused-parameter" template -__global__ void -surf3DKernelR(hipSurfaceObject_t surfaceObject, - T* outputData, int width, int height, int depth) -{ +__global__ void surf3DKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height, int depth) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; int z = blockIdx.z * blockDim.z + threadIdx.z; if (x < width && y < height && z < depth) { - surf3Dread(outputData + z * width * height + y * width + x, - surfaceObject, x * sizeof(T), y, z); + surf3Dread(outputData + z * width * height + y * width + x, surfaceObject, x * sizeof(T), y, z); } #endif } template -__global__ void -surf3DKernelW(hipSurfaceObject_t surfaceObject, - T* inputData, int width, int height, int depth) -{ +__global__ void surf3DKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, int height, + int depth) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; int z = blockIdx.z * blockDim.z + threadIdx.z; if (x < width && y < height && z < depth) { - surf3Dwrite(inputData[z * width * height + y * width + x], - surfaceObject, x * sizeof(T), y, z); + surf3Dwrite(inputData[z * width * height + y * width + x], surfaceObject, x * sizeof(T), y, z); } #endif } template -__global__ void -surf3DKernelRW(hipSurfaceObject_t surfaceObject, - hipSurfaceObject_t outputSurfObj, int width, int height, int depth) -{ +__global__ void surf3DKernelRW(hipSurfaceObject_t surfaceObject, hipSurfaceObject_t outputSurfObj, + int width, int height, int depth) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = 
blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -72,11 +74,9 @@ surf3DKernelRW(hipSurfaceObject_t surfaceObject, #endif } -template -static void runTestR(const int width, const int height, const int depth) -{ +template static void runTestR(const int width, const int height, const int depth) { unsigned int size = width * height * depth * sizeof(T); - T *hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); for (int i = 0; i < depth; i++) { for (int j = 0; j < height; j++) { @@ -94,8 +94,8 @@ static void runTestR(const int width, const int height, const int depth) hipMemcpy3DParms myparms; memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); myparms.dstArray = hipArray; myparms.extent = make_hipExtent(width, height, depth); @@ -112,12 +112,12 @@ static void runTestR(const int width, const int height, const int depth) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - T *hOutputData = nullptr; + T* hOutputData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); memset(hOutputData, 0, size); - dim3 dimBlock(8, 8, 8); // 512 threads - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, + dim3 dimBlock(8, 8, 8); // 512 threads + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, (depth + dimBlock.z - 1) / dimBlock.z); surf3DKernelR<<>>(surfaceObject, hOutputData, width, height, depth); @@ -130,26 +130,23 @@ static void runTestR(const int width, const int height, const int depth) for (int k = 0; k < width; k++) { int index = i * width * height + j * width + k; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d %d]:%s 
----%s\n", i, j, k, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } } } - HIP_CHECK(hipDestroySurfaceObject (surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); HIP_CHECK(hipFreeArray(hipArray)); free(hData); HIP_CHECK(hipHostFree(hOutputData)); - REQUIRE(true); } -template -static void runTestW(const int width, const int height, const int depth) -{ +template static void runTestW(const int width, const int height, const int depth) { unsigned int size = width * height * depth * sizeof(T); - T *hData = nullptr; + T* hData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hData, size)); memset(hData, 0, size); @@ -161,8 +158,8 @@ static void runTestW(const int width, const int height, const int depth) hipMemcpy3DParms myparms; memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); myparms.dstArray = hipArray; myparms.extent = make_hipExtent(width, height, depth); @@ -187,8 +184,8 @@ static void runTestW(const int width, const int height, const int depth) } } - dim3 dimBlock(8, 8, 8); // 512 threads - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, + dim3 dimBlock(8, 8, 8); // 512 threads + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, (depth + dimBlock.z - 1) / dimBlock.z); surf3DKernelW<<>>(surfaceObject, hData, width, height, depth); @@ -196,13 +193,13 @@ static void runTestW(const int width, const int height, const int depth) HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); 
memset(hOutputData, 0, size); memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcArray= hipArray; + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.srcArray = hipArray; myparms.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(T), width, height); myparms.extent = make_hipExtent(width, height, depth); myparms.kind = hipMemcpyDeviceToHost; @@ -214,26 +211,23 @@ static void runTestW(const int width, const int height, const int depth) for (int k = 0; k < width; k++) { int index = i * width * height + j * width + k; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } } } - HIP_CHECK(hipDestroySurfaceObject (surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipHostFree(hData)); free(hOutputData); - REQUIRE(true); } -template -static void runTestRW(const int width, const int height, const int depth) -{ +template static void runTestRW(const int width, const int height, const int depth) { unsigned int size = width * height * depth * sizeof(T); - T *hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); for (int i = 0; i < depth; i++) { for (int j = 0; j < height; j++) { @@ -251,8 +245,8 @@ static void runTestRW(const int width, const int height, const int depth) hipMemcpy3DParms myparms; memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); myparms.dstArray = 
hipArray; myparms.extent = make_hipExtent(width, height, depth); @@ -280,8 +274,8 @@ static void runTestRW(const int width, const int height, const int depth) hipSurfaceObject_t outSurfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); - dim3 dimBlock(8, 8, 8); // 512 threads - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, + dim3 dimBlock(8, 8, 8); // 512 threads + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, (depth + dimBlock.z - 1) / dimBlock.z); surf3DKernelRW<<>>(surfaceObject, outSurfaceObject, width, height, depth); @@ -289,13 +283,13 @@ static void runTestRW(const int width, const int height, const int depth) HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcArray= hipOutArray; + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.srcArray = hipOutArray; myparms.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(T), width, height); myparms.extent = make_hipExtent(width, height, depth); myparms.kind = hipMemcpyDeviceToHost; @@ -307,97 +301,87 @@ static void runTestRW(const int width, const int height, const int depth) for (int k = 0; k < width; k++) { int index = i * width * height + j * width + k; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } } } - HIP_CHECK(hipDestroySurfaceObject (surfaceObject)); - HIP_CHECK(hipDestroySurfaceObject (outSurfaceObject)); + 
HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipFreeArray(hipOutArray)); free(hData); free(hOutputData); - REQUIRE(true); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj3D_type_R", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf3Dread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf3Dread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj3D_type_R - 31, 67, 131") { - runTestR(31, 67, 131); - } - - SECTION("Unit_hipSurfaceObj3D_type_R - 67, 31, 263") { - runTestR(67, 31, 263); - } - - SECTION("Unit_hipSurfaceObj3D_type_R - 131, 131, 67") { - runTestR(131, 131, 67); - } - - SECTION("Unit_hipSurfaceObj3D_type_R - 263, 131, 263") { - runTestR(263, 131, 263); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + const int depth = GENERATE(4, 11); + runTestR(width, height, depth); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj3D_type_W", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = 
hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf3Dwrite` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf3Dwrite_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj3D_type_W - 31, 67, 131") { - runTestW(31, 67, 131); - } - - SECTION("Unit_hipSurfaceObj3D_type_W - 67, 67, 31") { - runTestW(67, 67, 31); - } - - SECTION("Unit_hipSurfaceObj3D_type_W - 131, 131, 67") { - runTestW(131, 131, 67); - } - - SECTION("Unit_hipSurfaceObj3D_type_W - 263, 131, 263") { - runTestW(263, 131, 263); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + const int depth = GENERATE(4, 11); + runTestW(width, height, depth); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj3D_type_RW", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf3Dread` and `surf3Dwrite` together, with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf3D_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj3D_type_RW - 31, 31, 67") { - runTestRW(31, 31, 67); - } - - SECTION("Unit_hipSurfaceObj3D_type_RW - 67, 67, 31") { - runTestRW(67, 67, 31); - } - - SECTION("Unit_hipSurfaceObj3D_type_RW - 131, 67, 263") { - runTestRW(131, 67, 263); - } - - SECTION("Unit_hipSurfaceObj3D_type_RW - 263, 131, 263") { - runTestRW(263, 131, 263); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + const int depth = GENERATE(4, 11); + runTestRW(width, height, depth); } diff --git a/catch/unit/surface/surfCubemap.cc b/catch/unit/surface/surfCubemap.cc new file mode 100644 index 0000000000..c0fa488567 --- /dev/null +++ b/catch/unit/surface/surfCubemap.cc @@ -0,0 +1,338 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup surfCubemap surfCubemap + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +#define LOG_DATA 0 + +template +__global__ void surfCubemapKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapread(outputData + y * width + x, surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapwrite(inputData[y * width + x], surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width, int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + T data; + surfCubemapread(&data, surfaceObject, x * sizeof(T), y, 0); + surfCubemapwrite(data, 
outputSurfObj, x * sizeof(T), y, 0); + } +#endif +} + +template static void runTestR(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapKernelR<<>>(surfaceObject, hOutputData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* 
hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapKernelW<<>>(surfaceObject, hData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i 
< height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } +#if LOG_DATA + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hData[i]).c_str()); + } + printf("\n"); +#endif + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapKernelRW<<>>(surfaceObject, outSurfaceObject, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + +#if LOG_DATA + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); + } + printf("\n"); +#endif + + for (int i = 0; i < height; i++) { + for (int j = 0; j < 
width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surfCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapwrite` with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surfCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapwrite_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapread` and `surfCubemapwrite` together, with different types and + * dimensions. Test source + * ------------------------ + * - unit/surface/surfCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemap_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); +} diff --git a/catch/unit/surface/surfCubemapLayered.cc b/catch/unit/surface/surfCubemapLayered.cc new file mode 100644 index 0000000000..89da56ed53 --- /dev/null +++ b/catch/unit/surface/surfCubemapLayered.cc @@ -0,0 +1,340 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @addtogroup surfCubemapLayered surfCubemapLayered + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +#define LOG_DATA 0 + +template +__global__ void surfCubemapLayeredKernelR(hipSurfaceObject_t surfaceObject, T* outputData, + int width, int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapLayeredread(outputData + y * width + x, surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapLayeredKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapLayeredwrite(inputData[y * width + x], surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapLayeredKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + T data; + surfCubemapLayeredread(&data, surfaceObject, x * sizeof(T), y, 0); + surfCubemapLayeredwrite(data, outputSurfObj, x * sizeof(T), y, 0); + } +#endif +} + +template static void runTestR(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + 
hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapLayeredKernelR<<>>(surfaceObject, hOutputData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + 
HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapLayeredKernelW<<>>(surfaceObject, hData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } +#if LOG_DATA + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hData[i]).c_str()); + } + printf("\n"); +#endif + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + 
HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapLayeredKernelRW + <<>>(surfaceObject, outSurfaceObject, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + +#if LOG_DATA + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); + } + printf("\n"); +#endif + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + 
HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapLayeredread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surfCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapLayeredread_Positive_Basic", "", char, uchar, short, ushort, + int, uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, + uchar2, short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, + int4, uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapLayeredwrite` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surfCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapLayeredwrite_Positive_Basic", "", char, uchar, short, ushort, + int, uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, + uchar2, short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, + int4, uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapLayeredread` and `surfCubemapLayeredwrite` together, with + * different types and dimensions. 
Test source + * ------------------------ + * - unit/surface/surfCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapLayered_Positive_ReadWrite", "", char, uchar, short, ushort, + int, uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, + uchar2, short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, + int4, uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); +} diff --git a/catch/unit/syncthreads/CMakeLists.txt b/catch/unit/syncthreads/CMakeLists.txt new file mode 100644 index 0000000000..a9660f66bc --- /dev/null +++ b/catch/unit/syncthreads/CMakeLists.txt @@ -0,0 +1,55 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +set(TEST_SRC + __syncthreads.cc + __syncthreads_count.cc + __syncthreads_and.cc + __syncthreads_or.cc +) + +# LINKER_LIBS (the last argument) is optional for hip_add_exe_to_target but required for this test +if(HIP_PLATFORM MATCHES "nvidia") +hip_add_exe_to_target(NAME SyncthreadsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") +hip_add_exe_to_target(NAME SyncthreadsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) +endif() + +# The tests below fail in PSDB +#add_test(NAME Unit___syncthreads_count_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# __syncthreads_count_negative_kernels.cc 2) +# +#add_test(NAME Unit___syncthreads_and_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# __syncthreads_and_negative_kernels.cc 2) +# +#add_test(NAME Unit___syncthreads_or_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# __syncthreads_or_negative_kernels.cc 2) \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads.cc b/catch/unit/syncthreads/__syncthreads.cc new file mode 100644 index 0000000000..2f63ae39ae --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads __syncthreads + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_and.cc b/catch/unit/syncthreads/__syncthreads_and.cc new file mode 100644 index 0000000000..91dc17fbb2 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_and.cc @@ -0,0 +1,241 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "__syncthreads_and_negative_kernels_rtc.hh" +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads_and __syncthreads_and + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads_and`. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with 0 as the predicate for all threads. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_Zero") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsZeroKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with 1 as the predicate for all threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_One") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOneKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with 0 as the predicate for even threads, and 1 as the predicate + * for odd threads. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_OddEven") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOddEvenKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with a negative predicate. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_Negative") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsNegativeKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with the thread ID as the predicate. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_Id") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsIdKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (RTC) kernels that pass invalid arguments to `__syncthreads_and`. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + HIPRTC_CHECK(hiprtcCreateProgram(&program, kSyncthreadsAndSource, "__syncthreads_and_negative.cc", + 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git 
a/catch/unit/syncthreads/__syncthreads_and_negative_kernels.cc b/catch/unit/syncthreads/__syncthreads_and_negative_kernels.cc new file mode 100644 index 0000000000..5d889a99d2 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_and_negative_kernels.cc @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void __syncthreads_and_v1(int* predicate) { int result = __syncthreads_and(predicate); } + +__global__ void __syncthreads_and_v2(Dummy predicate) { int result = __syncthreads_and(predicate); } \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_and_negative_kernels_rtc.hh b/catch/unit/syncthreads/__syncthreads_and_negative_kernels_rtc.hh new file mode 100644 index 0000000000..fc7be27871 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_and_negative_kernels_rtc.hh @@ -0,0 +1,39 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +static constexpr auto kSyncthreadsAndSource{ + R"( + struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void __syncthreads_and_v1(int* predicate) { + int result = __syncthreads_and(predicate); + } + + __global__ void __syncthreads_and_v2(Dummy predicate) { + int result = __syncthreads_and(predicate); + } + )"}; \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_count.cc b/catch/unit/syncthreads/__syncthreads_count.cc new file mode 100644 index 0000000000..dd084f436e --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_count.cc @@ -0,0 +1,241 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +#include "__syncthreads_count_negative_kernels_rtc.hh" +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads_count __syncthreads_count + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads_count`. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with 0 as the predicate for all threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_Zero") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsZeroKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with 1 as the predicate for all threads. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_One") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOneKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with 0 as the predicate for even threads, and 1 as the predicate + * for odd threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_OddEven") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOddEvenKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with a negative predicate. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_Negative") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsNegativeKernel, kGridSize, kBlockSize, + 0, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with the thread ID as the predicate. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_Id") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsIdKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize - 1); + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (RTC) kernels that pass invalid arguments to `__syncthreads_count`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + HIPRTC_CHECK(hiprtcCreateProgram(&program, kSyncthreadsCountSource, + "__syncthreads_count_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_count_negative_kernels.cc b/catch/unit/syncthreads/__syncthreads_count_negative_kernels.cc new file mode 100644 index 0000000000..83d8cf08ab --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_count_negative_kernels.cc @@ -0,0 +1,36 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void __syncthreads_count_v1(int* predicate) { + int result = __syncthreads_count(predicate); +} + +__global__ void __syncthreads_count_v2(Dummy predicate) { + int result = __syncthreads_count(predicate); +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_count_negative_kernels_rtc.hh b/catch/unit/syncthreads/__syncthreads_count_negative_kernels_rtc.hh new file mode 100644 index 0000000000..9f40e51175 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_count_negative_kernels_rtc.hh @@ -0,0 +1,39 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +static constexpr auto kSyncthreadsCountSource{ + R"( + struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void __syncthreads_count_v1(int* predicate) { + int result = __syncthreads_count(predicate); + } + + __global__ void __syncthreads_count_v2(Dummy predicate) { + int result = __syncthreads_count(predicate); + } + )"}; \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_or.cc b/catch/unit/syncthreads/__syncthreads_or.cc new file mode 100644 index 0000000000..d392c50eab --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_or.cc @@ -0,0 +1,241 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "__syncthreads_or_negative_kernels_rtc.hh" +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads_or __syncthreads_or + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads_or`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with 0 as the predicate for all threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_Zero") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsZeroKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with 1 as the predicate for all threads. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_One") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOneKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with 0 as the predicate for even threads, and 1 as the predicate for + * odd threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_OddEven") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOddEvenKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with a negative predicate. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_Negative") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsNegativeKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with the thread ID as the predicate. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_Id") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsIdKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (RTC) kernels that pass invalid arguments to `__syncthreads_or`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + HIPRTC_CHECK(hiprtcCreateProgram(&program, kSyncthreadsOrSource, "__syncthreads_or_negative.cc", + 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_or_negative_kernels.cc b/catch/unit/syncthreads/__syncthreads_or_negative_kernels.cc new file mode 100644 index 0000000000..b6f46a811c --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_or_negative_kernels.cc @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void __syncthreads_or_v1(int* predicate) { int result = __syncthreads_or(predicate); } + +__global__ void __syncthreads_or_v2(Dummy predicate) { int result = __syncthreads_or(predicate); } \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_or_negative_kernels_rtc.hh b/catch/unit/syncthreads/__syncthreads_or_negative_kernels_rtc.hh new file mode 100644 index 0000000000..dd7e1f93b0 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_or_negative_kernels_rtc.hh @@ -0,0 +1,39 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +static constexpr auto kSyncthreadsOrSource{ + R"( + struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void __syncthreads_or_v1(int* predicate) { + int result = __syncthreads_or(predicate); + } + + __global__ void __syncthreads_or_v2(Dummy predicate) { + int result = __syncthreads_or(predicate); + } + )"}; \ No newline at end of file diff --git a/catch/unit/syncthreads/syncthreads_common.hh b/catch/unit/syncthreads/syncthreads_common.hh new file mode 100644 index 0000000000..c6f9dec8d4 --- /dev/null +++ b/catch/unit/syncthreads/syncthreads_common.hh @@ -0,0 +1,79 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +enum class SyncthreadsKind { kDefault, kCount, kAnd, kOr }; + +template __device__ int Syncthreads(int predicate) { + if constexpr (kind == SyncthreadsKind::kDefault) { + __syncthreads(); + return 0; + } else if constexpr (kind == SyncthreadsKind::kCount) { + return __syncthreads_count(predicate); + } else if constexpr (kind == SyncthreadsKind::kAnd) { + return __syncthreads_and(predicate); + } else if constexpr (kind == SyncthreadsKind::kOr) { + return __syncthreads_or(predicate); + } +} + +template __global__ void SyncthreadsKernel(int* out) { + extern __shared__ int shared_mem[]; + + shared_mem[threadIdx.x] = threadIdx.x + 1; + + Syncthreads(0); + + if (threadIdx.x == 0) { + int sum = 0; + for (int i = 0; i < blockDim.x; ++i) { + sum += shared_mem[i]; + } + out[blockIdx.x] = sum; + } +} + +template __global__ void SyncthreadsZeroKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(0); +} + +template __global__ void SyncthreadsOneKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(1); +} + +template __global__ void SyncthreadsOddEvenKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(threadIdx.x % 2); +} + +template __global__ void SyncthreadsNegativeKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(-1); +} + +template __global__ void SyncthreadsIdKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(threadIdx.x); +} \ No newline at end of file diff --git a/catch/unit/texture/CMakeLists.txt b/catch/unit/texture/CMakeLists.txt index fe4c0a8d1d..1fb2278149 100644 --- a/catch/unit/texture/CMakeLists.txt +++ b/catch/unit/texture/CMakeLists.txt @@ -53,6 +53,67 @@ set(TEST_SRC hipMipmappedArrayGetLevel.cc ) +# tests not for MI200+ +set(NOT_FOR_MI200_AND_ABOVE_TEST + tex1Dfetch.cc + tex1D.cc + tex1DLayered.cc + tex1DGrad.cc + 
tex1DLayeredGrad.cc + tex1DLayeredLod.cc + tex1DLod.cc + tex3D.cc + tex3DLod.cc + tex3DGrad.cc + texCubemap.cc + texCubemapLod.cc + texCubemapGrad.cc + texCubemapLayered.cc + texCubemapLayeredLod.cc + texCubemapLayeredGrad.cc + tex2Dgather.cc + tex2D.cc + tex2DLayered.cc + tex2DGrad.cc + tex2DLayeredGrad.cc + tex2DLod.cc + tex2DLayeredLod.cc + tex3D.cc + tex3DLod.cc + tex3DGrad.cc +) +set(MI200_AND_ABOVE_TARGETS gfx90a gfx940 gfx941 gfx942) +function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL) + set(ARCH_CHECK -1 PARENT_SCOPE) + string(REGEX MATCHALL "--offload-arch=gfx[0-9a-z]+" OFFLOAD_ARCH_LIST ${OFFLOAD_ARCH_STR_LOCAL}) + foreach(OFFLOAD_ARCH IN LISTS OFFLOAD_ARCH_LIST) + string(REGEX MATCHALL "--offload-arch=(gfx[0-9a-z]+)" matches ${OFFLOAD_ARCH}) + if (CMAKE_MATCH_COUNT EQUAL 1) + if (CMAKE_MATCH_1 IN_LIST MI200_AND_ABOVE_TARGETS) + set(ARCH_CHECK 1 PARENT_SCOPE) + endif() # CMAKE_MATCH_1 + endif() # CMAKE_MATCH_COUNT + endforeach() # OFFLOAD_ARCH_LIST +endfunction() # CheckRejectedArchs + +if(HIP_PLATFORM MATCHES "amd") + if (DEFINED OFFLOAD_ARCH_STR) + CheckRejectedArchs(${OFFLOAD_ARCH_STR}) + elseif(DEFINED ENV{HCC_AMDGPU_TARGET}) + CheckRejectedArchs($ENV{HCC_AMDGPU_TARGET}) + else() + set(ARCH_CHECK -1) + endif() + if(${ARCH_CHECK} EQUAL -1) + message(STATUS "Adding test: ${NOT_FOR_MI200_AND_ABOVE_TEST}") + set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI200_AND_ABOVE_TEST}) + else() + message(STATUS "Removing test: ${NOT_FOR_MI200_AND_ABOVE_TEST}") + endif() +else() + set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI200_AND_ABOVE_TEST}) +endif() + if(WIN32) set(TEST_SRC ${TEST_SRC} diff --git a/catch/unit/texture/kernels.hh b/catch/unit/texture/kernels.hh index 056dd3f6e0..d74caa5b2a 100644 --- a/catch/unit/texture/kernels.hh +++ b/catch/unit/texture/kernels.hh @@ -33,6 +33,14 @@ __host__ __device__ inline float GetCoordinate(size_t iteration, size_t N, size_ return normalized_coords ?
x / dim : x; } +template +__global__ void tex1DfetchKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + out[tid] = tex1D(tex_obj, tid); +} + template __global__ void tex1DKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords) { @@ -43,6 +51,66 @@ __global__ void tex1DKernel(TexelType* const out, size_t N, hipTextureObject_t t out[tid] = tex1D(tex_obj, x); } +template +__global__ void tex1DLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t num_subdivisions, bool normalized_coords, + float level_of_detail) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DLod(tex_obj, x, level_of_detail); +} + +template +__global__ void tex1DLayeredLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t num_subdivisions, bool normalized_coords, + int layer, float level_of_detail) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DLayeredLod(tex_obj, x, layer, level_of_detail); +} + +template +__global__ void tex1DGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t num_subdivisions, bool normalized_coords, + float dx, float dy) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DGrad(tex_obj, x, dx, dy); +} + +template +__global__ void tex1DLayeredGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t num_subdivisions, + bool normalized_coords, int layer, float dx, float dy) { + const auto tid = 
cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DLayeredGrad(tex_obj, x, layer, dx, dy); +} + +template +__global__ void tex2DgatherKernel(TexelType* const out, int comp, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2Dgather(tex_obj, x, y, comp); +} + template __global__ void tex2DKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, @@ -59,6 +127,73 @@ __global__ void tex2DKernel(TexelType* const out, size_t N_x, size_t N_y, out[tid_y * N_x + tid_x] = tex2D(tex_obj, x, y); } +template +__global__ void tex2DGradKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, float2 dx, + float2 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DGrad(tex_obj, x, y, dx, dy); +} + +template +__global__ void tex2DLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, float layer, + float2 dx, float2 dy) 
{ + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DLayeredGrad(tex_obj, x, y, layer, dx, dy); +} + +template +__global__ void tex2DLodKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DLod(tex_obj, x, y, level); +} + +template +__global__ void tex2DLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, int layer, + float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DLayeredLod(tex_obj, x, y, layer, level); +} + template __global__ void tex3DKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, @@ -79,6 +214,111 @@ __global__ void tex3DKernel(TexelType* const out, size_t N_x, size_t N_y, size_t 
out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3D(tex_obj, x, y, z); } +template +__global__ void tex3DLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords, + float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DLod(tex_obj, x, y, z, level); +} + +template +__global__ void tex3DGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords, + float4 dx, float4 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DGrad(tex_obj, x, y, z, dx, dy); +} + +template +__global__ void texCubemapKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool 
normalized_coords) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemap(tex_obj, x, y, z); +} + +template +__global__ void texCubemapLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords, + float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapLod(tex_obj, x, y, z, level); +} + +template +__global__ void texCubemapGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords, + float4 dx, float4 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= 
N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapGrad(tex_obj, x, y, z, dx, dy); +} + template __global__ void tex1DLayeredKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, @@ -104,4 +344,71 @@ __global__ void tex2DLayeredKernel(TexelType* const out, size_t N_x, size_t N_y, float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2DLayered(tex_obj, x, y, layer); +} + +template +__global__ void texCubemapLayeredKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, + bool normalized_coords, size_t layer) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapLayered(tex_obj, x, y, z, layer); +} + +template +__global__ void texCubemapLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, + bool normalized_coords, size_t layer, float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= 
N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapLayeredLod(tex_obj, x, y, z, layer, level); +} + +template +__global__ void texCubemapLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y, + size_t N_z, hipTextureObject_t tex_obj, size_t width, + size_t height, size_t depth, size_t num_subdivisions, + bool normalized_coords, size_t layer, float4 dx, + float4 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapLayeredGrad(tex_obj, x, y, z, layer, dx, dy); } \ No newline at end of file diff --git a/catch/unit/texture/test_fixture.hh b/catch/unit/texture/test_fixture.hh index 47ac8b73bc..607e7fa92b 100644 --- a/catch/unit/texture/test_fixture.hh +++ b/catch/unit/texture/test_fixture.hh @@ -34,6 +34,7 @@ template struct TextureTestParams { size_t layers; size_t num_subdivisions; hipTextureDesc tex_desc; + bool cubemap; size_t Size() const { return extent.width * (extent.height ?: 1) * (extent.depth ?: 1) * (layers ?: 1); @@ -53,6 +54,10 @@ template struct TextureTestParams { size_t 
Depth() const { return extent.depth; } + unsigned int Flags() const { + return (Layered() ? hipArrayLayered : 0u) | (cubemap ? hipArrayCubemap : 0u); + } + hipExtent LayeredExtent() const { return Layered() ? make_hipExtent(Width(), Height(), layers) : extent; } @@ -94,19 +99,25 @@ template struct TextureTestParams { tex_desc.addressMode[0] = address_mode_x; if (extent.height) tex_desc.addressMode[1] = address_mode_y; if (extent.depth) tex_desc.addressMode[2] = address_mode_z; + + tex_desc.mipmapFilterMode = tex_desc.filterMode; } }; -template struct TextureTestFixture { +template +struct TextureTestFixture { using VecType = vec4; using OutType = std::conditional_t, VecType>; + template + using ArrayAllocGuardType = + std::conditional_t, ArrayAllocGuard>; TextureTestParams params; hipResourceDesc res_desc; LinearAllocGuard host_alloc; TextureReference tex_h; - ArrayAllocGuard tex_alloc_d; + ArrayAllocGuardType tex_alloc_d; TextureGuard tex; LinearAllocGuard out_alloc_d; std::vector out_alloc_h; @@ -115,7 +126,7 @@ template struct TextureTestFix : params{p}, host_alloc{LinearAllocs::hipHostMalloc, sizeof(VecType) * params.Size()}, tex_h{host_alloc.ptr(), params.extent, params.layers}, - tex_alloc_d{params.LayeredExtent(), params.Layered() ? 
hipArrayLayered : 0u}, + tex_alloc_d{params.LayeredExtent(), params.Flags()}, tex{ResDesc(), ¶ms.tex_desc}, out_alloc_d{LinearAllocs::hipMalloc, sizeof(OutType) * params.NumIters()}, out_alloc_h(params.NumIters()) {} @@ -127,7 +138,12 @@ template struct TextureTestFix } hipMemcpy3DParms memcpy_params = {}; - memcpy_params.dstArray = tex_alloc_d.ptr(); + memset(&memcpy_params, 0, sizeof(hipMemcpy3DParms)); + if constexpr (mipmap) { + memcpy_params.dstArray = tex_alloc_d.GetLevel(0); + } else { + memcpy_params.dstArray = tex_alloc_d.ptr(); + } memcpy_params.extent = params.LayeredExtent(); memcpy_params.extent.height = memcpy_params.extent.height ?: 1; memcpy_params.extent.depth = memcpy_params.extent.depth ?: 1; @@ -137,8 +153,13 @@ template struct TextureTestFix HIP_CHECK(hipMemcpy3D(&memcpy_params)); memset(&res_desc, 0, sizeof(res_desc)); - res_desc.resType = hipResourceTypeArray; - res_desc.res.array.array = tex_alloc_d.ptr(); + if constexpr (mipmap) { + res_desc.resType = hipResourceTypeMipmappedArray; + res_desc.res.mipmap.mipmap = tex_alloc_d.ptr(); + } else { + res_desc.resType = hipResourceTypeArray; + res_desc.res.array.array = tex_alloc_d.ptr(); + } return &res_desc; } diff --git a/catch/unit/texture/tex1D.cc b/catch/unit/texture/tex1D.cc new file mode 100644 index 0000000000..17f360d17f --- /dev/null +++ b/catch/unit/texture/tex1D.cc @@ -0,0 +1,139 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1D tex1D + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1D` and read mode set to `hipReadModeElementType`. The test + * is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1D_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1D` and read mode set to `hipReadModeNormalizedFloat`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1D_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1DGrad.cc b/catch/unit/texture/tex1DGrad.cc new file mode 100644 index 0000000000..1c006571b1 --- /dev/null +++ b/catch/unit/texture/tex1DGrad.cc @@ -0,0 +1,139 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DGrad tex1DGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DGrad` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DGrad` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1DLayered.cc b/catch/unit/texture/tex1DLayered.cc new file mode 100644 index 0000000000..e0ad4f707c --- /dev/null +++ b/catch/unit/texture/tex1DLayered.cc @@ -0,0 +1,149 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLayered tex1DLayered + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayered` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayered_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + const auto ref_val = fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayered` and read mode set to `hipReadModeNormalizedFloat`. 
+ * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayered_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = Vec4Map(fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} diff --git 
a/catch/unit/texture/tex1DLayeredGrad.cc b/catch/unit/texture/tex1DLayeredGrad.cc new file mode 100644 index 0000000000..6115c939b8 --- /dev/null +++ b/catch/unit/texture/tex1DLayeredGrad.cc @@ -0,0 +1,150 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLayeredGrad tex1DLayeredGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredGrad` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.5f, 0.5f); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredGrad` and read mode set to + * `hipReadModeNormalizedFloat`. 
The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex1DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat", "", char, + unsigned char, short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.5f, 0.5f); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = Vec4Map(fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + 
REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1DLayeredLod.cc b/catch/unit/texture/tex1DLayeredLod.cc new file mode 100644 index 0000000000..a39d664502 --- /dev/null +++ b/catch/unit/texture/tex1DLayeredLod.cc @@ -0,0 +1,149 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLayeredLod tex1DLayeredLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredLod` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredLod` and read mode set to + * `hipReadModeNormalizedFloat`. 
The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex1DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = Vec4Map(fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} \ No newline at end of file diff --git 
a/catch/unit/texture/tex1DLod.cc b/catch/unit/texture/tex1DLod.cc new file mode 100644 index 0000000000..e38ed60745 --- /dev/null +++ b/catch/unit/texture/tex1DLod.cc @@ -0,0 +1,139 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLod tex1DLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLod` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLod` and read mode set to `hipReadModeNormalizedFloat`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1Dfetch.cc b/catch/unit/texture/tex1Dfetch.cc new file mode 100644 index 0000000000..9354d92a46 --- /dev/null +++ b/catch/unit/texture/tex1Dfetch.cc @@ -0,0 +1,162 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include +#include + +#include "kernels.hh" +#include "utils.hh" +#include "vec4.hh" + +/** + * @addtogroup tex1Dfetch tex1Dfetch + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1Dfetch` and read mode set to `hipReadModeElementType`.
+ * Test source + * ------------------------ + * - unit/texture/tex1Dfetch.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1Dfetch_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + std::vector> tex_h(1024); + for (auto i = 0u; i < tex_h.size(); ++i) { + tex_h[i].x = i + 7; + tex_h[i].y = i + 7; + tex_h[i].z = i + 7; + tex_h[i].w = i + 7; + } + + const auto alloc_size = tex_h.size() * sizeof(vec4); + LinearAllocGuard> tex_alloc_d(LinearAllocs::hipMalloc, alloc_size); + HIP_CHECK(hipMemcpy(tex_alloc_d.ptr(), tex_h.data(), alloc_size, hipMemcpyHostToDevice)); + + hipResourceDesc res_desc; + memset(&res_desc, 0, sizeof(res_desc)); + res_desc.resType = hipResourceTypeLinear; + res_desc.res.linear.devPtr = tex_alloc_d.ptr(); + res_desc.res.linear.desc = hipCreateChannelDesc>(); + res_desc.res.linear.sizeInBytes = alloc_size; + + hipTextureDesc tex_desc; + memset(&tex_desc, 0, sizeof(tex_desc)); + tex_desc.filterMode = hipFilterModePoint; + tex_desc.readMode = hipReadModeElementType; + tex_desc.normalizedCoords = false; + tex_desc.addressMode[0] = hipAddressModeClamp; + + LinearAllocGuard> out_alloc_d(LinearAllocs::hipMalloc, alloc_size); + TextureGuard tex(&res_desc, &tex_desc); + + const auto num_threads = std::min(1024, tex_h.size()); + const auto num_blocks = (tex_h.size() + num_threads - 1) / num_threads; + tex1DfetchKernel> + <<>>(out_alloc_d.ptr(), tex_h.size(), tex.object()); + + std::vector> out_alloc_h(tex_h.size()); + HIP_CHECK(hipMemcpy(out_alloc_h.data(), out_alloc_d.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + for (auto i = 0u; i < out_alloc_h.size(); ++i) { + INFO("Index: " << i); + const auto ref_val = tex_h[i]; + REQUIRE(ref_val.x == out_alloc_h[i].x); + REQUIRE(ref_val.y == out_alloc_h[i].y); + REQUIRE(ref_val.z == out_alloc_h[i].z); + REQUIRE(ref_val.w == 
out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1Dfetch` and read mode set to `hipReadModeNormalizedFloat`. + * Test source + * ------------------------ + * - unit/texture/tex1Dfetch.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + std::vector> tex_h(1024); + for (auto i = 0u; i < tex_h.size(); ++i) { + tex_h[i].x = i + 7; + tex_h[i].y = i + 7; + tex_h[i].z = i + 7; + tex_h[i].w = i + 7; + } + + const auto alloc_size = tex_h.size() * sizeof(vec4); + LinearAllocGuard> tex_alloc_d(LinearAllocs::hipMalloc, alloc_size); + HIP_CHECK(hipMemcpy(tex_alloc_d.ptr(), tex_h.data(), alloc_size, hipMemcpyHostToDevice)); + + hipResourceDesc res_desc; + memset(&res_desc, 0, sizeof(res_desc)); + res_desc.resType = hipResourceTypeLinear; + res_desc.res.linear.devPtr = tex_alloc_d.ptr(); + res_desc.res.linear.desc = hipCreateChannelDesc>(); + res_desc.res.linear.sizeInBytes = alloc_size; + + hipTextureDesc tex_desc; + memset(&tex_desc, 0, sizeof(tex_desc)); + tex_desc.filterMode = hipFilterModePoint; + tex_desc.readMode = hipReadModeNormalizedFloat;  // must match the NormalizeInteger reference below + tex_desc.normalizedCoords = false; + tex_desc.addressMode[0] = hipAddressModeClamp; + + LinearAllocGuard> out_alloc_d(LinearAllocs::hipMalloc, alloc_size); + TextureGuard tex(&res_desc, &tex_desc); + + const auto num_threads = std::min(1024, tex_h.size()); + const auto num_blocks = (tex_h.size() + num_threads - 1) / num_threads; + tex1DfetchKernel> + <<>>(out_alloc_d.ptr(), tex_h.size(), tex.object()); + + std::vector> out_alloc_h(tex_h.size()); + HIP_CHECK(hipMemcpy(out_alloc_h.data(), out_alloc_d.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + for (auto i = 0u; i < out_alloc_h.size(); ++i) { + INFO("Index: " << i); + const auto ref_val =
Vec4Map(tex_h[i], NormalizeInteger); + REQUIRE(ref_val.x == out_alloc_h[i].x); + REQUIRE(ref_val.y == out_alloc_h[i].y); + REQUIRE(ref_val.z == out_alloc_h[i].z); + REQUIRE(ref_val.w == out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2D.cc b/catch/unit/texture/tex2D.cc new file mode 100644 index 0000000000..7b31a03944 --- /dev/null +++ b/catch/unit/texture/tex2D.cc @@ -0,0 +1,173 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2D tex2D + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2D` and read mode set to `hipReadModeElementType`. 
The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << 
AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2D(x, y, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2D` and read mode set to `hipReadModeNormalizedFloat`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % 
params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DGrad.cc b/catch/unit/texture/tex2DGrad.cc new file mode 100644 index 0000000000..939a6a2543 --- /dev/null +++ b/catch/unit/texture/tex2DGrad.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DGrad tex2DGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DGrad` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, 
params.tex_desc.normalizedCoords, + float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2D(x, y, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DGrad` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) 
<< y); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLayered.cc b/catch/unit/texture/tex2DLayered.cc new file mode 100644 index 0000000000..4929a5b3e0 --- /dev/null +++ b/catch/unit/texture/tex2DLayered.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLayered tex2DLayered + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayered` and read mode set to `hipReadModeElementType`. 
The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); 
+ INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayered` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), 
params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLayeredGrad.cc b/catch/unit/texture/tex2DLayeredGrad.cc new file mode 100644 index 0000000000..d58920bf56 --- /dev/null +++ b/catch/unit/texture/tex2DLayeredGrad.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLayeredGrad tex2DLayeredGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredGrad` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredGrad_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << 
AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredGrad` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat", "", char, + unsigned char, short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, 
float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLayeredLod.cc b/catch/unit/texture/tex2DLayeredLod.cc new file mode 100644 index 0000000000..e51000c204 --- /dev/null +++ b/catch/unit/texture/tex2DLayeredLod.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLayeredLod tex2DLayeredLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredLod` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredLod_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << 
std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredLod` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 
0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLod.cc b/catch/unit/texture/tex2DLod.cc new file mode 100644 index 0000000000..e875e09133 --- /dev/null +++ b/catch/unit/texture/tex2DLod.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLod tex2DLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLod` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLod_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DLodKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = 
fixture.tex_h.Tex2D(x, y, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLod` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DLodKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = 
GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2Dgather.cc b/catch/unit/texture/tex2Dgather.cc new file mode 100644 index 0000000000..6d1c262097 --- /dev/null +++ b/catch/unit/texture/tex2Dgather.cc @@ -0,0 +1,104 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2Dgather tex2Dgather + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2Dgather` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2Dgather.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2Dgather_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + const int comp = GENERATE(0, 1, 2, 3); + + tex2DgatherKernel><<>>( + fixture.out_alloc_d.ptr(), comp, params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); 
++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2DGather(x, y, comp, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex3D.cc b/catch/unit/texture/tex3D.cc new file mode 100644 index 0000000000..2aaefe84d3 --- /dev/null +++ b/catch/unit/texture/tex3D.cc @@ -0,0 +1,194 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex3D tex3D + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3D` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3D_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 4, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DKernel><<>>( + fixture.out_alloc_d.ptr(), 
params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + const auto ref_val = fixture.tex_h.Tex3D(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3D` and read mode set to `hipReadModeNormalizedFloat`. 
The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3D_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = 
GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + + auto ref_val = Vec4Map(fixture.tex_h.Tex3D(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} diff --git a/catch/unit/texture/tex3DGrad.cc b/catch/unit/texture/tex3DGrad.cc new file mode 100644 index 0000000000..b810359fc1 --- /dev/null +++ b/catch/unit/texture/tex3DGrad.cc @@ -0,0 +1,193 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex3DGrad tex3DGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DGrad` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DGradKernel><<>>( + 
fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + const auto ref_val = fixture.tex_h.Tex3D(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DGrad` and read mode set to `hipReadModeNormalizedFloat`. 
+ * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + 
params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto ref_val = Vec4Map(fixture.tex_h.Tex3D(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} diff --git a/catch/unit/texture/tex3DLod.cc b/catch/unit/texture/tex3DLod.cc new file mode 100644 index 0000000000..e514c9b9a3 --- /dev/null +++ b/catch/unit/texture/tex3DLod.cc @@ -0,0 +1,193 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex3DLod tex3DLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DLod` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DLodKernel><<>>( + 
fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + const auto ref_val = fixture.tex_h.Tex3D(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DLod` and read mode set to `hipReadModeNormalizedFloat`. 
The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + 
params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto ref_val = Vec4Map(fixture.tex_h.Tex3D(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} diff --git a/catch/unit/texture/texCubemap.cc b/catch/unit/texture/texCubemap.cc new file mode 100644 index 0000000000..572283a4a7 --- /dev/null +++ b/catch/unit/texture/texCubemap.cc @@ -0,0 +1,203 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemap texCubemap + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemap` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_texCubemap_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + 
texCubemapKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with 
`texCubemap` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_texCubemap_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + texCubemapKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); 
+ float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texCubemapGrad.cc b/catch/unit/texture/texCubemapGrad.cc new file mode 100644 index 0000000000..75a69f698c --- /dev/null +++ b/catch/unit/texture/texCubemapGrad.cc @@ -0,0 +1,203 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapGrad texCubemapGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapGrad` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/texCubemapGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + texCubemapGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << 
FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapGrad` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/texCubemapGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + texCubemapGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << 
FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texCubemapLayered.cc b/catch/unit/texture/texCubemapLayered.cc new file mode 100644 index 0000000000..d7db8d0847 --- /dev/null +++ b/catch/unit/texture/texCubemapLayered.cc @@ -0,0 +1,209 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapLayered texCubemapLayered + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLayered` and read mode set to + * `hipReadModeElementType`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLayered_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.layers = 1; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = 
num_threads_y; + dim_block.z = num_threads_z; + + for (auto layer = 0u; layer < params.layers; ++layer) { + texCubemapLayeredKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } + } +} + +/** + * Test 
Description + * ------------------------ + * - Test texture fetching with `texCubemapLayered` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat", "", char, + unsigned char, short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.layers = 1; + params.cubemap = true; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + for (auto layer = 0u; layer < params.layers; ++layer) { + texCubemapLayeredKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), 
params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texCubemapLayeredGrad.cc b/catch/unit/texture/texCubemapLayeredGrad.cc new file mode 100644 index 0000000000..68c5efdb86 --- /dev/null +++ b/catch/unit/texture/texCubemapLayeredGrad.cc @@ -0,0 +1,211 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapLayeredGrad texCubemapLayeredGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLayeredGrad` and read mode set to + * `hipReadModeElementType`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/texCubemapLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeElementType", "", char, + unsigned char, short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.layers = 1; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + for (auto layer = 0u; layer < params.layers; ++layer) { + texCubemapLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + 
+ INFO("Layer: " << layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLayeredGrad` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/texCubemapLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat", "", char, + unsigned char, short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.layers = 1; + params.cubemap = true; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + for (auto layer = 0u; layer < params.layers; ++layer) { + texCubemapLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + 
params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texCubemapLayeredLod.cc b/catch/unit/texture/texCubemapLayeredLod.cc new file mode 100644 index 0000000000..af305d1e6c --- /dev/null +++ b/catch/unit/texture/texCubemapLayeredLod.cc @@ -0,0 +1,211 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapLayeredLod texCubemapLayeredLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLayeredLod` and read mode set to + * `hipReadModeElementType`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/texCubemapLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeElementType", "", char, + unsigned char, short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.layers = 1; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + for (auto layer = 0u; layer < params.layers; ++layer) { + texCubemapLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " 
<< layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLayeredLod` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/texCubemapLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat", "", char, + unsigned char, short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.layers = 1; + params.cubemap = true; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + for (auto layer = 0u; layer < params.layers; ++layer) { + texCubemapLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + 
INFO("Layer: " << layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texCubemapLod.cc b/catch/unit/texture/texCubemapLod.cc new file mode 100644 index 0000000000..e35f8f7b6c --- /dev/null +++ b/catch/unit/texture/texCubemapLod.cc @@ -0,0 +1,203 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapLod texCubemapLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLod` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemapLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; 
+ + texCubemapLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching 
with `texCubemapLod` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemapLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + texCubemapLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + 
params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texture_reference.hh b/catch/unit/texture/texture_reference.hh index d2ee9159e0..c5b696cd9e 100644 --- a/catch/unit/texture/texture_reference.hh +++ b/catch/unit/texture/texture_reference.hh @@ -26,6 +26,10 @@ THE SOFTWARE. #include "fixed_point.hh" +#if defined(_WIN64) +typedef __int64 ssize_t; +#endif // _WIN64 + template class TextureReference { public: TextureReference(TexelType* alloc, hipExtent extent, size_t layers) @@ -35,6 +39,42 @@ template class TextureReference { return Tex1DLayered(x, 0, tex_desc); } + TexelType Tex2DGather(float x, float y, int comp, const hipTextureDesc& tex_desc) const { + x = tex_desc.normalizedCoords ? 
x * extent_.width : x; + y = tex_desc.normalizedCoords ? y * extent_.height : y; + + const auto [i, alpha] = GetLinearFilteringParams(x); + const auto [j, beta] = GetLinearFilteringParams(y); + + const auto T_i0j0 = Sample(i, j, 0, tex_desc.addressMode); + const auto T_i1j0 = Sample(i + 1.0f, j, 0, tex_desc.addressMode); + const auto T_i0j1 = Sample(i, j + 1.0f, 0, tex_desc.addressMode); + const auto T_i1j1 = Sample(i + 1.0f, j + 1.0f, 0, tex_desc.addressMode); + + const auto IndexVec4 = [](auto vec, int comp) { + switch (comp) { + case 0: + return vec.x; + case 1: + return vec.y; + case 2: + return vec.z; + case 3: + return vec.w; + default: + throw std::invalid_argument("Invalid gather comp"); + } + }; + + TexelType texel; + texel.x = IndexVec4(T_i0j1, comp); + texel.y = IndexVec4(T_i1j1, comp); + texel.z = IndexVec4(T_i1j0, comp); + texel.w = IndexVec4(T_i0j0, comp); + + return texel; + } + TexelType Tex2D(float x, float y, const hipTextureDesc& tex_desc) const { return Tex2DLayered(x, y, 0, tex_desc); } @@ -52,6 +92,64 @@ template class TextureReference { } } + TexelType TexCubemap(float x, float y, float z, const hipTextureDesc& tex_desc) const { + x = tex_desc.normalizedCoords ? x * extent_.width : x; + y = tex_desc.normalizedCoords ? y * extent_.height : y; + z = tex_desc.normalizedCoords ? 
z * extent_.depth : z; + + int face; + float m, s, t; + + if (std::abs(x) > std::abs(y) && std::abs(x) > std::abs(z)) { + if (x >= 0) { + face = 0; + m = x; + s = -z; + t = -y; + } else { + face = 1; + m = -x; + s = z; + t = -y; + } + } else if (std::abs(y) >= std::abs(x) && std::abs(y) > std::abs(z)) { + if (y >= 0) { + face = 2; + m = y; + s = x; + t = z; + } else { + face = 3; + m = -y; + s = x; + t = -z; + } + } else { + if (z >= 0) { + face = 4; + m = z; + s = x; + t = -y; + } else { + face = 5; + m = -z; + s = -x; + t = -y; + } + } + + float coord1 = (s / m + 1) / 2; + float coord2 = (t / m + 1) / 2; + + if (tex_desc.filterMode == hipFilterModePoint) { + return Sample(roundf(coord1), roundf(coord2), face, tex_desc.addressMode); + } else if (tex_desc.filterMode == hipFilterModeLinear) { + return LinearFiltering(coord1, coord2, face, tex_desc.addressMode); + } else { + throw std::invalid_argument("Invalid hipFilterMode value"); + } + } + TexelType Tex1DLayered(float x, int layer, const hipTextureDesc& tex_desc) const { x = tex_desc.normalizedCoords ? x * extent_.width : x; if (tex_desc.filterMode == hipFilterModePoint) { diff --git a/catch/unit/threadfence/CMakeLists.txt b/catch/unit/threadfence/CMakeLists.txt new file mode 100644 index 0000000000..51f61f4e2d --- /dev/null +++ b/catch/unit/threadfence/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC + __threadfence_block.cc + __threadfence.cc + __threadfence_system.cc +) + +hip_add_exe_to_target(NAME ThreadfenceTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) \ No newline at end of file diff --git a/catch/unit/threadfence/__threadfence.cc b/catch/unit/threadfence/__threadfence.cc new file mode 100644 index 0000000000..155f7ef484 --- /dev/null +++ b/catch/unit/threadfence/__threadfence.cc @@ -0,0 +1,201 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include "threadfence_common.hh" + +/** + * @addtogroup __threadfence __threadfence + * @{ + * @ingroup ThreadfenceTest + */ + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on shared memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Shared") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[0])), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[1])), kInitVal2, 1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 1, 2, + 4 * sizeof(int), nullptr, out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on global memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Global") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[0])), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[1])), kInitVal2, 1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on page-locked host memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Pinned") { + LinearAllocGuard in_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.host_ptr(), in_host.host_ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on managed memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Managed") { + LinearAllocGuard in_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.ptr(), in_host.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on global peer device memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Peer") { + const auto device_count = HipTest::getDeviceCount(); + if (device_count < 2) { + HipTest::HIP_SKIP_TEST("At least 2 devices are required"); + return; + } + + int can_access_peer; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, 0, 1)); + REQUIRE(can_access_peer); + + HIP_CHECK(hipSetDevice(0)); + + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[0])), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[1])), kInitVal2, 1)); + + HIP_CHECK(hipSetDevice(1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); + 
HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipSetDevice(0)); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} \ No newline at end of file diff --git a/catch/unit/threadfence/__threadfence_block.cc b/catch/unit/threadfence/__threadfence_block.cc new file mode 100644 index 0000000000..fcee25f628 --- /dev/null +++ b/catch/unit/threadfence/__threadfence_block.cc @@ -0,0 +1,201 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include "threadfence_common.hh" + +/** + * @addtogroup __threadfence_block __threadfence_block + * @{ + * @ingroup ThreadfenceTest + */ + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on shared memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Shared") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[0])), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[1])), kInitVal2, 1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 1, 2, + 4 * sizeof(int), nullptr, out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on global memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Global") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { /* every iteration resets the two input words before launching again */ + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[0])), kInitVal1, 1)); /* in[0] = kInitVal1 */ + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[1])), kInitVal2, 1)); /* in[1] = kInitVal2 */ + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); /* NOTE(review): the shared-memory test in this file passes 1, 2 here while this one passes 2, 1. The block-scope branch of ThreadfenceTest only uses threadIdx.x 0 and 1 of block 0, so if these two arguments are grid size and block size, confirm that a reader thread actually runs and that out_dev is written. */ + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); /* copy the two observed words to the host buffer */ + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); /* the only rejected outcome: word 1 already shows kSetVal2 while word 0 still shows kInitVal1 */ + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on page-locked host memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Pinned") { + LinearAllocGuard in_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { /* every iteration resets the two input words from the host before launching again */ + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.host_ptr(), in_host.host_ptr()); /* the kernel works directly on the hipHostMalloc buffers. NOTE(review): the block-scope branch of ThreadfenceTest only uses threadIdx.x 0 and 1 of block 0 - if 2, 1 are grid size and block size, confirm that a reader thread runs, since out_host is not initialised on the host. */ + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); /* rejects only word 1 == kSetVal2 together with word 0 == kInitVal1. NOTE(review): mixes host_ptr() and ptr() on the same guard - presumably the same address for this allocation type, confirm. */ + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on managed memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Managed") { + LinearAllocGuard in_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { /* every iteration resets the two input words from the host before launching again */ + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.ptr(), in_host.ptr()); /* the kernel works directly on the hipMallocManaged buffers. NOTE(review): same launch-shape question as the pinned test - confirm that a reader thread runs for the intended scope. */ + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); /* rejects only word 1 == kSetVal2 together with word 0 == kInitVal1 */ + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on global peer device memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Peer") { + const auto device_count = HipTest::getDeviceCount(); + if (device_count < 2) { /* the test uses device 0 and device 1, so it is skipped with fewer than two devices */ + HipTest::HIP_SKIP_TEST("At least 2 devices are required"); + return; + } + + int can_access_peer; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, 0, 1)); /* NOTE(review): the query uses (0, 1), while the kernel below is launched after hipSetDevice(1) on buffers allocated after hipSetDevice(0) - confirm the query direction. This test never calls hipDeviceEnablePeerAccess - confirm peer access is enabled elsewhere or not required on the target platform. */ + REQUIRE(can_access_peer); /* a missing peer link fails the test instead of skipping it */ + + HIP_CHECK(hipSetDevice(0)); /* the allocations below are made with device 0 current */ + + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { /* every iteration resets the two input words before launching again */ + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[0])), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[1])), kInitVal2, 1)); + + HIP_CHECK(hipSetDevice(1)); /* launch with device 1 current, on the buffers allocated above */ + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipSetDevice(0)); /* switch back before the copy and before the next iteration resets the input */ + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); /* rejects only word 1 == kSetVal2 together with word 0 == kInitVal1 */ + } +} \ No newline at end of file diff --git a/catch/unit/threadfence/__threadfence_system.cc b/catch/unit/threadfence/__threadfence_system.cc new file mode 100644 index 0000000000..331abdb407 --- /dev/null +++ b/catch/unit/threadfence/__threadfence_system.cc @@ -0,0 +1,126 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include "threadfence_common.hh" + +/** + * @addtogroup __threadfence_system __threadfence_system + * @{ + * @ingroup ThreadfenceTest + */ + +__global__ void WriteKernel(int* in) { /* writer half of the test: a single thread calls Write on the input words */ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid == 0) { /* only global thread 0 writes, all other threads do nothing */ + Write(in); + } +} + +__global__ void ReadKernel(int* out, int* in) { /* reader half of the test: a single thread calls Read from in into out */ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid == 0) { /* only global thread 0 reads, all other threads do nothing */ + Read(out, in); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a system-wide memory fence on global peer device memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_system_Positive_Basic_Peer") { + const auto device_count = HipTest::getDeviceCount(); + if (device_count < 2) { /* the test uses device 0 and device 1, so it is skipped with fewer than two devices */ + HipTest::HIP_SKIP_TEST("At least 2 devices are required"); + return; + } + + int can_access_peer; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, 0, 1)); /* NOTE(review): the query uses (0, 1), while ReadKernel below is launched after hipSetDevice(1) on buffers allocated after hipSetDevice(0) - confirm the query direction. This test never calls hipDeviceEnablePeerAccess - confirm peer access is enabled elsewhere or not required on the target platform. */ + REQUIRE(can_access_peer); /* a missing peer link fails the test instead of skipping it */ + + HIP_CHECK(hipSetDevice(0)); /* the allocations below are made with device 0 current */ + + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { /* every iteration resets the two input words before launching again */ + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[0])), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32((hipDeviceptr_t)(&(in_dev.ptr()[1])), kInitVal2, 1)); + + HipTest::launchKernel(WriteKernel, 1, 1, 0, nullptr, in_dev.ptr()); /* writer is launched while device 0 is current and is not synchronised before the reader launch */ + + HIP_CHECK(hipSetDevice(1)); + HipTest::launchKernel(ReadKernel, 1, 1, 0, nullptr, out_dev.ptr(), in_dev.ptr()); /* reader is launched with device 1 current on the same buffers */ + HIP_CHECK(hipDeviceSynchronize()); /* waits with device 1 current */ + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipDeviceSynchronize()); /* waits with device 0 current, so both kernels are finished before the copy */ + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); /* rejects only word 1 == kSetVal2 together with word 0 == kInitVal1 */ + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a system-wide memory fence on page-locked host memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_system_Positive_Basic_Host") { + LinearAllocGuard in_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { /* every iteration resets the two input words from the host before launching again */ + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(WriteKernel, 1, 1, 0, nullptr, in_host.host_ptr()); /* device side writes the pinned input words */ + Read(out_host.host_ptr(), in_host.host_ptr()); /* host side reads the same words through the host build of Read; presumably this overlaps the kernel because the launch is asynchronous - TODO confirm against HipTest::launchKernel */ + HIP_CHECK(hipDeviceSynchronize()); /* make sure the kernel has finished before checking and before the next reset */ + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); /* rejects only word 1 == kSetVal2 together with word 0 == kInitVal1 */ + } +} \ No newline at end of file diff --git a/catch/unit/threadfence/threadfence_common.hh b/catch/unit/threadfence/threadfence_common.hh new file mode 100644 index 0000000000..dc8dca776d --- /dev/null +++ b/catch/unit/threadfence/threadfence_common.hh @@ -0,0 +1,108 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +enum class ThreadfenceScope { kBlock, kDevice, kSystem }; + +template __device__ void Threadfence() { /* issues the fence intrinsic that matches the compile-time scope */ + if constexpr (scope == ThreadfenceScope::kBlock) { + __threadfence_block(); + } else if constexpr (scope == ThreadfenceScope::kDevice) { + __threadfence(); + } else if constexpr (scope == ThreadfenceScope::kSystem) { + __threadfence_system(); + } +} + +static constexpr int kInitVal1 = 1, kInitVal2 = 2; +static constexpr int kSetVal1 = 10, kSetVal2 = 20; + +template __host__ __device__ void Write(volatile int* in) { /* writer side: store word 0, fence, then store word 1 */ + in[0] = kSetVal1; +#ifdef __HIP_DEVICE_COMPILE__ + Threadfence(); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); +#endif + in[1] = kSetVal2; +} + +template +__host__ __device__ void Read(volatile int* out, volatile int* in) { /* reader side: load word 1, fence, then load word 0 - the opposite order to Write */ + out[1] = in[1]; +#ifdef __HIP_DEVICE_COMPILE__ + Threadfence(); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); +#endif + out[0] = in[0]; +} + +template +__device__ void ThreadfenceTest(int* out, int* in) { /* picks one writer and one reader thread; every other thread does nothing */ + if constexpr (scope == ThreadfenceScope::kBlock || use_shared_mem) { /* block scope or shared memory: threads 0 and 1 of block 0 */ + if (threadIdx.x == 0 && blockIdx.x == 0) { + Write(in); + } else if (threadIdx.x == 1 && blockIdx.x == 0) { + Read(out, in); + } + } else if constexpr (scope == ThreadfenceScope::kDevice) { /* device scope: thread 0 of block 0 and thread 0 of block 1 */ + if (threadIdx.x == 0 && blockIdx.x == 0) { + Write(in); + } else if (threadIdx.x == 0 && blockIdx.x == 1) { + Read(out, in); + } + } +} + +template +__global__ void ThreadfenceTestKernel(int* out, int* in) { /* runs ThreadfenceTest on the caller buffers, or on a shared-memory staging copy when use_shared_mem is set */ + extern __shared__ int shared_mem[]; + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + int *out_mem = out, *in_mem = in; + + if constexpr (use_shared_mem) { + in_mem = &shared_mem[0]; /* fix: in_mem and out_mem are per-thread locals, so every thread must alias shared memory; assigning them only in thread 0 left the reader thread on the global buffers */ + out_mem = &shared_mem[2]; + + if (tid == 0) { /* thread 0 alone seeds the shared input words from global memory */ + in_mem[0] 
= in[0]; + in_mem[1] = in[1]; + } + + __syncthreads(); + } + + ThreadfenceTest(out_mem, in_mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + + if (tid == 0) { /* thread 0 copies the result words from shared memory back to the caller buffer */ + out[0] = out_mem[0]; + out[1] = out_mem[1]; + } + } +} \ No newline at end of file diff --git a/catch/unit/vector_types/CMakeLists.txt b/catch/unit/vector_types/CMakeLists.txt index 49619275f3..90d6d27dbd 100644 --- a/catch/unit/vector_types/CMakeLists.txt +++ b/catch/unit/vector_types/CMakeLists.txt @@ -21,9 +21,77 @@ # Common Tests - Test independent of all platforms set(TEST_SRC + vector_types.cc dim3.cc ) +# the last argument linker libraries is required for this test but optional to the function +if(HIP_PLATFORM MATCHES "nvidia") hip_add_exe_to_target(NAME VectorTypesTest TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests) + TEST_TARGET_NAME build_tests + LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") +hip_add_exe_to_target(NAME VectorTypesTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) +endif() + +# Below tests fail in PSDB +#if(HIP_PLATFORM MATCHES "amd") +# add_test(NAME Unit_NegateUnsigned_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_negate_unsigned.cc 40) +# +# add_test(NAME Unit_BitwiseFloat_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_bitwise_float.cc 96) +# +# add_test(NAME Unit_BitwiseDouble_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_bitwise_double.cc 96) +# +# add_test(NAME Unit_CalculateAssign1D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# 
negative_calculate_assign_with_value_1D.cc 60) +# +# add_test(NAME Unit_CalculateAssign2D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_calculate_assign_with_value_2D.cc 60) +# +# add_test(NAME Unit_CalculateAssign3D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_calculate_assign_with_value_3D.cc 60) +# +# add_test(NAME Unit_CalculateAssign4D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_calculate_assign_with_value_4D.cc 60) +# +# add_test(NAME Unit_CalculateAssignUnsigned1D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_calculate_assign_with_unsigned_value_1D.cc 60) +# +# add_test(NAME Unit_CalculateAssignUnsigned2D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_calculate_assign_with_unsigned_value_2D.cc 60) +# +# add_test(NAME Unit_CalculateAssignUnsigned3D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_calculate_assign_with_unsigned_value_3D.cc 60) +# +# add_test(NAME Unit_CalculateAssignUnsigned4D_Negative_Parameters +# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py +# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} +# negative_calculate_assign_with_unsigned_value_4D.cc 60) +#endif() diff --git a/catch/unit/vector_types/negative_bitwise_double.cc b/catch/unit/vector_types/negative_bitwise_double.cc new file mode 
100644 index 0000000000..e3a88bfe28 --- /dev/null +++ b/catch/unit/vector_types/negative_bitwise_double.cc @@ -0,0 +1,25 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double1) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double2) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double3) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double4) diff --git a/catch/unit/vector_types/negative_bitwise_float.cc b/catch/unit/vector_types/negative_bitwise_float.cc new file mode 100644 index 0000000000..cc7185f098 --- /dev/null +++ b/catch/unit/vector_types/negative_bitwise_float.cc @@ -0,0 +1,25 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float1) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float2) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float3) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float4) diff --git a/catch/unit/vector_types/negative_bitwise_float_double_rtc.hh b/catch/unit/vector_types/negative_bitwise_float_double_rtc.hh new file mode 100644 index 0000000000..bf5eed0424 --- /dev/null +++ b/catch/unit/vector_types/negative_bitwise_float_double_rtc.hh @@ -0,0 +1,272 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +static constexpr auto kBitwiseFloat{R"( + __global__ void BitwiseDevice(float1* vector1_dev_ptr, float1* vector2_dev_ptr) { \ + float1 vector1_dev = *vector1_dev_ptr; \ + float1 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float1& vector1_host, float1& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(float2* vector1_dev_ptr, float2* vector2_dev_ptr) { \ + float2 vector1_dev = *vector1_dev_ptr; \ + float2 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float2& vector1_host, float2& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= 
vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(float3* vector1_dev_ptr, float3* vector2_dev_ptr) { \ + float3 vector1_dev = *vector1_dev_ptr; \ + float3 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float3& vector1_host, float3& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(float4* vector1_dev_ptr, float4* vector2_dev_ptr) { \ + float4 vector1_dev = *vector1_dev_ptr; \ + float4 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + 
vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float4& vector1_host, float4& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } +)"}; + +static constexpr auto kBitwiseDouble{R"( + __global__ void BitwiseDevice(double1* vector1_dev_ptr, double1* vector2_dev_ptr) { \ + double1 vector1_dev = *vector1_dev_ptr; \ + double1 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double1& vector1_host, double1& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> 
vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(double2* vector1_dev_ptr, double2* vector2_dev_ptr) { \ + double2 vector1_dev = *vector1_dev_ptr; \ + double2 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double2& vector1_host, double2& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(double3* vector1_dev_ptr, double3* vector2_dev_ptr) { \ + double3 vector1_dev = *vector1_dev_ptr; \ + double3 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double3& vector1_host, double3& vector2_host) { \ + vector1_host = ~vector1_host; 
\ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(double4* vector1_dev_ptr, double4* vector2_dev_ptr) { \ + double4 vector1_dev = *vector1_dev_ptr; \ + double4 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double4& vector1_host, double4& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } +)"}; diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_1D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_1D.cc new file mode 100644 index 0000000000..f95f2e5f0d --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_1D.cc @@ -0,0 +1,26 @@ +/* +Copyright 
(c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong1) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_2D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_2D.cc new file mode 100644 index 0000000000..9f69fc18e7 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_2D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong2) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_3D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_3D.cc new file mode 100644 index 0000000000..99f9d8130e --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_3D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong3) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_4D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_4D.cc new file mode 100644 index 0000000000..301ae99c61 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_4D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong4) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_1D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_1D.cc new file mode 100644 index 0000000000..09d7267186 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_1D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong1) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_2D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_2D.cc new file mode 100644 index 0000000000..173d5fcb4e --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_2D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong2) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_3D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_3D.cc new file mode 100644 index 0000000000..8cd3145aa8 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_3D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong3) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_4D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_4D.cc new file mode 100644 index 0000000000..9679d17548 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_4D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong4) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_rtc.hh b/catch/unit/vector_types/negative_calculate_assign_with_value_rtc.hh new file mode 100644 index 0000000000..ee921a016c --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_rtc.hh @@ -0,0 +1,750 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +static constexpr auto kCalculateAssignChar{R"( + __global__ void CalculateAssignDevice(char1* vector_dev_ptr, decltype(char1().x) value) { + char1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char1& vector_host, decltype(char1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar1* vector_dev_ptr, decltype(uchar1().x) value) { + uchar1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar1& vector_host, decltype(uchar1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(char2* vector_dev_ptr, decltype(char2().x) value) { + char2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char2& vector_host, decltype(char2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar2* vector_dev_ptr, decltype(uchar2().x) value) { + uchar2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar2& vector_host, decltype(uchar2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= 
value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(char3* vector_dev_ptr, decltype(char3().x) value) { + char3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char3& vector_host, decltype(char3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar3* vector_dev_ptr, decltype(uchar3().x) value) { + uchar3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar3& vector_host, decltype(uchar3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(char4* vector_dev_ptr, decltype(char4().x) value) { + char4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char4& vector_host, decltype(char4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar4* vector_dev_ptr, decltype(uchar4().x) value) { + uchar4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar4& vector_host, decltype(uchar4().x) value) { + vector_host %= value; + vector_host ^= 
value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignShort{R"( + __global__ void CalculateAssignDevice(short1* vector_dev_ptr, decltype(short1().x) value) { + short1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short1& vector_host, decltype(short1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort1* vector_dev_ptr, decltype(ushort1().x) value) { + ushort1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ushort1& vector_host, decltype(ushort1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(short2* vector_dev_ptr, decltype(short2().x) value) { + short2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short2& vector_host, decltype(short2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort2* vector_dev_ptr, decltype(ushort2().x) value) { + ushort2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void 
CalculateAssignHost(ushort2& vector_host, decltype(ushort2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(short3* vector_dev_ptr, decltype(short3().x) value) { + short3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short3& vector_host, decltype(short3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort3* vector_dev_ptr, decltype(ushort3().x) value) { + ushort3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ushort3& vector_host, decltype(ushort3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(short4* vector_dev_ptr, decltype(short4().x) value) { + short4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short4& vector_host, decltype(short4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort4* vector_dev_ptr, decltype(ushort4().x) value) { + ushort4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= 
value; + vector_dev <<= value; + } + void CalculateAssignHost(ushort4& vector_host, decltype(ushort4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignInt{R"( + __global__ void CalculateAssignDevice(int1* vector_dev_ptr, decltype(int1().x) value) { + int1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int1& vector_host, decltype(int1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint1* vector_dev_ptr, decltype(uint1().x) value) { + uint1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint1& vector_host, decltype(uint1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(int2* vector_dev_ptr, decltype(int2().x) value) { + int2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int2& vector_host, decltype(int2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint2* vector_dev_ptr, decltype(uint2().x) value) { + uint2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + 
vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint2& vector_host, decltype(uint2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(int3* vector_dev_ptr, decltype(int3().x) value) { + int3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int3& vector_host, decltype(int3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint3* vector_dev_ptr, decltype(uint3().x) value) { + uint3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint3& vector_host, decltype(uint3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(int4* vector_dev_ptr, decltype(int4().x) value) { + int4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int4& vector_host, decltype(int4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint4* vector_dev_ptr, decltype(uint4().x) value) { + uint4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + 
vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint4& vector_host, decltype(uint4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignLong{R"( + __global__ void CalculateAssignDevice(long1* vector_dev_ptr, decltype(long1().x) value) { + long1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long1& vector_host, decltype(long1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong1* vector_dev_ptr, decltype(ulong1().x) value) { + ulong1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong1& vector_host, decltype(ulong1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(long2* vector_dev_ptr, decltype(long2().x) value) { + long2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long2& vector_host, decltype(long2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong2* vector_dev_ptr, decltype(ulong2().x) value) { + ulong2 
vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong2& vector_host, decltype(ulong2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(long3* vector_dev_ptr, decltype(long3().x) value) { + long3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long3& vector_host, decltype(long3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong3* vector_dev_ptr, decltype(ulong3().x) value) { + ulong3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong3& vector_host, decltype(ulong3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(long4* vector_dev_ptr, decltype(long4().x) value) { + long4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long4& vector_host, decltype(long4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong4* vector_dev_ptr, decltype(ulong4().x) 
value) { + ulong4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong4& vector_host, decltype(ulong4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignLongLong{R"( + __global__ void CalculateAssignDevice(longlong1* vector_dev_ptr, decltype(longlong1().x) value) { + longlong1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong1& vector_host, decltype(longlong1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong1* vector_dev_ptr, decltype(ulonglong1().x) value) { + ulonglong1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong1& vector_host, decltype(ulonglong1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(longlong2* vector_dev_ptr, decltype(longlong2().x) value) { + longlong2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong2& vector_host, decltype(longlong2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + 
vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong2* vector_dev_ptr, decltype(ulonglong2().x) value) { + ulonglong2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong2& vector_host, decltype(ulonglong2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(longlong3* vector_dev_ptr, decltype(longlong3().x) value) { + longlong3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong3& vector_host, decltype(longlong3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong3* vector_dev_ptr, decltype(ulonglong3().x) value) { + ulonglong3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong3& vector_host, decltype(ulonglong3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(longlong4* vector_dev_ptr, decltype(longlong4().x) value) { + longlong4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong4& vector_host, decltype(longlong4().x) value) { 
+ vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong4* vector_dev_ptr, decltype(ulonglong4().x) value) { + ulonglong4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong4& vector_host, decltype(ulonglong4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; diff --git a/catch/unit/vector_types/negative_macros_common.hh b/catch/unit/vector_types/negative_macros_common.hh new file mode 100644 index 0000000000..2967d44800 --- /dev/null +++ b/catch/unit/vector_types/negative_macros_common.hh @@ -0,0 +1,78 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#define NEGATE_UNSIGNED_VECTOR_FUNCTIONS(type) \ + __global__ void NegateDevice(type* vector_dev_ptr) { \ + type vector_dev = *vector_dev_ptr; \ + vector_dev = -vector_dev; \ + } \ + void NegateHost(type& vector_host) { vector_host = -vector_host; } + +#define BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(type) \ + __global__ void BitwiseDevice(type* vector1_dev_ptr, type* vector2_dev_ptr) { \ + type vector1_dev = *vector1_dev_ptr; \ + type vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(type& vector1_host, type& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + +#define CALCULATE_ASSIGN_VECTOR_FUNCTIONS(type) \ + __global__ void CalculateAssignDevice(type* vector_dev_ptr, decltype(type().x) value) { \ + type vector_dev = *vector_dev_ptr; \ + vector_dev %= value; \ + vector_dev ^= value; \ + 
vector_dev |= value; \ + vector_dev &= value; \ + vector_dev >>= value; \ + vector_dev <<= value; \ + } \ + void CalculateAssignHost(type& vector_host, decltype(type().x) value) { \ + vector_host %= value; \ + vector_host ^= value; \ + vector_host |= value; \ + vector_host &= value; \ + vector_host >>= value; \ + vector_host <<= value; \ + } diff --git a/catch/unit/vector_types/negative_negate_unsigned.cc b/catch/unit/vector_types/negative_negate_unsigned.cc new file mode 100644 index 0000000000..c8bd39c7a8 --- /dev/null +++ b/catch/unit/vector_types/negative_negate_unsigned.cc @@ -0,0 +1,41 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "negative_macros_common.hh" + +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong4) diff --git a/catch/unit/vector_types/negative_negate_unsigned_rtc.hh b/catch/unit/vector_types/negative_negate_unsigned_rtc.hh new file mode 100644 index 0000000000..c393570a09 --- /dev/null +++ b/catch/unit/vector_types/negative_negate_unsigned_rtc.hh @@ -0,0 +1,150 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +static constexpr auto kNegateUnsignedChar{R"( + __global__ void NegateDevice(uchar1* vector_dev_ptr) { + uchar1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uchar2* vector_dev_ptr) { + uchar2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uchar3* vector_dev_ptr) { + uchar3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uchar4* vector_dev_ptr) { + uchar4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedShort{R"( + __global__ void NegateDevice(ushort1* vector_dev_ptr) { + ushort1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ushort2* vector_dev_ptr) { + ushort2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ushort3* vector_dev_ptr) { + ushort3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort3& vector_host) { vector_host =
-vector_host; } + + __global__ void NegateDevice(ushort4* vector_dev_ptr) { + ushort4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedInt{R"( + __global__ void NegateDevice(uint1* vector_dev_ptr) { + uint1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uint2* vector_dev_ptr) { + uint2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uint3* vector_dev_ptr) { + uint3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uint4* vector_dev_ptr) { + uint4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedLong{R"( + __global__ void NegateDevice(ulong1* vector_dev_ptr) { + ulong1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulong2* vector_dev_ptr) { + ulong2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulong3* vector_dev_ptr) { + ulong3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulong4* vector_dev_ptr) { + ulong4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedLongLong{R"( + __global__ void 
NegateDevice(ulonglong1* vector_dev_ptr) { + ulonglong1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulonglong2* vector_dev_ptr) { + ulonglong2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulonglong3* vector_dev_ptr) { + ulonglong3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulonglong4* vector_dev_ptr) { + ulonglong4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong4& vector_host) { vector_host = -vector_host; } +)"}; diff --git a/catch/unit/vector_types/vector_operations_common.hh b/catch/unit/vector_types/vector_operations_common.hh new file mode 100644 index 0000000000..9684e0ecb1 --- /dev/null +++ b/catch/unit/vector_types/vector_operations_common.hh @@ -0,0 +1,338 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vector_types_common.hh" + +enum class VectorOperation { + kIncrementPrefix, + kIncrementPostfix, + kDecrementPrefix, + kDecrementPostfix, + kAddAssign, + kSubtractAssign, + kMultiplyAssign, + kDivideAssign, + kNegate, + kBitwiseNot, + kModuloAssign, + kBitwiseXorAssign, + kBitwiseOrAssign, + kBitwiseAndAssign, + kRightShiftAssign, + kLeftShiftAssign, + kAdd, + kSubtract, + kMultiply, + kDivide, + kEqual, + kNotEqual, + kModulo, + kBitwiseXor, + kBitwiseOr, + kBitwiseAnd, + kRightShift, + kLeftShift +}; + +inline std::string to_string(VectorOperation operation) { + switch (operation) { + case VectorOperation::kIncrementPrefix: + return "increment (prefix)"; + case VectorOperation::kIncrementPostfix: + return "increment (postfix)"; + case VectorOperation::kDecrementPrefix: + return "decrement (prefix)"; + case VectorOperation::kDecrementPostfix: + return "decrement (postfix)"; + case VectorOperation::kAddAssign: + return "add and assign"; + case VectorOperation::kSubtractAssign: + return "subtract and assign"; + case VectorOperation::kMultiplyAssign: + return "multiply and assign"; + case VectorOperation::kDivideAssign: + return "divide and assign"; + case VectorOperation::kNegate: + return "negate"; + case VectorOperation::kBitwiseNot: + return "bitwise not"; + case VectorOperation::kModuloAssign: + return "modulo and assign"; + case VectorOperation::kBitwiseXorAssign: + return "bitwise XOR and assign"; + case VectorOperation::kBitwiseOrAssign: + return "bitwise OR and assign"; + case VectorOperation::kBitwiseAndAssign: + return "bitwise AND and assign"; + case VectorOperation::kRightShiftAssign: + return "right shift and assign"; + case VectorOperation::kLeftShiftAssign: + return "left
shift and assign"; + case VectorOperation::kAdd: + return "add"; + case VectorOperation::kSubtract: + return "subtract"; + case VectorOperation::kMultiply: + return "multiply"; + case VectorOperation::kDivide: + return "divide"; + case VectorOperation::kEqual: + return "equal"; + case VectorOperation::kNotEqual: + return "not equal"; + case VectorOperation::kModulo: + return "modulo"; + case VectorOperation::kBitwiseXor: + return "bitwise XOR"; + case VectorOperation::kBitwiseOr: + return "bitwise OR"; + case VectorOperation::kBitwiseAnd: + return "bitwise AND"; + case VectorOperation::kRightShift: + return "right shift"; + case VectorOperation::kLeftShift: + return "left shift"; + default: + return "Unknown"; + } +} + +template +void SanityCheck(VectorOperation operation, T vector, decltype(T().x) value1, + decltype(T().x) value2) { + if (operation == VectorOperation::kIncrementPrefix) { + ++value1; + } else if (operation == VectorOperation::kIncrementPostfix) { + value1++; + } else if (operation == VectorOperation::kDecrementPrefix) { + --value1; + } else if (operation == VectorOperation::kDecrementPostfix) { + value1--; + } else if (operation == VectorOperation::kAddAssign) { + value1 += value2; + } else if (operation == VectorOperation::kSubtractAssign) { + value1 -= value2; + } else if (operation == VectorOperation::kMultiplyAssign) { + value1 *= value2; + } else if (operation == VectorOperation::kDivideAssign) { + value1 /= value2; + } else if (operation == VectorOperation::kAdd) { + value1 = value1 + value2; + } else if (operation == VectorOperation::kSubtract) { + value1 = value1 - value2; + } else if (operation == VectorOperation::kMultiply) { + value1 = value1 * value2; + } else if (operation == VectorOperation::kDivide) { + value1 = value1 / value2; + } else if (operation == VectorOperation::kEqual) { + value1 = (value1 == value2) ? 2 * value1 : 3 * value1; + } else if (operation == VectorOperation::kNotEqual) { + value1 = (value1 != value2) ? 
2 * value1 : 3 * value1; + } else { + if constexpr (std::is_signed_v) { + if (operation == VectorOperation::kNegate) { + value1 = -value1; + } + } + if constexpr (std::is_integral_v) { + if (operation == VectorOperation::kBitwiseNot) { + value1 = ~value1; + } else if (operation == VectorOperation::kModuloAssign) { + value1 %= value2; + } else if (operation == VectorOperation::kBitwiseXorAssign) { + value1 ^= value2; + } else if (operation == VectorOperation::kBitwiseOrAssign) { + value1 |= value2; + } else if (operation == VectorOperation::kBitwiseAndAssign) { + value1 &= value2; + } else if (operation == VectorOperation::kRightShiftAssign) { + value1 >>= value2; + } else if (operation == VectorOperation::kLeftShiftAssign) { + value1 <<= value2; + } else if (operation == VectorOperation::kModulo) { + value1 = value1 % value2; + } else if (operation == VectorOperation::kBitwiseXor) { + value1 = value1 ^ value2; + } else if (operation == VectorOperation::kBitwiseOr) { + value1 = value1 | value2; + } else if (operation == VectorOperation::kBitwiseAnd) { + value1 = value1 & value2; + } else if (operation == VectorOperation::kRightShift) { + value1 = value1 >> value2; + } else if (operation == VectorOperation::kLeftShift) { + value1 = value1 << value2; + } + } + } + SanityCheck(vector, value1); +} + +template +__device__ __host__ void PerformVectorOperation(VectorOperation operation, T* vector1, + const T& vector2) { + if (operation == VectorOperation::kIncrementPrefix) { + ++(*vector1); + } else if (operation == VectorOperation::kIncrementPostfix) { + (*vector1)++; + } else if (operation == VectorOperation::kDecrementPrefix) { + --(*vector1); + } else if (operation == VectorOperation::kDecrementPostfix) { + (*vector1)--; + } else if (operation == VectorOperation::kAddAssign) { + *vector1 += vector2; + } else if (operation == VectorOperation::kSubtractAssign) { + *vector1 -= vector2; + } else if (operation == VectorOperation::kMultiplyAssign) { + *vector1 *= vector2; + 
} else if (operation == VectorOperation::kDivideAssign) { + *vector1 /= vector2; + } else if (operation == VectorOperation::kAdd) { + *vector1 = *vector1 + vector2; + } else if (operation == VectorOperation::kSubtract) { + *vector1 = *vector1 - vector2; + } else if (operation == VectorOperation::kMultiply) { + *vector1 = *vector1 * vector2; + } else if (operation == VectorOperation::kDivide) { + *vector1 = *vector1 / vector2; + } else if (operation == VectorOperation::kEqual) { + *vector1 = (*vector1 == vector2) ? 2 * *vector1 : 3 * *vector1; + } else if (operation == VectorOperation::kNotEqual) { + *vector1 = (*vector1 != vector2) ? 2 * *vector1 : 3 * *vector1; + } else { + if constexpr (std::is_signed_v) { + if (operation == VectorOperation::kNegate) { + *vector1 = -(*vector1); + } + } + if constexpr (std::is_integral_v) { + if (operation == VectorOperation::kBitwiseNot) { + *vector1 = ~(*vector1); + } else if (operation == VectorOperation::kModuloAssign) { + *vector1 %= vector2; + } else if (operation == VectorOperation::kBitwiseXorAssign) { + *vector1 ^= vector2; + } else if (operation == VectorOperation::kBitwiseOrAssign) { + *vector1 |= vector2; + } else if (operation == VectorOperation::kBitwiseAndAssign) { + *vector1 &= vector2; + } else if (operation == VectorOperation::kRightShiftAssign) { + *vector1 >>= vector2; + } else if (operation == VectorOperation::kLeftShiftAssign) { + *vector1 <<= vector2; + } else if (operation == VectorOperation::kModulo) { + *vector1 = *vector1 % vector2; + } else if (operation == VectorOperation::kBitwiseXor) { + *vector1 = *vector1 ^ vector2; + } else if (operation == VectorOperation::kBitwiseOr) { + *vector1 = *vector1 | vector2; + } else if (operation == VectorOperation::kBitwiseAnd) { + *vector1 = *vector1 & vector2; + } else if (operation == VectorOperation::kRightShift) { + *vector1 = *vector1 >> vector2; + } else if (operation == VectorOperation::kLeftShift) { + *vector1 = *vector1 << vector2; + } + } + } +} + 
+template +__device__ __host__ void PerformVectorOperation(VectorOperation operation, T* vector, + decltype(T().x) value) { + if (operation == VectorOperation::kAddAssign) { + *vector += value; + } else if (operation == VectorOperation::kSubtractAssign) { + *vector -= value; + } else if (operation == VectorOperation::kMultiplyAssign) { + *vector *= value; + } else if (operation == VectorOperation::kDivideAssign) { + *vector /= value; + } else if (operation == VectorOperation::kAdd) { + *vector = *vector + value; + } else if (operation == VectorOperation::kSubtract) { + *vector = *vector - value; + } else if (operation == VectorOperation::kMultiply) { + *vector = *vector * value; + } else if (operation == VectorOperation::kDivide) { + *vector = *vector / value; + } else if (operation == VectorOperation::kEqual) { + *vector = (*vector == value) ? 2 * *vector : 3 * *vector; + } else if (operation == VectorOperation::kNotEqual) { + *vector = (*vector != value) ? 2 * *vector : 3 * *vector; + } else { + if constexpr (std::is_integral_v) { + if (operation == VectorOperation::kModulo) { + *vector = *vector % value; + } else if (operation == VectorOperation::kBitwiseXor) { + *vector = *vector ^ value; + } else if (operation == VectorOperation::kBitwiseOr) { + *vector = *vector | value; + } else if (operation == VectorOperation::kBitwiseAnd) { + *vector = *vector & value; + } else if (operation == VectorOperation::kRightShift) { + *vector = *vector >> value; + } else if (operation == VectorOperation::kLeftShift) { + *vector = *vector << value; + } + } + } +} + +template +T PerformVectorOperationHost(VectorOperation operation, decltype(T().x) value1, + decltype(T().x) value2) { + T vector1{}; + MakeVectorType(&vector1, value1); + + if constexpr (two_vectors) { + T vector2{}; + MakeVectorType(&vector2, value2); + PerformVectorOperation(operation, &vector1, vector2); + } else { + PerformVectorOperation(operation, &vector1, value2); + } + + return vector1; +} + +template 
+__global__ void VectorOperationKernel(VectorOperation operation, T* vector1, decltype(T().x) value1, + decltype(T().x) value2) { + MakeVectorType(vector1, value1); + if constexpr (two_vectors) { + T vector2{}; + MakeVectorType(&vector2, value2); + PerformVectorOperation(operation, vector1, vector2); + } else { + PerformVectorOperation(operation, vector1, value2); + } +} + +template +T PerformVectorOperationDevice(VectorOperation operation, decltype(T().x) value1, + decltype(T().x) value2) { + T vector_h{}; + T* vector_d; + HIP_CHECK(hipMalloc(&vector_d, sizeof(T))); + HIP_CHECK(hipMemcpy(vector_d, &vector_h, sizeof(T), hipMemcpyHostToDevice)); + VectorOperationKernel<<<1, 1, 0, 0>>>(operation, vector_d, value1, value2); + HIP_CHECK(hipMemcpy(&vector_h, vector_d, sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipFree(vector_d)); + return vector_h; +} diff --git a/catch/unit/vector_types/vector_types.cc b/catch/unit/vector_types/vector_types.cc new file mode 100644 index 0000000000..2a4b5eda0c --- /dev/null +++ b/catch/unit/vector_types/vector_types.cc @@ -0,0 +1,357 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vector_operations_common.hh" +#if HT_AMD +#include "negative_negate_unsigned_rtc.hh" +#include "negative_bitwise_float_double_rtc.hh" +#include "negative_calculate_assign_with_value_rtc.hh" +#endif + +/** + * @addtogroup make_vector make_vector + * @{ + * @ingroup VectorTypeTest + */ + +/** + * Test Description + * ------------------------ + * - Creates vectors for all supported types: + * -# make_char1, make_char2, make_char3, make_char4 + * -# make_uchar1, make_uchar2, make_uchar3, make_uchar4 + * -# make_short1, make_short2, make_short3, make_short4 + * -# make_ushort1, make_ushort2, make_ushort3, make_ushort4 + * -# make_int1, make_int2, make_int3, make_int4 + * -# make_uint1, make_uint2, make_uint3, make_uint4 + * -# make_long1, make_long2, make_long3, make_long4 + * -# make_ulong1, make_ulong2, make_ulong3, make_ulong4 + * -# make_longlong1, make_longlong2, make_longlong3, make_longlong4 + * -# make_ulonglong1, make_ulonglong2, make_ulonglong3, make_ulonglong4 + * -# make_float1, make_float2, make_float3, make_float4 + * -# make_double1, make_double2, make_double3, make_double4 + * - Checks that each vector type is created as expected + * - Calls make function from the host side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_make_vector_SanityCheck_Basic_Host", "", char1, uchar1, char2, uchar2, + char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, short3, ushort3, + short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, uint4, long1, + ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, ulonglong1, + longlong2,
ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, float1, + float2, float3, float4, double1, double2, double3, double4) { + auto value = GetTestValue(0); + TestType vector = MakeVectorTypeHost(value); + SanityCheck(vector, value); +} + +/** + * Test Description + * ------------------------ + * - Creates vectors for all supported types: + * -# make_char1, make_char2, make_char3, make_char4 + * -# make_uchar1, make_uchar2, make_uchar3, make_uchar4 + * -# make_short1, make_short2, make_short3, make_short4 + * -# make_ushort1, make_ushort2, make_ushort3, make_ushort4 + * -# make_int1, make_int2, make_int3, make_int4 + * -# make_uint1, make_uint2, make_uint3, make_uint4 + * -# make_long1, make_long2, make_long3, make_long4 + * -# make_ulong1, make_ulong2, make_ulong3, make_ulong4 + * -# make_longlong1, make_longlong2, make_longlong3, make_longlong4 + * -# make_ulonglong1, make_ulonglong2, make_ulonglong3, make_ulonglong4 + * -# make_float1, make_float2, make_float3, make_float4 + * -# make_double1, make_double2, make_double3, make_double4 + * - Checks that each vector type is created as expected + * - Calls make function from the device side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_make_vector_SanityCheck_Basic_Device", "", char1, uchar1, char2, uchar2, + char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, short3, ushort3, + short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, uint4, long1, + ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, ulonglong1, + longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, float1, + float2, float3, float4, double1, double2, double3, double4) { + auto value = GetTestValue(0); + TestType vector = MakeVectorTypeDevice(value); + SanityCheck(vector, value); +} + +#if HT_AMD +/** + * Test Description + * ------------------------ + * -
Performs supported operations between all supported vector types + * - Checks that the operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the host side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndVectorOperations_SanityCheck_Basic_Host", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : {VectorOperation::kIncrementPrefix, + VectorOperation::kIncrementPostfix, + VectorOperation::kDecrementPrefix, + VectorOperation::kDecrementPostfix, + VectorOperation::kAddAssign, + VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, + VectorOperation::kDivideAssign, + VectorOperation::kNegate, + VectorOperation::kBitwiseNot, + VectorOperation::kModuloAssign, + VectorOperation::kBitwiseXorAssign, + VectorOperation::kBitwiseOrAssign, + VectorOperation::kBitwiseAndAssign, + VectorOperation::kRightShiftAssign, + VectorOperation::kLeftShiftAssign, + VectorOperation::kAdd, + VectorOperation::kSubtract, + VectorOperation::kMultiply, + VectorOperation::kDivide, + VectorOperation::kEqual, + VectorOperation::kNotEqual, + VectorOperation::kModulo, + VectorOperation::kBitwiseXor, + VectorOperation::kBitwiseOr, + VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, + VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = 
PerformVectorOperationHost(operation, value1, value2); + SanityCheck(operation, vector, value1, value2); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs supported operations between vector and underlying vector type (scalar) + * - Checks that the operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the host side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndValueTypeOperations_SanityCheck_Basic_Host", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : + {VectorOperation::kAddAssign, VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, VectorOperation::kDivideAssign, VectorOperation::kAdd, + VectorOperation::kSubtract, VectorOperation::kMultiply, VectorOperation::kDivide, + VectorOperation::kEqual, VectorOperation::kNotEqual, VectorOperation::kModulo, + VectorOperation::kBitwiseXor, VectorOperation::kBitwiseOr, VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = PerformVectorOperationHost(operation, value1, value2); + SanityCheck(operation, vector, value1, value2); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs supported operations between all supported vector types + * - Checks that the 
operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the device side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndVectorOperations_SanityCheck_Basic_Device", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : {VectorOperation::kIncrementPrefix, + VectorOperation::kIncrementPostfix, + VectorOperation::kDecrementPrefix, + VectorOperation::kDecrementPostfix, + VectorOperation::kAddAssign, + VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, + VectorOperation::kDivideAssign, + VectorOperation::kNegate, + VectorOperation::kBitwiseNot, + VectorOperation::kModuloAssign, + VectorOperation::kBitwiseXorAssign, + VectorOperation::kBitwiseOrAssign, + VectorOperation::kBitwiseAndAssign, + VectorOperation::kRightShiftAssign, + VectorOperation::kLeftShiftAssign, + VectorOperation::kAdd, + VectorOperation::kSubtract, + VectorOperation::kMultiply, + VectorOperation::kDivide, + VectorOperation::kEqual, + VectorOperation::kNotEqual, + VectorOperation::kModulo, + VectorOperation::kBitwiseXor, + VectorOperation::kBitwiseOr, + VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, + VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = PerformVectorOperationDevice(operation, value1, value2); + SanityCheck(operation, vector, 
value1, value2); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs supported operations between vector and underlying vector type (scalar) + * - Checks that the operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the device side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndValueTypeOperations_SanityCheck_Basic_Device", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : + {VectorOperation::kAddAssign, VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, VectorOperation::kDivideAssign, VectorOperation::kAdd, + VectorOperation::kSubtract, VectorOperation::kMultiply, VectorOperation::kDivide, + VectorOperation::kEqual, VectorOperation::kNotEqual, VectorOperation::kModulo, + VectorOperation::kBitwiseXor, VectorOperation::kBitwiseOr, VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = PerformVectorOperationDevice(operation, value1, value2); + SanityCheck(operation, vector, value1, value2); + } + } +} + +template void VectorTypesRTCWrapper(const char* program_source) { + hiprtcProgram program{}; + HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "vector_types_kernels.cc", 0, nullptr, + nullptr)); + +#if HT_AMD + 
std::string args = std::string("-ferror-limit=100"); + const char* options[] = {args.c_str()}; + hiprtcResult result{hiprtcCompileProgram(program, 1, options)}; +#else + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; +#endif + + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + std::string error_message{"error:"}; + + size_t npos_e = log.find(error_message, 0); + while (npos_e != std::string::npos) { + ++error_count; + npos_e = log.find(error_message, npos_e + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_errors_num); +} + +/** + * Test Description + * ------------------------ + * - Compiles kernels and host functions with negative scenarios: + * -# Negate (-) operator on the unsigned vectors + * -# Bitwise operators on the floating-point vectors + * -# Calculate-assign operators that are not supported between vector and scalar + * - Utilizes HIP RTC for compilation + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_VectorOperators_Negative_Parameters_RTC") { + VectorTypesRTCWrapper<8>(kNegateUnsignedChar); + VectorTypesRTCWrapper<8>(kNegateUnsignedShort); + VectorTypesRTCWrapper<8>(kNegateUnsignedInt); + VectorTypesRTCWrapper<8>(kNegateUnsignedLong); + VectorTypesRTCWrapper<8>(kNegateUnsignedLongLong); + VectorTypesRTCWrapper<96>(kBitwiseFloat); + VectorTypesRTCWrapper<96>(kBitwiseDouble); + VectorTypesRTCWrapper<96>(kCalculateAssignChar); + VectorTypesRTCWrapper<96>(kCalculateAssignShort); + VectorTypesRTCWrapper<96>(kCalculateAssignInt); + VectorTypesRTCWrapper<96>(kCalculateAssignLong); + VectorTypesRTCWrapper<96>(kCalculateAssignLongLong); +} +#endif // HT_AMD diff 
--git a/catch/unit/vector_types/vector_types_common.hh b/catch/unit/vector_types/vector_types_common.hh new file mode 100644 index 0000000000..cbaf3f24a6 --- /dev/null +++ b/catch/unit/vector_types/vector_types_common.hh @@ -0,0 +1,187 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +constexpr auto kIntegerTestValueFirst = 42; +constexpr auto kIntegerTestValueSecond = 4; +constexpr auto kFloatingPointTestValueFirst = 42.125; +constexpr auto kFloatingPointTestValueSecond = 4.875; + +template T GetTestValue(int index) { + if (index == 0) { + return std::is_floating_point_v ? static_cast(kIntegerTestValueFirst) + : static_cast(kFloatingPointTestValueFirst); + } else { + return std::is_floating_point_v ? 
static_cast(kIntegerTestValueSecond) + : static_cast(kFloatingPointTestValueSecond); + } +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); + REQUIRE(vector.y == expected_value); +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); + REQUIRE(vector.y == expected_value); + REQUIRE(vector.z == expected_value); +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); + REQUIRE(vector.y == expected_value); + REQUIRE(vector.z == expected_value); + REQUIRE(vector.w == expected_value); +} + +template +__host__ __device__ void MakeVectorType(T* vector_ptr, decltype(T().x) value) { + if constexpr (std::is_same_v) { + *vector_ptr = make_char1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_char2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_char3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_char4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort2(value, 
value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_longlong1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = 
make_longlong2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_longlong3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_longlong4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double4(value, value, value, value); + } +} + +template T MakeVectorTypeHost(decltype(T().x) value) { + T vector{}; + MakeVectorType(&vector, value); + return vector; +} + +template __global__ void VectorTypeKernel(T* vector, decltype(T().x) value) { + MakeVectorType(vector, value); +} + +template T MakeVectorTypeDevice(decltype(T().x) value) { + T vector_h{}; + T* vector_d; + HIP_CHECK(hipMalloc(&vector_d, sizeof(T))); + HIP_CHECK(hipMemcpy(vector_d, &vector_h, sizeof(T), hipMemcpyHostToDevice)); + VectorTypeKernel<<<1, 1, 0, 0>>>(vector_d, value); + HIP_CHECK(hipMemcpy(&vector_h, vector_d, sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipFree(vector_d)); + return vector_h; +} diff --git a/catch/unit/virtualMemoryManagement/CMakeLists.txt b/catch/unit/virtualMemoryManagement/CMakeLists.txt new file mode 
100644 index 0000000000..f540fdd25b --- /dev/null +++ b/catch/unit/virtualMemoryManagement/CMakeLists.txt @@ -0,0 +1,55 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(TEST_SRC + hipMemGetAllocationGranularity.cc + hipMemRetainAllocationHandle.cc) + +if(HIP_PLATFORM MATCHES "nvidia") +set(TEST_SRC + ${TEST_SRC} + hipMemMapArrayAsync.cc) + +if(UNIX) # Disabled on AMD due to defect EXSWHTEC-375 + set(TEST_SRC + ${TEST_SRC} + hipMemExportToShareableHandle.cc + hipMemImportFromShareableHandle.cc) +endif() + +endif() + +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + ${TEST_SRC} + hipMemAddressFree.cc + hipMemAddressReserve.cc + hipMemCreate.cc + hipMemSetGetAccess.cc + hipMemGetAllocationPropertiesFromHandle.cc + hipMemMap.cc + hipMemRelease.cc + hipMemUnmap.cc + hipMemVmm_old.cc) +endif() + +hip_add_exe_to_target(NAME VirtualMemoryManagementTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) \ No newline at end of file diff --git a/catch/unit/virtualMemoryManagement/hipMemAddressFree.cc b/catch/unit/virtualMemoryManagement/hipMemAddressFree.cc new file mode 100644 index 0000000000..6171f830ac --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemAddressFree.cc @@ -0,0 +1,74 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemAddressFree hipMemAddressFree + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemAddressFree (void* devPtr, size_t size)` - + * Frees an address range reservation made via hipMemAddressReserve. + */ + +#include + +#include "hip_vmm_common.hh" + +#define DATA_SIZE (1 << 13) + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemAddressFree.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemAddressFree_negative") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + + SECTION("nullptr to devptr") { + REQUIRE(hipMemAddressFree(nullptr, size_mem) == hipErrorInvalidValue); + } + + SECTION("pass zero to size") { REQUIRE(hipMemAddressFree(ptrA, 0) == hipErrorInvalidValue); } + + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} diff --git a/catch/unit/virtualMemoryManagement/hipMemAddressReserve.cc b/catch/unit/virtualMemoryManagement/hipMemAddressReserve.cc 
new file mode 100644 index 0000000000..c19f8ada43 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemAddressReserve.cc @@ -0,0 +1,150 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemAddressReserve hipMemAddressReserve + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemAddressReserve (void** ptr, + * size_t size, + * size_t alignment, + * void* addr, + * unsigned long long flags)` - + * Reserves an address range. + */ + +#include + +#include "hip_vmm_common.hh" + +#define DATA_SIZE (1 << 13) + +/** + * Test Description + * ------------------------ + * - Verify if reserved address returned by hipMemAddressReserve + * for different alignment values are correctly aligned. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemAddressReserve.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemAddressReserve_AlignmentTest") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + constexpr int initializer = 0; + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate virtual address range + hipDeviceptr_t ptrA; + size_t alignmnt = 1; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // check for address alignment fron 2 to 1024 + for (int iter = 0; iter < 12; iter++) { + alignmnt = alignmnt * 2; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, alignmnt, 0, 0)); + REQUIRE((reinterpret_cast(ptrA) % alignmnt) == 0); + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + 
REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); + } + HIP_CHECK(hipMemRelease(handle)); +} + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemAddressReserve.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemAddressReserve_Negative") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate virtual address range + hipDeviceptr_t ptrA; + + SECTION("Nullptr to ptr") { + REQUIRE(hipMemAddressReserve(nullptr, size_mem, 0, 0, 0) == hipErrorInvalidValue); + } + + SECTION("pass size as 0") { + REQUIRE(hipMemAddressReserve(&ptrA, 0, 0, 0, 0) == hipErrorMemoryAllocation); + } + +#if HT_NVIDIA + SECTION("pass non power of two for alignment") { + REQUIRE(hipMemAddressReserve(&ptrA, size_mem, 3, 0, 0) == hipErrorMemoryAllocation); + } +#endif + + SECTION("pass size as non multiple of host page size") { + REQUIRE(hipMemAddressReserve(&ptrA, (size_mem - 1), 0, 0, 0) == hipErrorMemoryAllocation); + } +} diff --git a/catch/unit/virtualMemoryManagement/hipMemCreate.cc b/catch/unit/virtualMemoryManagement/hipMemCreate.cc new file mode 100644 index 0000000000..4cc52786ad --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemCreate.cc @@ -0,0 +1,445 @@ +/* +Copyright (c) 2023 
Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemCreate hipMemCreate + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemCreate (hipMemGenericAllocationHandle_t* handle, + * size_t size, + * const hipMemAllocationProp* prop, + * unsigned long long flags)` - + * Creates a memory allocation described by the properties and size. + */ + +#include +#include + +#include "hip_vmm_common.hh" + +#define THREADS_PER_BLOCK 512 +#define NUM_OF_BUFFERS 3 +#define DATA_SIZE (1 << 13) + +/** + Kernel to perform Square of input data. + */ +static __global__ void square_kernel(int* Buff) { + int i = threadIdx.x + blockDim.x * blockIdx.x; + int temp = Buff[i] * Buff[i]; + Buff[i] = temp; +} + +/** + * Test Description + * ------------------------ + * - Allocate physical memories for different multiples of + * granularity and deallocate them. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemCreate.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemCreate_BasicAllocateDeAlloc_MultGranularity") { + size_t granularity = 0; + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + for (int mul = 1; mul < 64; mul++) { + HIP_CHECK(hipMemCreate(&handle, granularity * mul, &prop, 0)); + HIP_CHECK(hipMemRelease(handle)); + } +} + +/** + * Test Description + * ------------------------ + * - Allocate physical memory and map it to virtual address range. + * After setting device permission, copy data from host to VMM memory + * and back to host. Verify the result. Release handle at end after + * unmapping VMM range. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemCreate.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPostUnmap") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); + HIP_CHECK(hipMemRelease(handle)); +} + +/** + * Test Description + * ------------------------ + * - Allocate physical memory 
and map it to virtual address + * range. After setting device permission, copy data from host + * to VMM memory and back to host. Verify the result. Release + * handle before the VMM range is used. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemCreate.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPreUse") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), 
A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Allocate physical memory and map it to virtual address + * range. After setting device permission, copy data from host + * to device, launch kernel to square the data, copy data back + * to host. Verify the result. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemCreate.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemCreate_ChkWithKerLaunch") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + std::vector A_h(N), B_h(N), C_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + C_h[idx] = idx * idx; + } + 
HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + // Invoke kernel + hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), dim3(THREADS_PER_BLOCK), 0, 0, + static_cast(ptrA)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + HIP_CHECK(hipDeviceSynchronize()); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Allocate multiple non-contiguous physical memory chunks + * and map it to contiguous virtual address range. After setting + * device permission, copy data from host to device, launch kernel + * to square the data, copy data back to host. Verify the result. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemCreate.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemCreate_MapNonContiguousChunks") { + size_t granularity = 0; + constexpr int numOfBuffers = NUM_OF_BUFFERS; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle[NUM_OF_BUFFERS]; + // Allocate 3 physical memory chunks + for (int count = 0; count < numOfBuffers; count++) { + HIP_CHECK(hipMemCreate(&handle[count], size_mem, &prop, 0)); + } + // Allocate virtual address range for all the memory chunks + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, (numOfBuffers * 
size_mem), 0, 0, 0)); + for (int idx = 0; idx < numOfBuffers; idx++) { + uint64_t uiptr = reinterpret_cast(ptrA); + uiptr = uiptr + idx * size_mem; + HIP_CHECK(hipMemMap(reinterpret_cast(uiptr), size_mem, 0, handle[idx], 0)); + HIP_CHECK(hipMemRelease(handle[idx])); + } + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, (numOfBuffers * size_mem), &accessDesc, 1)); + std::vector A_h(numOfBuffers * size_mem), B_h(numOfBuffers * size_mem), + C_h(numOfBuffers * size_mem); + // Fill Data + for (size_t idx = 0; idx < (numOfBuffers * N); idx++) { + A_h[idx] = idx; + C_h[idx] = idx * idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), numOfBuffers * buffer_size)); + // Launch square kernel + hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), dim3(THREADS_PER_BLOCK), 0, 0, + static_cast(ptrA)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, numOfBuffers * buffer_size)); + HIP_CHECK(hipDeviceSynchronize()); + // Validate Results + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); + for (int idx = 0; idx < numOfBuffers; idx++) { + uint64_t uiptr = reinterpret_cast(ptrA); + uiptr = uiptr + idx * size_mem; + HIP_CHECK(hipMemUnmap(reinterpret_cast(uiptr), size_mem)); + } + HIP_CHECK(hipMemAddressFree(ptrA, (numOfBuffers * size_mem))); +} + +/** + * Test Description + * ------------------------ + * - (Check if the VMM address can be memset) Map a physical chunk + * to the VMM address range. Memset the VMM address range with initial + * value. Validate. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemCreate.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemCreate_ChkWithMemset") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + constexpr int init_val = 0; + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + std::vector A_h(N); + HIP_CHECK(hipMemset(ptrA, init_val, buffer_size)); + HIP_CHECK(hipMemcpyDtoH(A_h.data(), ptrA, buffer_size)); + for (int idx = 0; idx < N; idx++) { + REQUIRE(A_h[idx] == init_val); + } + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); + HIP_CHECK(hipMemRelease(handle)); +} + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemCreate.cc + * Test requirements + * ------------------------ + * - 
HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemCreate_Negative") { + size_t granularity = 0; + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemGenericAllocationHandle_t handle; + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Device + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + SECTION("Nullptr to handle") { + REQUIRE(hipMemCreate(nullptr, granularity, &prop, 0) == hipErrorInvalidValue); + } + + SECTION("Nullptr to prop") { + REQUIRE(hipMemCreate(&handle, granularity, nullptr, 0) == hipErrorInvalidValue); + } + + SECTION("pass size as 0") { REQUIRE(hipMemCreate(&handle, 0, &prop, 0) == hipErrorInvalidValue); } + + SECTION("Pass prop type as invalid") { + prop.type = hipMemAllocationTypeInvalid; + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); + } + + SECTION("pass location as invalid") { + prop.location.type = hipMemLocationTypeInvalid; + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); + } + + SECTION("non multiple of granularity") { + REQUIRE(hipMemCreate(&handle, (granularity - 1), &prop, 0) == hipErrorInvalidValue); + } + + SECTION("pass location id as -1") { + prop.location.id = -1; // set to non existing device + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); + } + + SECTION("pass location id as > highest device number") { + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + prop.location.id = numDevices; // set to non existing device + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); + } +} diff --git a/catch/unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc b/catch/unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc new file mode 100644 index 
0000000000..d5c4b5394e --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc @@ -0,0 +1,145 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemExportToShareableHandle hipMemExportToShareableHandle + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemExportToShareableHandle(void *shareableHandle, + * hipMemGenericAllocationHandle_t handle, + * hipMemAllocationHandleType handleType, + * unsigned long long flags)` - + * Exports an allocation to a requested shareable handle type. + */ + +#include + +#include "hip_vmm_common.hh" + +/** + * Test Description + * ------------------------ + * - Basic sanity test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemExportToShareableHandle_Positive_Basic") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + REQUIRE(shareable_handle != nullptr); + + HIP_CHECK(hipMemRelease(handle)); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemExportToShareableHandle_Negative_Parameters") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + + SECTION("shareableHandle == nullptr") { + HIP_CHECK_ERROR( + hipMemExportToShareableHandle(nullptr, handle, hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + +#if HT_AMD + SECTION("handle == nullptr") { + HIP_CHECK_ERROR(hipMemExportToShareableHandle(&shareable_handle, nullptr, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } +#endif + + SECTION("invalid handleType") { + HIP_CHECK_ERROR( + hipMemExportToShareableHandle(&shareable_handle, handle, hipMemHandleTypeWin32, 0), + hipErrorInvalidValue); + } + + SECTION("non-zero flags") { + HIP_CHECK_ERROR(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 1), + hipErrorInvalidValue); + } + + HIP_CHECK(hipMemRelease(handle)); + +#if HT_AMD // segfaults on NVIDIA + SECTION("released handle") { + HIP_CHECK_ERROR(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } +#endif +} \ No newline at end of file diff --git a/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc 
b/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc new file mode 100644 index 0000000000..0f45a53810 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc @@ -0,0 +1,184 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemGetAllocationGranularity hipMemGetAllocationGranularity + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemGetAllocationGranularity (size_t* granularity, + * const hipMemAllocationProp* prop, + * hipMemAllocationGranularity_flags option)` - + * Calculates either the minimal or recommended granularity. + */ + +#include +#include +#include + +#include "hip_vmm_common.hh" + +/** + local function to invoke hipMemGetAllocationGranularity.
+ */ +void getGranularity(size_t* granularity, hipMemAllocationGranularity_flags option, int device) { + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK(hipMemGetAllocationGranularity(granularity, &prop, option)); +} + +/** + * Test Description + * ------------------------ + * - Functional Test to get granularity size for + * hipMemAllocationGranularityMinimum option. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemGetAllocationGranularity_MinGranularity") { + HIP_CHECK(hipFree(0)); + size_t granularity = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + getGranularity(&granularity, hipMemAllocationGranularityMinimum, 0); + REQUIRE(granularity > 0); +} + +/** + * Test Description + * ------------------------ + * - Functional Test to get granularity size for + * hipMemAllocationGranularityRecommended option. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemGetAllocationGranularity_RecommendedGranularity") { + HIP_CHECK(hipFree(0)); + size_t granularity = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + getGranularity(&granularity, hipMemAllocationGranularityRecommended, 0); + REQUIRE(granularity > 0); +} + +/** + * Test Description + * ------------------------ + * - Functional Test to get granularity size for + * hipMemAllocationGranularityMinimum option for all GPUs. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemGetAllocationGranularity_AllGPUs") { + HIP_CHECK(hipFree(0)); + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + for (int dev = 0; dev < numDevices; dev++) { + size_t granularity = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, dev)); + checkVMMSupported(device); + getGranularity(&granularity, hipMemAllocationGranularityRecommended, dev); + REQUIRE(granularity > 0); + } +} + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemGetAllocationGranularity_NegativeTests") { + HIP_CHECK(hipFree(0)); + size_t granularity = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = 0; // Current Devices + + SECTION("Granularity is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipMemGetAllocationGranularity(nullptr, &prop, hipMemAllocationGranularityMinimum)); + } +#if HT_AMD // segfaults on NVIDIA + SECTION("Prop is nullptr") { + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, nullptr, hipMemAllocationGranularityMinimum)); + } +#endif +#if HT_NVIDIA + SECTION("flag is invalid") { + REQUIRE(hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, + (hipMemAllocationGranularity_flags)0xff)); + } +#endif +#if HT_AMD // succeeds on NVIDIA + SECTION("device id > highest device id") { + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + prop.location.id = numDevices; // set to non 
existing device + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + } + SECTION("device id < lowest device id") { + prop.location.id = -1; // set to non existing device + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + } + SECTION("allocation type as invalid") { + prop.type = hipMemAllocationTypeInvalid; + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + } + SECTION("location type as invalid") { + prop.location.type = hipMemLocationTypeInvalid; + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + } +#endif +} diff --git a/catch/unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc b/catch/unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc new file mode 100644 index 0000000000..cc3a8dc519 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc @@ -0,0 +1,117 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemGetAllocationPropertiesFromHandle hipMemGetAllocationPropertiesFromHandle + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, + * hipMemGenericAllocationHandle_t handle)` - + * Retrieve the property structure of the given handle. + */ + +#include + +#include "hip_vmm_common.hh" + +#define DATA_SIZE (1 << 13) + +/** + * Test Description + * ------------------------ + * - Functional test to verify the values of hipMemAllocationProp properties. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_functional") { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + hipMemGenericAllocationHandle_t handle; + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + // create a temp prop structure.
+ hipMemAllocationProp prop_temp = {}; + size_t granularity = 0; + int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t mem_size = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, mem_size, &prop, 0)); + // verify properties has been retrived from handle + HIP_CHECK(hipMemGetAllocationPropertiesFromHandle(&prop_temp, handle)); + REQUIRE(prop_temp.type == prop.type); + REQUIRE(prop_temp.location.type == prop.location.type); + REQUIRE(prop_temp.location.id == prop.location.id); + HIP_CHECK(hipMemRelease(handle)); +} + +/** + * Test Description + * ------------------------ + * - Negative Tests. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_Negative") { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + hipMemGenericAllocationHandle_t handle; + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + // create a temp prop structure. 
+ hipMemAllocationProp prop_temp = {}; + size_t granularity = 0; + int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t mem_size = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, mem_size, &prop, 0)); + + SECTION("Nullptr as prop") { + REQUIRE(hipMemGetAllocationPropertiesFromHandle(nullptr, handle) == hipErrorInvalidValue); + } + + SECTION("null handle") { + prop.location.type = hipMemLocationTypeInvalid; + REQUIRE(hipMemGetAllocationPropertiesFromHandle(&prop_temp, nullptr) == hipErrorInvalidValue); + } + + HIP_CHECK(hipMemRelease(handle)); +} diff --git a/catch/unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc b/catch/unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc new file mode 100644 index 0000000000..f362e4f049 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc @@ -0,0 +1,210 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemImportFromShareableHandle hipMemImportFromShareableHandle + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t *handle, + * void *osHandle, + * hipMemAllocationHandleType shHandleType)` - + * Imports an allocation from a requested shareable handle type. + */ + +#include +#include +#include + +#include + +#include "hip_vmm_common.hh" + +/** + * Test Description + * ------------------------ + * - Basic sanity test. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemImportFromShareableHandle_Positive_Basic") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + + hipMemGenericAllocationHandle_t imported_handle; + HIP_CHECK(hipMemImportFromShareableHandle(&imported_handle, shareable_handle, + hipMemHandleTypePosixFileDescriptor)); + + HIP_CHECK(hipMemRelease(handle));
+} + +/** + * Test Description + * ------------------------ + * - Basic multiprocess sanity test. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemImportFromShareableHandle_Positive_MultiProc") { + int fd[2]; + REQUIRE(pipe(fd) == 0); + + auto pid = fork(); + REQUIRE(pid >= 0); + + if (pid == 0) { // child + REQUIRE(close(fd[1]) == 0); + + void* shareable_handle = nullptr; + REQUIRE(read(fd[0], &shareable_handle, sizeof(shareable_handle)) >= 0); + REQUIRE(close(fd[0]) == 0); + + REQUIRE(shareable_handle != nullptr); + + HIP_CHECK(hipFree(0)); + + hipMemGenericAllocationHandle_t imported_handle; + HIP_CHECK(hipMemImportFromShareableHandle(&imported_handle, shareable_handle, + hipMemHandleTypePosixFileDescriptor)); + + exit(0); + } else { // parent + REQUIRE(close(fd[0]) == 0); + + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + + REQUIRE(write(fd[1], &shareable_handle, sizeof(shareable_handle)) >= 0); + REQUIRE(close(fd[1]) == 0); + + REQUIRE(wait(NULL) >= 0); + + HIP_CHECK(hipMemRelease(handle)); + } +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemImportFromShareableHandle_Negative_Parameters") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + + hipMemGenericAllocationHandle_t imported_handle; + +#if HT_AMD + SECTION("handle == nullptr") { + HIP_CHECK_ERROR(hipMemImportFromShareableHandle(nullptr, shareable_handle, + hipMemHandleTypePosixFileDescriptor), + hipErrorInvalidValue); + } +#endif + + SECTION("shareableHandle == nullptr") { + HIP_CHECK_ERROR(hipMemImportFromShareableHandle(&imported_handle, nullptr, + hipMemHandleTypePosixFileDescriptor), + hipErrorInvalidValue); + } + + SECTION("invalid handleType") { + HIP_CHECK_ERROR( + hipMemImportFromShareableHandle(&imported_handle, shareable_handle, hipMemHandleTypeWin32), + hipErrorNotSupported); + } + + HIP_CHECK(hipMemRelease(handle)); +} \ No newline at end of file diff --git a/catch/unit/virtualMemoryManagement/hipMemMap.cc b/catch/unit/virtualMemoryManagement/hipMemMap.cc new file mode 100644 index 0000000000..7a46f0f3cb --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemMap.cc @@ -0,0 +1,632 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemMap hipMemMap + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemMap (void* ptr, + * size_t size, + * size_t offset, + * hipMemGenericAllocationHandle_t handle, + * unsigned long long flags)` - + * Maps an allocation handle to a reserved virtual address range. + */ + +#include + +#include "hip_vmm_common.hh" + +constexpr int N = (1 << 13); +constexpr int num_buf = 3; +constexpr int initializer = 0; + +/** + * Test Description + * ------------------------ + * - Check if a physical chunk can be mapped/unmapped to same + * vmm address range repeatedly. This test validates physical memory + * reuse using same vmm range.
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_SameMemoryReuse") { + constexpr int iterations = 20; + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N), C_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + C_h[idx] = idx * idx; + } + // Allocate a physical memory chunk + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate num_buf virtual address ranges + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + for (int i = 0; i < iterations; i++) { + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + // Set access to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); +#if HT_NVIDIA + square_kernel<<>>(static_cast(ptrA)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + 
HIP_CHECK(hipStreamSynchronize(0)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); +#endif + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + } + // Release resources + HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Check if a physical chunk can be mapped/unmapped for multiple + * vmm addresses. This test validates physical memory reuse using + * different vmm ranges. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU") { + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N), C_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + C_h[idx] = idx * idx; + } + // Allocate a physical memory chunk + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate num_buf virtual address ranges + hipDeviceptr_t ptrA[num_buf]; + for (int buf = 0; buf < num_buf; buf++) { + HIP_CHECK(hipMemAddressReserve(&ptrA[buf], size_mem, 0, 0, 0)); + } + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + for (int buf = 0; buf 
< num_buf; buf++) { + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA[buf], size_mem, 0, handle, 0)); + // Set access to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA[buf], size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA[buf], A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA[buf], buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); +#if HT_NVIDIA + square_kernel<<>>( + static_cast(ptrA[buf])); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA[buf], buffer_size)); + HIP_CHECK(hipStreamSynchronize(0)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); +#endif + HIP_CHECK(hipMemUnmap(ptrA[buf], size_mem)); + } + // Release resources + HIP_CHECK(hipMemRelease(handle)); + for (int buf = 0; buf < num_buf; buf++) { + HIP_CHECK(hipMemAddressFree(ptrA[buf], size_mem)); + } +} + +/** + * Test Description + * ------------------------ + * - Check if a physical chunk can be mapped to multiple + * vmm addresses at the same time and check data values integrity + * between different VMMs. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_PhysicalMemory_Map2MultVMMs") { + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Allocate a physical memory chunk + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate num_buf virtual address ranges + hipDeviceptr_t ptrA[num_buf]; + for (int buf = 0; buf < num_buf; buf++) { + HIP_CHECK(hipMemAddressReserve(&ptrA[buf], size_mem, 0, 0, 0)); + } + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + for (int buf = 0; buf < num_buf; buf++) { + HIP_CHECK(hipMemMap(ptrA[buf], size_mem, 0, handle, 0)); + } + // Copy data to VMM via ptrA[0] + HIP_CHECK(hipMemSetAccess(ptrA[0], size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA[0], A_h.data(), buffer_size)); + // Validate the data contained in VMM using ptrA[0], ptrA[1], + // ......, ptrA[num_buf-1] + for (int buf = 0; buf < num_buf; buf++) { + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA[buf], buffer_size)); + REQUIRE(true == 
std::equal(B_h.begin(), B_h.end(), A_h.data())); + } + // Release resources + HIP_CHECK(hipMemRelease(handle)); + for (int buf = 0; buf < num_buf; buf++) { + HIP_CHECK(hipMemAddressFree(ptrA[buf], size_mem)); + } +} + +/** + * Test Description + * ------------------------ + * - Check if a physical chunk can be mapped/unmapped for + * multiple vmm addresses. This test validates physical memory + * reuse using different vmm ranges on multiple devices. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_PhysicalMemoryReuse_MultiDev") { + int devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + if (devicecount < 2) { + HipTest::HIP_SKIP_TEST("Machine is Single GPU. Skipping Test.."); + return; + } + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + for (int devX = 0; devX < devicecount; devX++) { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, devX)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Allocate a physical memory chunk + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate devicecount virtual address ranges + std::vector ptrA(devicecount); + for (int devY = 0; devY < devicecount; devY++) { + HIP_CHECK(hipMemAddressReserve(&ptrA[devY], size_mem, 0, 0, 0)); + } + for (int devY = 0; devY < devicecount; devY++) { 
+ hipDevice_t deviceToTest; + HIP_CHECK(hipDeviceGet(&deviceToTest, devY)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = deviceToTest; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipSetDevice(devY)); + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA[devY], size_mem, 0, handle, 0)); + // Set access to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA[devY], size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA[devY], A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA[devY], buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA[devY], size_mem)); + } + HIP_CHECK(hipSetDevice(0)); // set the device back to 0. + // Release resources + HIP_CHECK(hipMemRelease(handle)); + for (int devY = 0; devY < devicecount; devY++) { + HIP_CHECK(hipMemAddressFree(ptrA[devY], size_mem)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Check if different physical chunk can be mapped/unmapped + * for single vmm address. This test validates VMM memory reuse + * using different physical ranges. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_VMMMemoryReuse_SingleGPU") { + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle[num_buf]; + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N), C_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + C_h[idx] = idx * idx; + } + // Allocate a physical memory chunk + for (int buf = 0; buf < num_buf; buf++) { + HIP_CHECK(hipMemCreate(&handle[buf], size_mem, &prop, 0)); + } + // Allocate num_buf virtual address ranges + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Map ptrA to physical chunk + for (int buf = 0; buf < num_buf; buf++) { + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle[buf], 0)); + // Set access to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); +#if HT_NVIDIA + square_kernel<<>>(static_cast(ptrA)); + 
HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + HIP_CHECK(hipStreamSynchronize(0)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); +#endif + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + } + // Release resources + for (int buf = 0; buf < num_buf; buf++) { + HIP_CHECK(hipMemRelease(handle[buf])); + } + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Check if different physical chunk allocated in different devices + * can be mapped/unmapped to single vmm address. This test validates VMM + * memory reuse using different physical ranges. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_VMMMemoryReuse_MultiGPU") { + int deviceId = 0, devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + if (devicecount < 2) { + HipTest::HIP_SKIP_TEST("Machine is Single GPU. Skipping Test.."); + return; + } + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + std::vector handle(devicecount); + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Allocate a physical memory chunk + for (int dev = 0; dev < devicecount; dev++) { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, dev)); + prop.location.id = device; + HIP_CHECK(hipMemCreate(&handle[dev], 
size_mem, &prop, 0)); + } + // Allocate devicecount virtual address ranges + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + // Map ptrA to physical chunk + SECTION("Set Access of VMM to Different GPU") { + for (int dev = 0; dev < devicecount; dev++) { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, dev)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipSetDevice(dev)); + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle[dev], 0)); + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + } + } + SECTION("Set Access of VMM to default GPU") { + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + for (int dev = 0; dev < devicecount; dev++) { + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle[dev], 0)); + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + } + } + HIP_CHECK(hipSetDevice(0)); + // Release resources + for (int dev = 0; dev < devicecount; dev++) { + HIP_CHECK(hipMemRelease(handle[dev])); + } + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Check if a partial part of a physical chunk can be mapped/unmapped + * to a smaller vmm 
address. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_MapPartialPhysicalMem") { + int deviceId = 0; + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Allocate a bigger physical memory chunk of twice size_mem + HIP_CHECK(hipMemCreate(&handle, 2 * size_mem, &prop, 0)); + // Allocate virtual address range of size size_mem + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + // Release resources + HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - 
Check if a partial part of a VMM range can be mapped/unmapped + * to a physical address. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_MapPartialVMMMem") { + int deviceId = 0; + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Allocate a bigger physical memory chunk of size_mem + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range of size twice size_mem + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, 2 * size_mem, 0, 0, 0)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + // Release resources + HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemAddressFree(ptrA, 2 
* size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Negative argument tests: hipMemMap must return hipErrorInvalidValue for a null address, a zero size, or an invalid offset (-1 converted to size_t) on an otherwise valid reserved range. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMap_negative") { + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + + SECTION("nullptr to ptrA") { + REQUIRE(hipMemMap(nullptr, size_mem, 0, handle, 0) == hipErrorInvalidValue); + } + + SECTION("pass zero to size") { + REQUIRE(hipMemMap(ptrA, 0, 0, handle, 0) == hipErrorInvalidValue); + } + + SECTION("pass negative to offset") { + REQUIRE(hipMemMap(ptrA, size_mem, -1, handle, 0) == hipErrorInvalidValue); + } + + HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} diff --git a/catch/unit/virtualMemoryManagement/hipMemMapArrayAsync.cc b/catch/unit/virtualMemoryManagement/hipMemMapArrayAsync.cc new file mode 100644 index 0000000000..2bc726f82c --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemMapArrayAsync.cc @@ -0,0 +1,110 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemMapArrayAsync hipMemMapArrayAsync + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemMapArrayAsync(hipArrayMapInfo *mapInfoList, + * unsigned int count, + * hipStream_t stream)` - + * Maps or unmaps subregions of sparse HIP arrays and sparse HIP mipmapped arrays. + */ + +#include +#include +#include + +#include "hip_vmm_common.hh" + +/** + * Test Description + * ------------------------ + * - Basic sanity test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemMapArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMapArrayAsync_Positive_Basic") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + CHECK_IMAGE_SUPPORT; + + hipmipmappedArray array; + + HIP_ARRAY3D_DESCRIPTOR desc = {}; + using vec_info = vector_info; + desc.Format = vec_info::format; + desc.NumChannels = vec_info::size; + desc.Width = 1; + desc.Height = 1; + desc.Flags = CUDA_ARRAY3D_SPARSE; + + unsigned int levels = 2; + + HIP_CHECK(hipMipmappedArrayCreate(&array, &desc, levels)); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + prop.allocFlags.usage = CU_MEM_CREATE_USAGE_TILE_POOL; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityRecommended)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity, &prop, 0)); + + hipArrayMapInfo map_info_list = {}; + map_info_list.resourceType = HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY; + map_info_list.resource.mipmap = array; + map_info_list.subresourceType = hipArraySparseSubresourceTypeSparseLevel; + map_info_list.subresource.sparseLevel.extentWidth = 1; + map_info_list.subresource.sparseLevel.extentHeight = 1; + map_info_list.subresource.sparseLevel.extentDepth = 1; + map_info_list.memOperationType = hipMemOperationTypeMap; + map_info_list.memHandleType = hipMemHandleTypeGeneric; + map_info_list.memHandle.memHandle = handle; + map_info_list.deviceBitMask = 0x1; + + StreamGuard stream(Streams::created); + + HIP_CHECK(hipMemMapArrayAsync(&map_info_list, 1, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + map_info_list.memOperationType = hipMemOperationTypeUnmap; + 
map_info_list.memHandle.memHandle = NULL; + HIP_CHECK(hipMemMapArrayAsync(&map_info_list, 1, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + HIP_CHECK(hipMemRelease(handle)); + + HIP_CHECK(hipMipmappedArrayDestroy(array)); +} \ No newline at end of file diff --git a/catch/unit/virtualMemoryManagement/hipMemRelease.cc b/catch/unit/virtualMemoryManagement/hipMemRelease.cc new file mode 100644 index 0000000000..e544710757 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemRelease.cc @@ -0,0 +1,46 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemRelease hipMemRelease + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipMemRelease(hipMemGenericAllocationHandle_t handle)` - + * Release a memory handle representing a memory allocation which was previously + * allocated through hipMemCreate. 
+ */ + +#include + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemRelease.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemRelease_negative") { + SECTION("Nullptr to handle") { REQUIRE(hipMemRelease(nullptr) == hipErrorInvalidValue); } +} diff --git a/catch/unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc b/catch/unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc new file mode 100644 index 0000000000..1b4d00dea3 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc @@ -0,0 +1,141 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @addtogroup hipMemRetainAllocationHandle hipMemRetainAllocationHandle + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, + * void* addr)` - + * Returns the allocation handle of the backing memory allocation given the address. + */ + +#include +#include + +#include "hip_vmm_common.hh" + +#define DATA_SIZE (1 << 13) + +/** + * Test Description + * ------------------------ + * - Create a VM mapped to physical memory. Input addr to + * hipMemRetainAllocationHandle and validate the handle. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemRetainAllocationHandle_SetGet") { + HIP_CHECK(hipFree(0)); + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + // Test hipMemRetainAllocationHandle + hipMemGenericAllocationHandle_t gethandle; + // Check beginning of VMM ptr + HIP_CHECK(hipMemRetainAllocationHandle(&gethandle, reinterpret_cast(ptrA))); + REQUIRE(gethandle == handle); + 
HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemRetainAllocationHandle_NegTst") { + HIP_CHECK(hipFree(0)); + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + // Test hipMemRetainAllocationHandle + hipMemGenericAllocationHandle_t gethandle; + SECTION("nullptr handle") { + REQUIRE(hipMemRetainAllocationHandle(nullptr, reinterpret_cast(ptrA)) == + hipErrorInvalidValue); + } + SECTION("nullptr Vmm ptr") { + REQUIRE(hipMemRetainAllocationHandle(&gethandle, nullptr) == hipErrorInvalidValue); + } + SECTION("not mapped address") { + hipDeviceptr_t ptrB; + HIP_CHECK(hipMemAddressReserve(&ptrB, size_mem, 0, 0, 0)); + REQUIRE(hipMemRetainAllocationHandle(&gethandle, reinterpret_cast(ptrB)) == + hipErrorInvalidValue); + HIP_CHECK(hipMemAddressFree(ptrB, size_mem)); + } + 
HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + SECTION("unmapped address") { + REQUIRE(hipMemRetainAllocationHandle(&gethandle, reinterpret_cast(ptrA)) == + hipErrorInvalidValue); + } + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} diff --git a/catch/unit/virtualMemoryManagement/hipMemSetGetAccess.cc b/catch/unit/virtualMemoryManagement/hipMemSetGetAccess.cc new file mode 100644 index 0000000000..dca05c5f1a --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemSetGetAccess.cc @@ -0,0 +1,1492 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemSetAccess hipMemSetAccess + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemSetAccess (void* ptr, + * size_t size, + * const hipMemAccessDesc* desc, + * size_t count)` - + * Set the access flags for each location specified in desc for the given + * virtual address range. 
+ */ + +#ifdef __linux__ +#include +#include +#endif + +#include +#include + +#include "hipMallocManagedCommon.hh" +#include "hip_vmm_common.hh" + +#define THREADS_PER_BLOCK 512 +#define NUM_OF_BUFFERS 3 +#define DATA_SIZE (1 << 13) +#define NEW_DATA_SIZE (2 * DATA_SIZE) + +constexpr int initializer = 0; + +/** + Kernel to perform Square of input data. + */ +static __global__ void square_kernel(int* Buff) { + int i = threadIdx.x + blockDim.x * blockIdx.x; + int temp = Buff[i] * Buff[i]; + Buff[i] = temp; +} + +/** + * Test Description + * ------------------------ + * - Create a VM mapped to physical memory. Set the access of the + * VMM chunk to device 0. Validate that flags = hipMemAccessFlagsProtReadWrite + * is returned by hipMemGetAccess() when location is set to device 0. + * Validate that flags = hipMemAccessFlagsProtNone is returned by + * hipMemGetAccess() when location is set to device 1. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_SetGet") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, 
size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // Validate using hipMemGetAccess() + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = device; + unsigned long long flags = 0; // NOLINT + HIP_CHECK(hipMemGetAccess(&flags, &location, ptrA)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + int devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + if (devicecount > 1) { + flags = 0; + HIP_CHECK(hipDeviceGet(&device, 1)); + location.type = hipMemLocationTypeDevice; + location.id = device; + HIP_CHECK(hipMemGetAccess(&flags, &location, ptrA)); + REQUIRE(flags == hipMemAccessFlagsProtNone); + } + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Mult Device Functional Test: Create a VM mapped to physical memory. + * Set the access of the VMM chunk to both device 0 and device 1. + * Validate that flags = hipMemAccessFlagsProtReadWrite is returned by + * hipMemGetAccess() when location is set to device 0. Validate that + * flags = hipMemAccessFlagsProtReadWrite is returned by hipMemGetAccess() + * when location is set to device 1. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_MultDevSetGet") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0, device_count = 0; + hipDevice_t device0, device1; + HIP_CHECK(hipGetDeviceCount(&device_count)); + if (device_count < 2) { + HipTest::HIP_SKIP_TEST("Need 2 GPUs to run test. Skipping Test.."); + return; + } + + HIP_CHECK(hipDeviceGet(&device0, deviceId)); + checkVMMSupported(device0); + HIP_CHECK(hipDeviceGet(&device1, (deviceId + 1))); + checkVMMSupported(device1); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device0; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc[2]; + accessDesc[0].location.type = hipMemLocationTypeDevice; + accessDesc[0].location.id = device0; + accessDesc[0].flags = hipMemAccessFlagsProtReadWrite; + accessDesc[1].location.type = hipMemLocationTypeDevice; + accessDesc[1].location.id = device1; + accessDesc[1].flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 and 1 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc[0], 2)); + // Validate using hipMemGetAccess() + hipMemLocation location; + location.type = 
hipMemLocationTypeDevice; + location.id = device0; + unsigned long long flags = 0; // NOLINT + HIP_CHECK(hipMemGetAccess(&flags, &location, ptrA)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + location.type = hipMemLocationTypeDevice; + location.id = device1; + flags = 0; + HIP_CHECK(hipMemGetAccess(&flags, &location, ptrA)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Create a VM mapped to physical memory. Set the access of the VMM chunk + * to device 0. Validate that flags = 3 is returned by hipMemGetAccess() + * for entire virtual address range when location is set to device 0. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_EntireVMMRangeSetGet") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + 
accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // Validate hipMemGetAccess() + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = device; + unsigned long long flags = 0; // NOLINT + HIP_CHECK(hipMemGetAccess(&flags, &location, ptrA)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + uint64_t uiptr = reinterpret_cast(ptrA); + uiptr += (size_mem - 1); + HIP_CHECK(hipMemGetAccess(&flags, &location, reinterpret_cast(uiptr))); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemGetAccess_NegTst") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 
0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // Validate hipMemGetAccess() -ve scenarios + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = device; + unsigned long long flags = 0; // NOLINT + hipError_t status = hipSuccess; + status = hipMemGetAccess(nullptr, &location, ptrA); + REQUIRE(status == hipErrorInvalidValue); + status = hipMemGetAccess(&flags, nullptr, ptrA); + REQUIRE(status == hipErrorInvalidValue); + uint64_t uiptr = reinterpret_cast(ptrA); + uiptr += size_mem; + status = hipMemGetAccess(&flags, &location, reinterpret_cast(uiptr)); + REQUIRE(status == hipErrorInvalidValue); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Test VMM functionality on multiple device. In each device, create + * a VM mapped to physical memory of the device, copy test data to the VM + * address range, launch a kernel to perform operation on the data and + * validate the result. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_FuncTstOnMultDev") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0, devicecount = 0; + hipDevice_t device; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + for (deviceId = 0; deviceId < devicecount; deviceId++) { + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipDeviceptr_t ptrA; + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU deviceId + std::vector A_h(N), B_h(N); + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + // Launch square kernel + hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), dim3(THREADS_PER_BLOCK), 0, 0, + static_cast(ptrA)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + HIP_CHECK(hipDeviceSynchronize()); + REQUIRE(true == 
std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); + } +} + +/** + * Test Description + * ------------------------ + * - Allocate physical memory and map it to a VMM range. + * Access (Read/Write) the virtual pointer directly on host. + * Ensure this behavior for all devices on host. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_AccessDirectlyFromHost") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + if (devicecount < 2) { + HipTest::HIP_SKIP_TEST("Machine is Single GPU. Skipping Test.."); + return; + } + for (int dev = 0; dev < devicecount; dev++) { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, dev)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate a physical memory chunk + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate num_buf virtual address ranges + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + int* vptr = reinterpret_cast(ptrA); + for (int 
idx = 0; idx < N; idx++) { + *(vptr + idx) = idx; + } + // validate + for (int idx = 0; idx < N; idx++) { + REQUIRE(*(vptr + idx) == idx); + } + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + // Release resources + HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); + } +} + +/** + * Test Description + * ------------------------ + * - Create a virtual memnory chunk and set the property of + * the range to read/write. Write to the memory chunk. Change + * the property of the range to read only. Check if the memory + * range can be read. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemMap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_ChangeAccessProp") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int dev = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, dev)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data + std::vector A_h(N), B_h(N); + // Initialize with data + for (size_t idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Allocate a physical memory chunk + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate num_buf virtual address ranges + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + SECTION("Change 
ReadWrite to Read") { + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + // Change property of virtual memory range to read only + accessDesc.flags = hipMemAccessFlagsProtRead; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // validate + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + } + SECTION("Change Read to ReadWrite") { + accessDesc.flags = hipMemAccessFlagsProtRead; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // Change property of virtual memory range to read only + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + } + SECTION("Change Inaccessible to ReadWrite") { + accessDesc.flags = hipMemAccessFlagsProtNone; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + // Change property of virtual memory range to read only + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + } +#if HT_NVIDIA + SECTION("Check error while writing on Read-Only memory") { + accessDesc.flags = hipMemAccessFlagsProtRead; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + REQUIRE(hipErrorInvalidValue == hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + } + SECTION("Check error while writing on inaccessible memory") { + accessDesc.flags = hipMemAccessFlagsProtNone; + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + 
REQUIRE(hipErrorInvalidValue == hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + } +#endif + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + // Release resources + HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Test Virtual Memory to Unified Memory data transfer. Allocate + * a Virtual Memory chunk and a Unified Memory chunk. Test if data can + * be exchanged between these chunks. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_Vmm2UnifiedMemCpy") { + auto managed = HmmAttrPrint(); + if (managed != 1) { + HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory.Skipping Test.."); + return; + } + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA, ptrB; + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make 
the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + int *ptrA_h, *ptrB_h; + HIP_CHECK(hipMallocManaged(&ptrA_h, buffer_size)); + HIP_CHECK(hipMallocManaged(&ptrB_h, buffer_size)); + for (int idx = 0; idx < N; idx++) { + ptrA_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, ptrA_h, buffer_size)); + HIP_CHECK(hipMalloc(&ptrB, buffer_size)); + HIP_CHECK(hipMemcpyDtoD(ptrB, ptrA, buffer_size)); + HIP_CHECK(hipMemcpyDtoH(ptrB_h, ptrB, buffer_size)); + bool bPassed = true; + for (int idx = 0; idx < N; idx++) { + if (ptrB_h[idx] != idx) { + bPassed = false; + break; + } + } + REQUIRE(bPassed == true); + HIP_CHECK(hipFree(ptrB)); + HIP_CHECK(hipFree(ptrA_h)); + HIP_CHECK(hipFree(ptrB_h)); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Test Virtual Memory to Device Memory data transfer. Allocate a Virtual + * Memory chunk and a Device Memory chunk. Test if data can be exchanged + * between these chunks. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_Vmm2DevMemCpy") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA, ptrB; + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + std::vector A_h(N), B_h(N); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMalloc(&ptrB, buffer_size)); + HIP_CHECK(hipMemcpyDtoD(ptrB, ptrA, buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrB, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipFree(ptrB)); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * 
------------------------ + * - VM to Device Memory Copy. Allocate a Virtual Memory chunk and a + * Peer Device Memory chunk. Test if data can be exchanged between + * these chunks using hipMemcpyDtoD. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerDevMemCpy") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0, value = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + std::vector A_h(N), B_h(N); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + int devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + // Check Peer Access + for (deviceId = 1; deviceId < devicecount; deviceId++) { + int canAccessPeer = 0; + 
hipDevice_t device_other; + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, deviceId)); + if (0 == canAccessPeer) { + WARN("Machine does not support Peer Access\n"); + break; + } + HIP_CHECK(hipDeviceGet(&device_other, deviceId)); + HIP_CHECK(hipDeviceGetAttribute(&value, hipDeviceAttributeVirtualMemoryManagementSupported, + device_other)); + if (value == 0) { + // Virtual Memory Mgmt is not supported + WARN("Machine does not support Virtual Memory Management\n"); + break; + } + HIP_CHECK(hipSetDevice(deviceId)); + hipDeviceptr_t dptr_peer; + HIP_CHECK(hipMalloc(&dptr_peer, buffer_size)); + HIP_CHECK(hipMemcpyDtoD(dptr_peer, ptrA, buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), dptr_peer, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipFree(dptr_peer)); + } + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - VM to Device Memory Copy: Allocate a Virtual Memory chunk and + * a Peer Device Memory chunk. Test if data can be exchanged between + * these chunks using hipMemcpyPeer. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0, value = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + std::vector A_h(N), B_h(N); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + int devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + // Check Peer Access + for (deviceId = 1; deviceId < devicecount; deviceId++) { + std::fill(B_h.begin(), B_h.end(), initializer); + int canAccessPeer = 0; + hipDevice_t device_other; + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, deviceId)); + if (0 == canAccessPeer) { + WARN("Machine does not 
support Peer Access\n"); + break; + } + HIP_CHECK(hipDeviceGet(&device_other, deviceId)); + HIP_CHECK(hipDeviceGetAttribute(&value, hipDeviceAttributeVirtualMemoryManagementSupported, + device_other)); + if (value == 0) { + // Virtual Memory Mgmt is not supported + WARN("Machine does not support Virtual Memory Management\n"); + break; + } + HIP_CHECK(hipSetDevice(deviceId)); + hipDeviceptr_t dptr_peer; + HIP_CHECK(hipMalloc(&dptr_peer, buffer_size)); + HIP_CHECK(hipMemcpyPeer(dptr_peer, deviceId, ptrA, 0, buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), dptr_peer, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipFree(dptr_peer)); + } + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - VM to VM copy: Allocate memory and map it to an address space in + * device 0(PtrA). Allocate another chunk of memory and map it to an + * address space in device 0(PtrB). Check if data can be copied from + * PtrA -> PtrB using hipMemcpy. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMMemCpy") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + // Allocate physical memory + hipMemGenericAllocationHandle_t handle1, handle2; + HIP_CHECK(hipMemCreate(&handle1, size_mem, &prop, 0)); + HIP_CHECK(hipMemCreate(&handle2, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA, ptrB; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrB, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle1, 0)); + HIP_CHECK(hipMemMap(ptrB, size_mem, 0, handle2, 0)); + HIP_CHECK(hipMemRelease(handle1)); + HIP_CHECK(hipMemRelease(handle2)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the addresses accessible to GPU 0 + HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + HIP_CHECK(hipMemSetAccess(ptrB, size_mem, &accessDesc, 1)); + std::vector A_h(N), B_h(N); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoD(ptrB, ptrA, buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrB, 
buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemUnmap(ptrB, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrB, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Functional Test: Allocate memory and map it to an address space in + * device 0(PtrA). Allocate another chunk of memory and map it to an + * address space in device 1(PtrB). Check if data can be copied from + * PtrA -> PtrB using hipMemcpyPeer. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0, value = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + HIP_CHECK(hipMemRelease(handle)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU 0 + 
HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); + std::vector<int> A_h(N), B_h(N); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + int devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + for (deviceId = 1; deviceId < devicecount; deviceId++) { + int canAccessPeer = 0; + hipDevice_t device_other; + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, deviceId)); + if (0 == canAccessPeer) { + WARN("Machine does not support Peer Access\n"); + break; + } + std::fill(B_h.begin(), B_h.end(), initializer); + HIP_CHECK(hipDeviceGet(&device_other, deviceId)); + HIP_CHECK(hipDeviceGetAttribute(&value, hipDeviceAttributeVirtualMemoryManagementSupported, + device_other)); + if (value == 0) { + // Virtual Memory Mgmt is not supported + WARN("Machine does not support Virtual Memory Management\n"); + break; + } + HIP_CHECK(hipSetDevice(deviceId)); + hipMemAllocationProp prop_loc{}; + prop_loc.type = hipMemAllocationTypePinned; + prop_loc.location.type = hipMemLocationTypeDevice; + prop_loc.location.id = device_other; // Peer device + HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop_loc, + hipMemAllocationGranularityMinimum)); + size_t size_mem_loc = ((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle_loc; + // Allocate physical memory on the peer device + HIP_CHECK(hipMemCreate(&handle_loc, size_mem_loc, &prop_loc, 0)); + // Reserve a virtual range and back it with the peer's chunk (handle_loc), not device 0's handle + hipDeviceptr_t ptrB; + HIP_CHECK(hipMemAddressReserve(&ptrB, size_mem_loc, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrB, size_mem_loc, 0, handle_loc, 0)); + HIP_CHECK(hipMemRelease(handle_loc)); + // Set access + hipMemAccessDesc accessDesc_loc = {}; + accessDesc_loc.location.type = hipMemLocationTypeDevice; + accessDesc_loc.location.id = device_other; + accessDesc_loc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to the peer device + HIP_CHECK(hipMemSetAccess(ptrB, size_mem_loc, 
&accessDesc_loc, 1)); + HIP_CHECK(hipMemcpyPeer(ptrB, deviceId, ptrA, 0, buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrB, buffer_size)); + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + HIP_CHECK(hipMemUnmap(ptrB, size_mem_loc)); + HIP_CHECK(hipMemAddressFree(ptrB, size_mem_loc)); + } + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a chunk of memory and map it to device0. Allocate another + * chunk of memory and map it to device1. Check if these 2 distinct memory + * chunks can be mapped to a single address space. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_MapPhysChksFromMulDev") { + int devicecount = 0; + HIP_CHECK(hipGetDeviceCount(&devicecount)); + int numOfBuffers = devicecount; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int), granularity = 0; + int deviceId = 0; + // Allocate resources for all gpus + hipMemGenericAllocationHandle_t* handle = static_cast( + malloc(sizeof(hipMemGenericAllocationHandle_t) * numOfBuffers)); + REQUIRE(handle != nullptr); + size_t* size_mem = static_cast(malloc(sizeof(size_t) * numOfBuffers)); + REQUIRE(size_mem != nullptr); + size_t total_mem = 0; + // Create memory chunks + for (deviceId = 0; deviceId < numOfBuffers; deviceId++) { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop_loc{}; + prop_loc.type = hipMemAllocationTypePinned; + prop_loc.location.type = hipMemLocationTypeDevice; + prop_loc.location.id = device; // Current Devices + HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop_loc, + hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_mem[deviceId] = ((granularity + buffer_size - 1) / granularity) 
* granularity; + total_mem = total_mem + size_mem[deviceId]; + // Allocate physical memory chunks + HIP_CHECK(hipMemCreate(&handle[deviceId], size_mem[deviceId], &prop_loc, 0)); + } + // Allocate virtual address range for all the memory chunks + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, total_mem, 0, 0, 0)); + // Map the allocated chunks + for (deviceId = 0; deviceId < numOfBuffers; deviceId++) { + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + uint64_t uiptr = reinterpret_cast(ptrA); + uiptr = uiptr + deviceId * size_mem[deviceId]; + HIP_CHECK( + hipMemMap(reinterpret_cast(uiptr), size_mem[deviceId], 0, handle[deviceId], 0)); + HIP_CHECK(hipMemRelease(handle[deviceId])); + // Set access + hipMemAccessDesc accessDesc_loc = {}; + accessDesc_loc.location.type = hipMemLocationTypeDevice; + accessDesc_loc.location.id = device; + accessDesc_loc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to deviceId + HIP_CHECK( + hipMemSetAccess(reinterpret_cast(uiptr), size_mem[deviceId], &accessDesc_loc, 1)); + } + std::vector A_h(numOfBuffers * N), B_h(numOfBuffers * N); + // Fill Data + for (int idx = 0; idx < (numOfBuffers * N); idx++) { + A_h[idx] = idx * idx; + } + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), numOfBuffers * buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, numOfBuffers * buffer_size)); + // Validate Results + REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); + for (deviceId = 0; deviceId < numOfBuffers; deviceId++) { + uint64_t uiptr = reinterpret_cast(ptrA); + uiptr = uiptr + deviceId * size_mem[deviceId]; + HIP_CHECK(hipMemUnmap(reinterpret_cast(uiptr), size_mem[deviceId])); + } + HIP_CHECK(hipMemAddressFree(ptrA, total_mem)); + free(handle); + free(size_mem); +} + +class vmm_resize_class { + size_t current_size_tot; + size_t current_size_rounded_tot; + hipDeviceptr_t ptrVmm; + std::vector vhandle; + std::vector vsize; + // allocate initial VMM memory chunk + int 
allocate_vmm(hipDeviceptr_t* ptr, hipDevice_t device, size_t size) { + size_t granularity = 0; + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_rounded = ((granularity + size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_rounded, &prop, 0)); + // Store the handle for future reference + vhandle.push_back(handle); + vsize.push_back(size_rounded); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrVmm, size_rounded, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrVmm, size_rounded, 0, handle, 0)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the address accessible to GPU device + HIP_CHECK(hipMemSetAccess(ptrVmm, size_rounded, &accessDesc, 1)); + *ptr = ptrVmm; + current_size_tot += size; + current_size_rounded_tot += size_rounded; + return 0; + } + + public: + vmm_resize_class(hipDeviceptr_t* ptr, hipDevice_t device, size_t size) + : current_size_tot(0), current_size_rounded_tot(0) { + allocate_vmm(ptr, device, size); + } + // Free all VMM + void free_vmm() { + for (hipMemGenericAllocationHandle_t& myhandle : vhandle) { + HIP_CHECK(hipMemRelease(myhandle)); + } + HIP_CHECK(hipMemUnmap(ptrVmm, current_size_rounded_tot)); + HIP_CHECK(hipMemAddressFree(ptrVmm, current_size_rounded_tot)); + } + // grow memory chunk + int grow_vmm(hipDeviceptr_t* ptr, hipDevice_t device, size_t size) { + size_t granularity = 0; + if (size <= current_size_tot) { + return -1; + } + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + 
prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + // Only the difference between the requested total and the current total is newly allocated + size_t grow_size = (size - current_size_tot); + size_t size_rounded = ((granularity + grow_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_rounded, &prop, 0)); + // Store the handle for future reference + vhandle.push_back(handle); + vsize.push_back(size_rounded); + // Allocate virtual address range + // Unmap and Free the old vmm + HIP_CHECK(hipMemUnmap(ptrVmm, current_size_rounded_tot)); + HIP_CHECK(hipMemAddressFree(ptrVmm, current_size_rounded_tot)); + HIP_CHECK(hipMemAddressReserve(&ptrVmm, (size_rounded + current_size_rounded_tot), 0, 0, 0)); + // Map every chunk (old and new) back to back into the new range. Each chunk + // starts at the sum of the sizes of all chunks before it, so keep a running + // address instead of adding only the previous chunk's size. + uint64_t uiptr = reinterpret_cast<uint64_t>(ptrVmm); + int idx = 0; + for (hipMemGenericAllocationHandle_t& myhandle : vhandle) { + HIP_CHECK(hipMemMap(reinterpret_cast<void*>(uiptr), vsize[idx], 0, myhandle, 0)); + uiptr = uiptr + vsize[idx]; + idx++; + } + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + // Make the whole range accessible to the given device + HIP_CHECK(hipMemSetAccess(ptrVmm, (size_rounded + current_size_rounded_tot), &accessDesc, 1)); + *ptr = ptrVmm; + // size is the requested new total, not an increment + current_size_tot = size; + current_size_rounded_tot += size_rounded; + return 0; + } +}; + +/** + * Test Description + * ------------------------ + * - Testing memory resize: Allocate physical memory and map it to virtual + * address range (PtrA). After setting device permission, copy data from + * host to device. Allocate another chunk of memory of a different size. 
+ * Map the new chunk to offset (PtrA + size of old chunk). + * After setting device permission, copy data from host to device at + * offset (PtrA + size of old chunk). Validate both the old data and new + * data after copying back to host. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_GrowVMM") { + hipDeviceptr_t ptr; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + // Create VMM Object of size buffer_size + vmm_resize_class resizeobj(&ptr, device, buffer_size); + // Inititalize Host Buffer + int* ptrA_h = static_cast(malloc(buffer_size)); + REQUIRE(ptrA_h != nullptr); + for (int idx = 0; idx < N; idx++) { + ptrA_h[idx] = idx; + } + // Copy to VMM + HIP_CHECK(hipMemcpyHtoD(ptr, ptrA_h, buffer_size)); + // Resize the VMM + int Nnew = NEW_DATA_SIZE; + size_t buffer_size_new = Nnew * sizeof(int); + if (-1 == resizeobj.grow_vmm(&ptr, device, buffer_size_new)) { + WARN("Virtual Memory Management Grow Failed"); + return; + } + free(ptrA_h); + ptrA_h = static_cast(malloc(buffer_size_new - buffer_size)); + REQUIRE(ptrA_h != nullptr); + for (int idx = 0; idx < (Nnew - N); idx++) { + ptrA_h[idx] = N + idx; + } + int* ptrB_h = static_cast(malloc(buffer_size_new)); + REQUIRE(ptrB_h != nullptr); + uint64_t uiptr = reinterpret_cast(ptr); + uiptr = uiptr + buffer_size; + HIP_CHECK(hipMemcpyHtoD(reinterpret_cast(uiptr), ptrA_h, (buffer_size_new - buffer_size))); + HIP_CHECK(hipMemcpyDtoH(ptrB_h, ptr, buffer_size_new)); + bool bPassed = true; + for (int idx = 0; idx < Nnew; idx++) { + if (ptrB_h[idx] != idx) { + bPassed = false; + break; + } + } + REQUIRE(bPassed == true); + free(ptrB_h); + free(ptrA_h); + resizeobj.free_vmm(); +} + +std::atomic bTestPassed{1}; +#define NUM_THREADS 5 +void 
test_thread(hipDevice_t device) { + hipDeviceptr_t ptr; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + // Create VMM Object of size buffer_size + vmm_resize_class vmmobj(&ptr, device, buffer_size); + // Inititalize Host Buffer + int* ptrA_h = static_cast(malloc(buffer_size)); + REQUIRE(ptrA_h != nullptr); + for (int idx = 0; idx < N; idx++) { + ptrA_h[idx] = idx; + } + // Copy to VMM + HIP_CHECK(hipMemcpyHtoD(ptr, ptrA_h, buffer_size)); + int* ptrB_h = static_cast(malloc(buffer_size)); + REQUIRE(ptrB_h != nullptr); + HIP_CHECK(hipMemcpyDtoH(ptrB_h, ptr, buffer_size)); + bool bPassed = true; + for (int idx = 0; idx < N; idx++) { + if (ptrB_h[idx] != idx) { + bPassed = false; + break; + } + } + if (bPassed) { + bTestPassed.fetch_and(1); + } else { + bTestPassed.fetch_and(0); + } + free(ptrB_h); + free(ptrA_h); + vmmobj.free_vmm(); +} + +/** + * Test Description + * ------------------------ + * - Multithreaded test: Allocate unique virtual memory chunks from + * multiple threads. Transfer data to these chunks from host and execute + * kernel function on these data. Validate the results. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_Multithreaded") { + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + std::thread T[NUM_THREADS]; + for (int i = 0; i < NUM_THREADS; i++) { + T[i] = std::thread(test_thread, device); + } + // Wait until all the threads finish their execution + for (int i = 0; i < NUM_THREADS; i++) { + T[i].join(); + } + REQUIRE(1 == bTestPassed.load()); +} + +#ifdef __linux__ + +bool test_mprocess() { + int fd[2]; + bool testResult = false; + pid_t childpid; + int testResultChild = 0; + int deviceId = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + // create pipe descriptors + pipe(fd); + // fork process + childpid = fork(); + if (childpid > 0) { // Parent + close(fd[1]); + hipDeviceptr_t ptr; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupportedRetVal(device); + // Create VMM Object of size buffer_size + vmm_resize_class vmmobj(&ptr, device, buffer_size); + // Inititalize Host Buffer + std::vector A_h(N), B_h(N); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Copy to VMM + HIP_CHECK(hipMemcpyHtoD(ptr, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptr, buffer_size)); + bool bPassed = std::equal(B_h.begin(), B_h.end(), A_h.data()); + vmmobj.free_vmm(); + // parent will wait to read the device cnt + read(fd[0], &testResultChild, sizeof(int)); + if (testResultChild == 0) { + testResult = bPassed & false; + } else { + testResult = bPassed & true; + } + // close the read-descriptor + close(fd[0]); + // wait for child exit + wait(NULL); + } else if (!childpid) { // Child + close(fd[0]); + hipDeviceptr_t ptr; + hipDevice_t device; + + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupportedRetVal(device); + // Create VMM 
Object of size buffer_size + vmm_resize_class vmmobj(&ptr, device, buffer_size); + // Inititalize Host Buffer + std::vector A_h(N), B_h(N); + for (int idx = 0; idx < N; idx++) { + A_h[idx] = idx; + } + // Copy to VMM + HIP_CHECK(hipMemcpyHtoD(ptr, A_h.data(), buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptr, buffer_size)); + int result = 0; + if (true == std::equal(B_h.begin(), B_h.end(), A_h.data())) { + result = 1; + } + vmmobj.free_vmm(); + // send the value on the write-descriptor: + write(fd[1], &result, sizeof(int)); + // close the write descriptor: + close(fd[1]); + exit(0); + } + return testResult; +} + +/** + * Test Description + * ------------------------ + * - Multiprocess test: Allocate unique virtual memory chunks from + * multiple processes. Transfer data to these chunks from host and + * execute kernel function on these data. Validate the results. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_MultiProc") { REQUIRE(true == test_mprocess()); } + +#endif + +/** + * Test Description + * ------------------------ + * - Negative Tests for hipMemSetAccess() + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_negative") { + size_t granularity = 0; + constexpr int N = DATA_SIZE; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = 
((granularity + buffer_size - 1) / granularity) * granularity; + hipMemGenericAllocationHandle_t handle; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + hipDeviceptr_t ptrA; + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + // Set access + hipMemAccessDesc accessDesc = {}; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = device; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + + SECTION("nullptr to ptrA") { + REQUIRE(hipMemSetAccess(nullptr, size_mem, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION("pass zero to size") { + REQUIRE(hipMemSetAccess(&ptrA, 0, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION("pass a size greater than reserved size") { + REQUIRE(hipMemSetAccess(&ptrA, size_mem + 1, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION("pass a size less than reserved size") { + REQUIRE(hipMemSetAccess(&ptrA, size_mem - 1, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION("invalid location type") { + accessDesc.location.type = hipMemLocationTypeInvalid; + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION("invalid id") { + accessDesc.location.id = -1; + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION("pass location id as > highest device number") { + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + accessDesc.location.id = numDevices; // set to non existing device + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION("invalid flag") { + accessDesc.flags = static_cast(-1); + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); + } + + SECTION(" pass zero to count") { + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 0) == hipErrorInvalidValue); + } + 
+ SECTION("pass desc as nullptr") { + REQUIRE(hipMemSetAccess(&ptrA, size_mem, nullptr, 1) == hipErrorInvalidValue); + } + + SECTION("uninitialized virtual memory") { + hipDeviceptr_t ptrB; + HIP_CHECK(hipMemAddressReserve(&ptrB, size_mem, 0, 0, 0)); + REQUIRE(hipMemSetAccess(&ptrB, size_mem, &accessDesc, 1) == hipErrorInvalidValue); + } + + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + SECTION("unmapped virtual memory") { + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); + } + + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); + HIP_CHECK(hipMemRelease(handle)); +} diff --git a/catch/unit/virtualMemoryManagement/hipMemUnmap.cc b/catch/unit/virtualMemoryManagement/hipMemUnmap.cc new file mode 100644 index 0000000000..eeadb83099 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemUnmap.cc @@ -0,0 +1,88 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @addtogroup hipMemUnmap hipMemUnmap + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemUnmap (void* ptr, size_t size)` - + * Unmap memory allocation of a given address range. + */ + + +#include + +#include "hip_vmm_common.hh" + +constexpr int N = (1 << 13); + +/** + * Test Description + * ------------------------ + * - Negative Tests + * ------------------------ + * - unit/virtualMemoryManagement/hipMemUnmap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemUnmap_negative") { + size_t granularity = 0; + size_t buffer_size = N * sizeof(int); + int deviceId = 0; + hipDevice_t device; + + HIP_CHECK(hipDeviceGet(&device, deviceId)); + checkVMMSupported(device); + + hipMemAllocationProp prop{}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; // Current Devices + + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + REQUIRE(granularity > 0); + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; + + hipMemGenericAllocationHandle_t handle; + hipDeviceptr_t ptrA; + // Allocate physical memory + HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); + // Allocate virtual address range + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); + + SECTION("nullptr to ptrA") { REQUIRE(hipMemUnmap(nullptr, size_mem) == hipErrorInvalidValue); } + + SECTION("pass zero to size") { REQUIRE(hipMemUnmap(ptrA, 0) == hipErrorInvalidValue); } + +#if HT_NVIDIA + SECTION("unmap a smaller size") { + REQUIRE(hipMemUnmap(ptrA, (size_mem - 1)) == hipErrorInvalidValue); + } +#endif + + HIP_CHECK(hipMemRelease(handle)); + HIP_CHECK(hipMemUnmap(ptrA, size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); +} diff --git a/catch/unit/virtualMemoryManagement/hipMemVmm_old.cc 
b/catch/unit/virtualMemoryManagement/hipMemVmm_old.cc new file mode 100644 index 0000000000..c2258f057d --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemVmm_old.cc @@ -0,0 +1,95 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* Test Case Description: + 1) This testcase verifies the basic scenario - supported on + all devices +*/ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + This testcase verifies HIP Mem VMM API basic scenario - supported on all devices + */ + +TEST_CASE("Unit_hipMemVmm_Basic") { + int vmm = 0; + HIP_CHECK(hipDeviceGetAttribute(&vmm, hipDeviceAttributeVirtualMemoryManagementSupported, 0)); + INFO("hipDeviceAttributeVirtualMemoryManagementSupported: " << vmm); + + if (vmm == 0) { + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeVirtualMemoryManagement " + "attribute. 
Hence skipping the testing with Pass result.\n"); + return; + } + + size_t granularity = 0; + + hipMemAllocationProp memAllocationProp; + memAllocationProp.type = hipMemAllocationTypePinned; + memAllocationProp.location.id = 0; + memAllocationProp.location.type = hipMemLocationTypeDevice; + + HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &memAllocationProp, + hipMemAllocationGranularityRecommended)); + + size_t size = 4 * 1024; + void* reservedAddress{nullptr}; + HIP_CHECK(hipMemAddressReserve(&reservedAddress, size, granularity, nullptr, 0)); + + hipMemGenericAllocationHandle_t gaHandle{nullptr}; + HIP_CHECK(hipMemCreate(&gaHandle, size, &memAllocationProp, 0)); + + HIP_CHECK(hipMemMap(reservedAddress, size, 0, gaHandle, 0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + hipMemAccessDesc desc; + desc.location.type = hipMemLocationTypeDevice; + desc.location.id = device; + desc.flags = hipMemAccessFlagsProtReadWrite; + std::vector values(size); + const char value = 1; + + HIP_CHECK(hipMemSetAccess(reservedAddress, size, &desc, 1)); + HIP_CHECK(hipMemset(reservedAddress, value, size)); + HIP_CHECK(hipMemcpy(&values[0], reservedAddress, size, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < size; ++i) { + REQUIRE(values[i] == value); + } + + HIP_CHECK(hipMemUnmap(reservedAddress, size)); + + HIP_CHECK(hipMemRelease(gaHandle)); + HIP_CHECK(hipMemAddressFree(reservedAddress, size)); +} diff --git a/catch/unit/virtualMemoryManagement/hip_vmm_common.hh b/catch/unit/virtualMemoryManagement/hip_vmm_common.hh new file mode 100644 index 0000000000..a43af62758 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hip_vmm_common.hh @@ -0,0 +1,49 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "hip_test_context.hh" + +#define checkVMMSupported(device) \ + { \ + int value = 0; \ + hipDeviceAttribute_t attr = hipDeviceAttributeVirtualMemoryManagementSupported; \ + HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); \ + if (value == 0) { \ + HipTest::HIP_SKIP_TEST("Machine does not support VMM. Skipping Test.."); \ + return; \ + } \ + } + +#define checkVMMSupportedRetVal(device) \ + { \ + int value = 0; \ + hipDeviceAttribute_t attr = hipDeviceAttributeVirtualMemoryManagementSupported; \ + HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); \ + if (value == 0) { \ + HipTest::HIP_SKIP_TEST("Machine does not support VMM. 
Skipping Test.."); \ + return true; \ + } \ + } + +constexpr int threadsPerBlk = 64; \ No newline at end of file diff --git a/catch/unit/vulkan_interop/CMakeLists.txt b/catch/unit/vulkan_interop/CMakeLists.txt index a0c39ebb0b..a0af39c7b7 100644 --- a/catch/unit/vulkan_interop/CMakeLists.txt +++ b/catch/unit/vulkan_interop/CMakeLists.txt @@ -10,6 +10,19 @@ set(TEST_SRC hipDestroyExternalSemaphore.cc ) +if(UNIX) + set(TEST_SRC ${TEST_SRC} + hipGraphAddExternalSemaphoresSignalNode.cc + hipGraphAddExternalSemaphoresWaitNode.cc + hipGraphExternalSemaphoresSignalNodeGetParams.cc + hipGraphExternalSemaphoresSignalNodeSetParams.cc + hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + hipGraphExternalSemaphoresWaitNodeSetParams.cc + hipGraphExternalSemaphoresWaitNodeGetParams.cc + hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + ) +endif() + if(WIN32) set(Vulkan_LIBRARY $ENV{VULKAN_SDK}/Lib/vulkan-l) else() diff --git a/catch/unit/vulkan_interop/graph_tests_common.hh b/catch/unit/vulkan_interop/graph_tests_common.hh new file mode 100644 index 0000000000..bb28ec5ea5 --- /dev/null +++ b/catch/unit/vulkan_interop/graph_tests_common.hh @@ -0,0 +1,76 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +#include +#include + +template void GraphAddNodeCommonNegativeTests(F f, hipGraph_t graph) { + hipGraphNode_t node = nullptr; + SECTION("graph == nullptr") { + HIP_CHECK_ERROR(f(&node, nullptr, nullptr, 0), hipErrorInvalidValue); + } + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(f(nullptr, graph, nullptr, 0), hipErrorInvalidValue); + } + + SECTION("dependencies == nullptr with size != 0") { + HIP_CHECK_ERROR(f(&node, graph, nullptr, 1), hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-202 +#if HT_NVIDIA + SECTION("Node in dependency is from different graph") { + hipGraph_t other_graph = nullptr; + HIP_CHECK(hipGraphCreate(&other_graph, 0)); + hipGraphNode_t other_node = nullptr; + HIP_CHECK(hipGraphAddEmptyNode(&other_node, other_graph, nullptr, 0)); + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddEmptyNode(&node, graph, nullptr, 0)); + HIP_CHECK_ERROR(f(&node, graph, &other_node, 1), hipErrorInvalidValue); + HIP_CHECK(hipGraphDestroy(other_graph)); + } +#endif + + SECTION("Invalid numNodes") { + hipGraphNode_t dep_node = nullptr; + HIP_CHECK(hipGraphAddEmptyNode(&dep_node, graph, nullptr, 0)); + HIP_CHECK_ERROR(f(&node, graph, &dep_node, 2), hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-201 +#if HT_NVIDIA + SECTION("Duplicate node in dependencies") { + hipGraphNode_t dep_node = nullptr; + // Need to create two nodes to avoid overlap with Invalid numNodes case + // First one is left dangling as the graph 
will be destroyed after the section anyway + HIP_CHECK(hipGraphAddEmptyNode(&dep_node, graph, nullptr, 0)); + HIP_CHECK(hipGraphAddEmptyNode(&dep_node, graph, nullptr, 0)); + hipGraphNode_t deps[] = {dep_node, dep_node}; + HIP_CHECK_ERROR(f(&node, graph, deps, 2), hipErrorInvalidValue); + } +#endif +} \ No newline at end of file diff --git a/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc new file mode 100644 index 0000000000..7d6e80971d --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc @@ -0,0 +1,139 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifdef _WIN64 +#define NOMINMAX +#endif /* _WIN64 */ + +#include + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" +#include "graph_tests_common.hh" + +/** + * @addtogroup hipGraphAddExternalSemaphoresSignalNode hipGraphAddExternalSemaphoresSignalNode + * @{ + * @ingroup GraphTest + * `hipGraphAddExternalSemaphoresSignalNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const + * hipGraphNode_t* pDependencies, size_t numDependencies, const + * hipExternalSemaphoreSignalNodeParams* nodeParams);` - Creates an external semaphore signal node and + * adds it to a graph. + */ + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. + * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates an external Vulkan binary semaphore. + * - Creates a Vulkan fence and signals semaphore asynchronously. + * - Waits for the operation to finish successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Positive_Basic") { + SignalExternalSemaphoreCommon(GraphExtSemaphoreSignalWrapper<>); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Creates an external Vulkan timeline semaphore. + * - Imports the semaphore and signals. + * - Waits for the operation to finish successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Vulkan_Positive_Timeline_Semaphore") { + SignalExternalTimelineSemaphoreCommon(GraphExtSemaphoreSignalWrapper<>); +} + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. + * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates multiple external Vulkan binary semaphores. + * - Creates a Vulkan fence and signals semaphores. + * - Waits for the operations to finish successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Vulkan_Positive_Multiple_Semaphores") { + SignalExternalMultipleSemaphoresCommon(GraphExtSemaphoreSignalWrapper<>); +} +#endif + + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphAddExternalSemaphoresSignalNode behavior with invalid arguments: + * -# Nullptr graph + * -# Nullptr graph node + * -# Invalid numDependencies for null list of dependencies + * -# Node in dependency is from different graph + * -# Invalid numNodes + * -# Duplicate node in dependencies + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Vulkan_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + 
auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + + GraphAddNodeCommonNegativeTests( + std::bind(hipGraphAddExternalSemaphoresSignalNode, _1, _2, _3, _4, &node_params), graph); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc new file mode 100644 index 0000000000..17d2f87195 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc @@ -0,0 +1,138 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifdef _WIN64 +#define NOMINMAX +#endif /* _WIN64 */ + +#include + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" +#include "graph_tests_common.hh" + +/** + * @addtogroup hipGraphAddExternalSemaphoresWaitNode hipGraphAddExternalSemaphoresWaitNode + * @{ + * @ingroup GraphTest + * `hipGraphAddExternalSemaphoresWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const + * hipGraphNode_t* pDependencies, size_t numDependencies, const hipExternalSemaphoreWaitNodeParams* + * nodeParams)` - Creates an external semaphore wait node and adds it to a graph. + */ + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. + * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates an external Vulkan binary semaphore. + * - Creates a Vulkan fence and signals semaphore asynchronously. + * - Waits for the operation to finish successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Positive_Basic") { + WaitExternalSemaphoreCommon(GraphExtSemaphoreWaitWrapper<>); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Creates an external Vulkan timeline semaphore. + * - Imports the semaphore and signals. + * - Waits for the operation to finish successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Vulkan_Positive_Timeline_Semaphore") { + WaitExternalTimelineSemaphoreCommon(GraphExtSemaphoreWaitWrapper<>); +} +#endif + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. + * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates multiple external Vulkan binary semaphores. + * - Creates a Vulkan fence and signals semaphores. + * - Waits for the operations to finish successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Vulkan_Positive_Multiple_Semaphores") { + WaitExternalMultipleSemaphoresCommon(GraphExtSemaphoreWaitWrapper<>); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphAddExternalSemaphoresWaitNode behavior with invalid arguments: + * -# Nullptr graph + * -# Nullptr graph node + * -# Invalid numDependencies for null list of dependencies + * -# Node in dependency is from different graph + * -# Invalid numNodes + * -# Duplicate node in dependencies + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Vulkan_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore = 
ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &wait_params; + node_params.numExtSems = 1; + + GraphAddNodeCommonNegativeTests( + std::bind(hipGraphAddExternalSemaphoresWaitNode, _1, _2, _3, _4, &node_params), graph); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc new file mode 100644 index 0000000000..79d970b950 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc @@ -0,0 +1,194 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" + +/** + * @addtogroup hipGraphExecExternalSemaphoresSignalNodeSetParams + * hipGraphExecExternalSemaphoresSignalNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExecExternalSemaphoresSignalNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t + * hNode, const hipExternalSemaphoreSignalNodeParams* nodeParams)` - Updates node parameters in the + * external semaphore signal node in the given graphExec. + */ + +static hipError_t GraphExecSemaphoreSetParamsSignalWrapper( + hipExternalSemaphore_t* extSemArray, hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + hipExternalSemaphoreSignalParams* signal_params = + new hipExternalSemaphoreSignalParams[numExtSems](); /* zero-init flags/reserved */ + for (unsigned int i = 0; i < numExtSems; i++) { /* placeholder values, replaced in the exec */ + signal_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreSignalNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = signal_params; + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &initial_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphExecExternalSemaphoresSignalNodeSetParams(graph_exec, node, &node_params)); + + hipExternalSemaphoreSignalNodeParams retrieved_params{}; + memset(&retrieved_params, 0, sizeof(hipExternalSemaphoreSignalNodeParams)); + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + + HIP_CHECK(hipGraphLaunch(graph_exec, 
stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + delete[] signal_params; + + return hipSuccess; +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node signals the external binary semaphore and + * operation finishes successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Positive_Basic") { + SignalExternalSemaphoreCommon(GraphExecSemaphoreSetParamsSignalWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node signals the external timeline semaphore and + * operation finishes successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + SignalExternalTimelineSemaphoreCommon(GraphExecSemaphoreSetParamsSignalWrapper); +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node signals the external binary semaphores and + * operation finishes successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + SignalExternalMultipleSemaphoresCommon(GraphExecSemaphoreSetParamsSignalWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExecExternalSemaphoresSignalNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graphexec + * -# Nullptr graph node + * -# Nullptr params + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + 
hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + SECTION("pGraphExec == nullptr") { + HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresSignalNodeSetParams(nullptr, node, &node_params), + hipErrorInvalidValue); + } + + SECTION("node == nullptr") { + HIP_CHECK_ERROR( + hipGraphExecExternalSemaphoresSignalNodeSetParams(graph_exec, nullptr, &node_params), + hipErrorInvalidValue); + } + + SECTION("params == nullptr") { + HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresSignalNodeSetParams(graph_exec, node, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc new file mode 100644 index 0000000000..51910b880e --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc @@ -0,0 +1,191 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" + +/** + * @addtogroup hipGraphExecExternalSemaphoresWaitNodeSetParams + * hipGraphExecExternalSemaphoresWaitNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExecExternalSemaphoresWaitNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + * const hipExternalSemaphoreWaitNodeParams* nodeParams)` - Updates node parameters in the external + * semaphore wait node in the given graphExec. 
+ */ + +static hipError_t GraphExecSemaphoreSetParamsWaitWrapper( + hipExternalSemaphore_t* extSemArray, hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + hipExternalSemaphoreWaitParams* wait_params = new hipExternalSemaphoreWaitParams[numExtSems](); + for (unsigned int i = 0; i < numExtSems; i++) { /* placeholder values, replaced in the exec */ + wait_params[i].flags = 0; + wait_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreWaitNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = wait_params; + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &initial_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphExecExternalSemaphoresWaitNodeSetParams(graph_exec, node, &node_params)); + + hipExternalSemaphoreWaitNodeParams retrieved_params{}; + memset(&retrieved_params, 0, sizeof(hipExternalSemaphoreWaitNodeParams)); + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + + HIP_CHECK(hipGraphLaunch(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + delete[] wait_params; + + return hipSuccess; +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. 
The + * graph is run and it is verified that the graph node waits for the external binary semaphore and + * operation finishes successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Positive_Basic") { + WaitExternalSemaphoreCommon(GraphExecSemaphoreSetParamsWaitWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node waits for the external timeline semaphore and + * operation finishes successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + WaitExternalTimelineSemaphoreCommon(GraphExecSemaphoreSetParamsWaitWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node waits for the external binary semaphores and + * operation finishes successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + WaitExternalMultipleSemaphoresCommon(GraphExecSemaphoreSetParamsWaitWrapper); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExecExternalSemaphoresWaitNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graphexec + * -# Nullptr graph node + * -# Nullptr params + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &wait_params; + node_params.numExtSems = 1; + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + SECTION("pGraphExec == nullptr") { + HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresWaitNodeSetParams(nullptr, node, &node_params), hipErrorInvalidValue); + } + + SECTION("node == nullptr") { + HIP_CHECK_ERROR( + hipGraphExecExternalSemaphoresWaitNodeSetParams(graph_exec, nullptr, &node_params), hipErrorInvalidValue); + } + + SECTION("params == nullptr") { + 
HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresWaitNodeSetParams(graph_exec, node, nullptr), hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc new file mode 100644 index 0000000000..6f6c3c2787 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresSignalNodeGetParams + * hipGraphExternalSemaphoresSignalNodeGetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresSignalNodeGetParams(hipGraphNode_t hNode, + * hipExternalSemaphoreSignalNodeParams* params_out)` - Returns external semaphore signal node + * params. + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Positive_Basic + * - @ref Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Timeline_Semaphore + * - @ref Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Multiple_Semaphores + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresSignalNodeGetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * -# Node is destroyed + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeGetParams_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + hipExternalSemaphoreSignalNodeParams retrieved_params; + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeGetParams(nullptr, 
&retrieved_params), + hipErrorInvalidValue); + } + + SECTION("params_out == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeGetParams(node, nullptr), + hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-208 +#if HT_NVIDIA + SECTION("Node is destroyed") { + hipGraph_t graph_temp = nullptr; + HIP_CHECK(hipGraphCreate(&graph_temp, 0)); + hipGraphNode_t node_temp = nullptr; + HIP_CHECK( + hipGraphAddExternalSemaphoresSignalNode(&node_temp, graph_temp, nullptr, 0, &node_params)); + HIP_CHECK(hipGraphDestroy(graph_temp)); + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeGetParams(node_temp, &retrieved_params), + hipErrorInvalidValue); + } +#endif +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc new file mode 100644 index 0000000000..8f964d966d --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc @@ -0,0 +1,137 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresSignalNodeSetParams + * hipGraphExternalSemaphoresSignalNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresSignalNodeSetParams(hipGraphNode_t hNode, const + * hipExternalSemaphoreSignalNodeParams* nodeParams)` - Updates node parameters in the external + * semaphore signal node. + */ + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node signals the external binary semaphore and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Positive_Basic") { + SignalExternalSemaphoreCommon(GraphExtSemaphoreSignalWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node signals the external timeline semaphore and operation finishes + * successfully.
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + SignalExternalTimelineSemaphoreCommon(GraphExtSemaphoreSignalWrapper); +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node signals the external binary semaphores and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + SignalExternalMultipleSemaphoresCommon(GraphExtSemaphoreSignalWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresSignalNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore;
+ node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeSetParams(nullptr, &node_params), + hipErrorInvalidValue); + } + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("params == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeSetParams(node, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc new file mode 100644 index 0000000000..b6c0034c04 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresWaitNodeGetParams + * hipGraphExternalSemaphoresWaitNodeGetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresWaitNodeGetParams(hipGraphNode_t hNode, + * hipExternalSemaphoreWaitNodeParams* params_out)` - Returns external semaphore wait node params. + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Positive_Basic + * - @ref Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Timeline_Semaphore + * - @ref Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Multiple_Semaphores + */ + + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresWaitNodeGetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * -# Node is destroyed + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeGetParams_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &wait_params; + node_params.numExtSems = 1; + hipExternalSemaphoreWaitNodeParams retrieved_params; + + 
hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeGetParams(nullptr, &retrieved_params), + hipErrorInvalidValue); + } + + SECTION("params_out == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeGetParams(node, nullptr), + hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-208 +#if HT_NVIDIA + SECTION("Node is destroyed") { + hipGraph_t graph_temp = nullptr; + HIP_CHECK(hipGraphCreate(&graph_temp, 0)); + hipGraphNode_t node_temp = nullptr; + HIP_CHECK( + hipGraphAddExternalSemaphoresWaitNode(&node_temp, graph_temp, nullptr, 0, &node_params)); + HIP_CHECK(hipGraphDestroy(graph_temp)); + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeGetParams(node_temp, &retrieved_params), + hipErrorInvalidValue); + } +#endif +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc new file mode 100644 index 0000000000..c21c810ac1 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc @@ -0,0 +1,136 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresWaitNodeSetParams + * hipGraphExternalSemaphoresWaitNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresWaitNodeSetParams(hipGraphNode_t hNode, const + * hipExternalSemaphoreWaitNodeParams* nodeParams)` - Updates node parameters in the external + * semaphore wait node. + */ + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node waits for the external binary semaphore and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Positive_Basic") { + WaitExternalSemaphoreCommon(GraphExtSemaphoreWaitWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values.
The graph is run and it is + * verified that the graph node waits for the external timeline semaphore and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + WaitExternalTimelineSemaphoreCommon(GraphExtSemaphoreWaitWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node waits for the external binary semaphores and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + WaitExternalMultipleSemaphoresCommon(GraphExtSemaphoreWaitWrapper); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresWaitNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore =
ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &wait_params; + node_params.numExtSems = 1; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeSetParams(nullptr, &node_params), + hipErrorInvalidValue); + } + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("params == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeSetParams(node, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc b/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc index 4485a3ad5f..64ae1e3637 100644 --- a/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc +++ b/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc @@ -20,152 +20,20 @@ THE SOFTWARE. 
*/ #include "vulkan_test.hh" - -constexpr bool enable_validation = false; +#include "signal_semaphore_common.hh" TEST_CASE("Unit_hipSignalExternalSemaphoresAsync_Vulkan_Positive_Binary_Semaphore") { - VulkanTest vkt(enable_validation); - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - const auto command_buffer = vkt.GetCommandBuffer(); - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); - hipExternalSemaphoreSignalParams signal_params = {}; - signal_params.params.fence.value = 0; - HIP_CHECK(hipSignalExternalSemaphoresAsync(&hip_ext_semaphore, &signal_params, 1, nullptr)); - HIP_CHECK(hipDeviceSynchronize()); - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - VkSemaphore waitSemaphores[] = {semaphore}; - // VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT or VK_PIPELINE_STAGE_TRANSFER_BIT can work - VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = waitSemaphores; - 
submit_info.pWaitDstStageMask = waitStages; - const auto fence = vkt.CreateFence(); - VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); - PollStream(nullptr, hipSuccess); - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + SignalExternalSemaphoreCommon(hipSignalExternalSemaphoresAsync); } // Timeline semaphores unsupported on AMD #if HT_NVIDIA TEST_CASE("Unit_hipSignalExternalSemaphoresAsync_Vulkan_Positive_Timeline_Semaphore") { - VulkanTest vkt(enable_validation); - constexpr uint64_t signal_value = 2; - - const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); - const auto hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); - - hipExternalSemaphoreSignalParams signal_params = {}; - signal_params.params.fence.value = signal_value; - - HIP_CHECK(hipSignalExternalSemaphoresAsync(&hip_ext_semaphore, &signal_params, 1, nullptr)); - PollStream(nullptr, hipSuccess); - - uint64_t sem_value = 0u; - VK_CHECK_RESULT(vkGetSemaphoreCounterValue(vkt.GetDevice(), semaphore, &sem_value)); - - REQUIRE(2 == sem_value); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + SignalExternalTimelineSemaphoreCommon(hipSignalExternalSemaphoresAsync); } TEST_CASE("Unit_hipSignalExternalSemaphoresAsync_Vulkan_Positive_Multiple_Semaphores") { - VulkanTest vkt(enable_validation); - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, - VK_BUFFER_USAGE_TRANSFER_DST_BIT); - -#if HT_AMD - constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; -#else - 
constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_TIMELINE; -#endif - - const auto command_buffer = vkt.GetCommandBuffer(); - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - - const auto binary_semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto hip_binary_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); - hipExternalSemaphore_t hip_binary_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); - - const auto timeline_semaphore = vkt.CreateExternalSemaphore(second_semaphore_type); - const auto hip_timeline_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); - hipExternalSemaphore_t hip_timeline_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, - &hip_timeline_sem_handle_desc)); - - uint64_t wait_values[] = {1, 0}; - VkTimelineSemaphoreSubmitInfo timeline_submit_info = {}; - timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - timeline_submit_info.waitSemaphoreValueCount = 2; - timeline_submit_info.pWaitSemaphoreValues = wait_values; - - VkSemaphore wait_semaphores[] = {timeline_semaphore, binary_semaphore}; - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.waitSemaphoreCount = 2; - submit_info.pWaitSemaphores = wait_semaphores; - submit_info.pNext = - 
second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? &timeline_submit_info : nullptr; - const auto fence = vkt.CreateFence(); - VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - - REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); - - hipExternalSemaphoreSignalParams binary_signal_params = {}; - binary_signal_params.params.fence.value = 0; - hipExternalSemaphoreSignalParams timeline_signal_params = {}; - timeline_signal_params.params.fence.value = - second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 2 : 0; - hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; - hipExternalSemaphoreSignalParams signal_params[] = {binary_signal_params, - timeline_signal_params}; - HIP_CHECK(hipSignalExternalSemaphoresAsync(ext_semaphores, signal_params, 2, nullptr)); - - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); - HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); + SignalExternalMultipleSemaphoresCommon(hipSignalExternalSemaphoresAsync); } #endif diff --git a/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc b/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc index edebebe52a..ee8a175b6f 100644 --- a/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc +++ b/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc @@ -20,178 +20,21 @@ THE SOFTWARE. 
*/ #include "vulkan_test.hh" - -constexpr bool enable_validation = false; +#include "wait_semaphore_common.hh" TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Positive_Binary_Semaphore") { - VulkanTest vkt(enable_validation); - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - const auto command_buffer = vkt.GetCommandBuffer(); - - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - - const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); - - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); - - hipExternalSemaphoreWaitParams hip_ext_semaphore_wait_params = {}; - hip_ext_semaphore_wait_params.flags = 0; - hip_ext_semaphore_wait_params.params.fence.value = 0; - HIP_CHECK(hipWaitExternalSemaphoresAsync(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, - nullptr)); - PollStream(nullptr, hipErrorNotReady); - - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &semaphore; - - *src_storage.host_ptr = 42; - - const auto fence = vkt.CreateFence(); - 
VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - - PollStream(nullptr, hipSuccess); - - REQUIRE(42 == *dst_storage.host_ptr); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + WaitExternalSemaphoreCommon(hipWaitExternalSemaphoresAsync); } // Timeline semaphores unsupported on AMD #if HT_NVIDIA TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Positive_Timeline_Semaphore") { - VulkanTest vkt(enable_validation); - - const auto [wait_value, signal_value] = - GENERATE(std::make_pair(2, 2), std::make_pair(2, 3), std::make_pair(3, 2)); - INFO("Wait value: " << wait_value << ", signal value: " << signal_value); - - const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); - const auto hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); - - hipExternalSemaphoreWaitParams hip_ext_semaphore_wait_params = {}; - hip_ext_semaphore_wait_params.flags = 0; - hip_ext_semaphore_wait_params.params.fence.value = wait_value; - HIP_CHECK(hipWaitExternalSemaphoresAsync(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, - nullptr)); - PollStream(nullptr, hipErrorNotReady); - - VkSemaphoreSignalInfo signal_info = {}; - signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; - signal_info.semaphore = semaphore; - signal_info.value = signal_value; - VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); - if (wait_value > signal_value) { - PollStream(nullptr, hipErrorNotReady); - signal_info.value = wait_value; - VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); - } - PollStream(nullptr, hipSuccess); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + 
WaitExternalTimelineSemaphoreCommon(hipWaitExternalSemaphoresAsync); } #endif TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Positive_Multiple_Semaphores") { - VulkanTest vkt(enable_validation); - -#if HT_AMD - constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; -#else - constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_TIMELINE; -#endif - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - const auto command_buffer = vkt.GetCommandBuffer(); - - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - - const auto binary_semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto hip_binary_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); - hipExternalSemaphore_t hip_binary_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); - - const auto timeline_semaphore = vkt.CreateExternalSemaphore(second_semaphore_type); - const auto hip_timeline_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); - hipExternalSemaphore_t hip_timeline_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, &hip_timeline_sem_handle_desc)); - - hipExternalSemaphoreWaitParams binary_semaphore_wait_params = {}; - binary_semaphore_wait_params.params.fence.value = 0; - - 
hipExternalSemaphoreWaitParams timeline_semaphore_wait_params = {}; - timeline_semaphore_wait_params.params.fence.value = - second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 1 : 0; - - hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; - hipExternalSemaphoreWaitParams wait_params[] = {binary_semaphore_wait_params, - timeline_semaphore_wait_params}; - HIP_CHECK(hipWaitExternalSemaphoresAsync(ext_semaphores, wait_params, 2, nullptr)); - - PollStream(nullptr, hipErrorNotReady); - - if (second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE) { - VkSemaphoreSignalInfo signal_info = {}; - signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; - signal_info.semaphore = timeline_semaphore; - signal_info.value = 1; - VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); - - PollStream(nullptr, hipErrorNotReady); - } - - VkSubmitInfo submit_info = {}; - VkSemaphore signal_semaphores[] = {binary_semaphore, timeline_semaphore}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.signalSemaphoreCount = second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 1 : 2; - submit_info.pSignalSemaphores = - second_semaphore_type == VK_SEMAPHORE_TYPE_MAX_ENUM ? 
&binary_semaphore : signal_semaphores; - - const auto fence = vkt.CreateFence(); - VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - - PollStream(nullptr, hipSuccess); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); - HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); + WaitExternalMultipleSemaphoresCommon(hipWaitExternalSemaphoresAsync); } TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Negative_Parameters") { diff --git a/catch/unit/vulkan_interop/signal_semaphore_common.hh b/catch/unit/vulkan_interop/signal_semaphore_common.hh new file mode 100644 index 0000000000..2c8b2f1c86 --- /dev/null +++ b/catch/unit/vulkan_interop/signal_semaphore_common.hh @@ -0,0 +1,236 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include + +constexpr bool enable_validation = false; + +template void SignalExternalSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + + constexpr uint32_t count = 1; + const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + const auto command_buffer = vkt.GetCommandBuffer(); + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 0; + HIP_CHECK(f(&hip_ext_semaphore, &signal_params, 1, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + VkSemaphore waitSemaphores[] = {semaphore}; + // VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT or VK_PIPELINE_STAGE_TRANSFER_BIT can work + VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = waitSemaphores; + submit_info.pWaitDstStageMask = waitStages; + const auto fence = 
vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); + PollStream(nullptr, hipSuccess); + VK_CHECK_RESULT( + vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} + +#if HT_NVIDIA +template void SignalExternalTimelineSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + constexpr uint64_t signal_value = 2; + + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = signal_value; + + HIP_CHECK(f(&hip_ext_semaphore, &signal_params, 1, nullptr)); + PollStream(nullptr, hipSuccess); + + uint64_t sem_value = 0u; + VK_CHECK_RESULT(vkGetSemaphoreCounterValue(vkt.GetDevice(), semaphore, &sem_value)); + + REQUIRE(2 == sem_value); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} + +template void SignalExternalMultipleSemaphoresCommon(F f) { + VulkanTest vkt(enable_validation); + + constexpr uint32_t count = 1; + const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + +#if HT_AMD + constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; +#else + constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_TIMELINE; +#endif + + const auto command_buffer = vkt.GetCommandBuffer(); + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + 
VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + + const auto binary_semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_binary_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); + hipExternalSemaphore_t hip_binary_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); + + const auto timeline_semaphore = vkt.CreateExternalSemaphore(second_semaphore_type); + const auto hip_timeline_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); + hipExternalSemaphore_t hip_timeline_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, &hip_timeline_sem_handle_desc)); + + uint64_t wait_values[] = {1, 0}; + VkTimelineSemaphoreSubmitInfo timeline_submit_info = {}; + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; + timeline_submit_info.waitSemaphoreValueCount = 2; + timeline_submit_info.pWaitSemaphoreValues = wait_values; + + VkSemaphore wait_semaphores[] = {timeline_semaphore, binary_semaphore}; + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + submit_info.waitSemaphoreCount = 2; + submit_info.pWaitSemaphores = wait_semaphores; + submit_info.pNext = + second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 
&timeline_submit_info : nullptr; + const auto fence = vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + + REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); + + hipExternalSemaphoreSignalParams binary_signal_params = {}; + binary_signal_params.params.fence.value = 0; + hipExternalSemaphoreSignalParams timeline_signal_params = {}; + timeline_signal_params.params.fence.value = + second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 2 : 0; + hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; + hipExternalSemaphoreSignalParams signal_params[] = {binary_signal_params, timeline_signal_params}; + HIP_CHECK(f(ext_semaphores, signal_params, 2, nullptr)); + + VK_CHECK_RESULT( + vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); + HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); +} +#endif + +static inline bool operator==(const hipExternalSemaphoreSignalNodeParams& lhs, + const hipExternalSemaphoreSignalNodeParams& rhs) { + bool equal = true; + if (lhs.numExtSems != rhs.numExtSems) { + return false; + } + for (unsigned int i = 0; i < lhs.numExtSems; i++) { + if ((lhs.extSemArray[i] != rhs.extSemArray[i]) || + (lhs.paramsArray[i].params.fence.value != rhs.paramsArray[i].params.fence.value)) { + equal = false; + break; + } + } + return equal; +} + +template +hipError_t GraphExtSemaphoreSignalWrapper(hipExternalSemaphore_t* extSemArray, + hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + hipExternalSemaphoreSignalNodeParams retrieved_params = {}; + memset(&retrieved_params, 0, sizeof(retrieved_params)); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + 
node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + if constexpr (set_params) { + hipExternalSemaphoreSignalParams* signal_params = + new hipExternalSemaphoreSignalParams[numExtSems]; + for (unsigned int i = 0; i < numExtSems; i++) { + signal_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreSignalNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = signal_params; + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &initial_params)); + + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeSetParams(node, &node_params)); + + delete[] signal_params; + } else { + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + } + + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeGetParams(node, &retrieved_params)); + REQUIRE(node_params == retrieved_params); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphLaunch(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; +} diff --git a/catch/unit/vulkan_interop/wait_semaphore_common.hh b/catch/unit/vulkan_interop/wait_semaphore_common.hh new file mode 100644 index 0000000000..e590a6d54a --- /dev/null +++ b/catch/unit/vulkan_interop/wait_semaphore_common.hh @@ -0,0 +1,263 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include + +constexpr bool enable_validation = false; + +template void WaitExternalSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + + constexpr uint32_t count = 1; + const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + const auto command_buffer = vkt.GetCommandBuffer(); + + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); + + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + + hipExternalSemaphoreWaitParams hip_ext_semaphore_wait_params = {}; + hip_ext_semaphore_wait_params.flags = 0; + hip_ext_semaphore_wait_params.params.fence.value = 0; + HIP_CHECK(f(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, nullptr)); + PollStream(nullptr, hipErrorNotReady); + + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &semaphore; + + *src_storage.host_ptr = 42; + + const auto fence = vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + VK_CHECK_RESULT( + 
vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + + PollStream(nullptr, hipSuccess); + + REQUIRE(42 == *dst_storage.host_ptr); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} + +#if HT_NVIDIA +template void WaitExternalTimelineSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + + const auto [wait_value, signal_value] = + GENERATE(std::make_pair(2, 2), std::make_pair(2, 3), std::make_pair(3, 2)); + INFO("Wait value: " << wait_value << ", signal value: " << signal_value); + + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + + hipExternalSemaphoreWaitParams hip_ext_semaphore_wait_params = {}; + hip_ext_semaphore_wait_params.flags = 0; + hip_ext_semaphore_wait_params.params.fence.value = wait_value; + HIP_CHECK(f(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, nullptr)); + PollStream(nullptr, hipErrorNotReady); + + VkSemaphoreSignalInfo signal_info = {}; + signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; + signal_info.semaphore = semaphore; + signal_info.value = signal_value; + VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); + if (wait_value > signal_value) { + PollStream(nullptr, hipErrorNotReady); + signal_info.value = wait_value; + VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); + } + PollStream(nullptr, hipSuccess); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} +#endif + +template void WaitExternalMultipleSemaphoresCommon(F f) { + VulkanTest vkt(enable_validation); + +#if HT_AMD + constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; +#else + constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_TIMELINE; +#endif + + constexpr uint32_t count = 1; + 
const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + const auto command_buffer = vkt.GetCommandBuffer(); + + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + + const auto binary_semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_binary_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); + hipExternalSemaphore_t hip_binary_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); + + const auto timeline_semaphore = vkt.CreateExternalSemaphore(second_semaphore_type); + const auto hip_timeline_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); + hipExternalSemaphore_t hip_timeline_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, &hip_timeline_sem_handle_desc)); + + hipExternalSemaphoreWaitParams binary_semaphore_wait_params = {}; + binary_semaphore_wait_params.params.fence.value = 0; + + hipExternalSemaphoreWaitParams timeline_semaphore_wait_params = {}; + timeline_semaphore_wait_params.params.fence.value = + second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 
1 : 0; + + hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; + hipExternalSemaphoreWaitParams wait_params[] = {binary_semaphore_wait_params, + timeline_semaphore_wait_params}; + HIP_CHECK(f(ext_semaphores, wait_params, 2, nullptr)); + + PollStream(nullptr, hipErrorNotReady); + + if (second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE) { + VkSemaphoreSignalInfo signal_info = {}; + signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; + signal_info.semaphore = timeline_semaphore; + signal_info.value = 1; + VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); + + PollStream(nullptr, hipErrorNotReady); + } + + VkSubmitInfo submit_info = {}; + VkSemaphore signal_semaphores[] = {binary_semaphore, timeline_semaphore}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + submit_info.signalSemaphoreCount = second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 1 : 2; + submit_info.pSignalSemaphores = + second_semaphore_type == VK_SEMAPHORE_TYPE_MAX_ENUM ? 
&binary_semaphore : signal_semaphores; + + const auto fence = vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + VK_CHECK_RESULT( + vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + + PollStream(nullptr, hipSuccess); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); + HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); +} + +static inline bool operator==(const hipExternalSemaphoreWaitNodeParams& lhs, + const hipExternalSemaphoreWaitNodeParams& rhs) { + bool equal = true; + if (lhs.numExtSems != rhs.numExtSems) { + return false; + } + for (unsigned int i = 0; i < lhs.numExtSems; i++) { + if ((lhs.extSemArray[i] != rhs.extSemArray[i]) || + (lhs.paramsArray[i].params.fence.value != rhs.paramsArray[i].params.fence.value)) { + equal = false; + break; + } + } + return equal; +} + +template +hipError_t GraphExtSemaphoreWaitWrapper(hipExternalSemaphore_t* extSemArray, + hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + hipExternalSemaphoreWaitNodeParams retrieved_params = {}; + memset(&retrieved_params, 0, sizeof(retrieved_params)); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + if constexpr (set_params) { + hipExternalSemaphoreWaitParams* wait_params = new hipExternalSemaphoreWaitParams[numExtSems]; + for (unsigned int i = 0; i < numExtSems; i++) { + wait_params[i].flags = 0; + wait_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreWaitNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = wait_params; + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, 
nullptr, 0, &initial_params)); + + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeSetParams(node, &node_params)); + + delete[] wait_params; + } else { + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + } + + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeGetParams(node, &retrieved_params)); + REQUIRE(node_params == retrieved_params); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphLaunch(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; +} diff --git a/catch/unit/warp/CMakeLists.txt b/catch/unit/warp/CMakeLists.txt index 87bb5b0ab1..5fded69b55 100644 --- a/catch/unit/warp/CMakeLists.txt +++ b/catch/unit/warp/CMakeLists.txt @@ -1,14 +1,19 @@ # Common Tests - Test independent of all platforms set(TEST_SRC - warp_shfl_xor.cc - warp_shfl.cc - warp_shfl_up.cc - warp_shfl_down.cc warp_ballot.cc warp_any.cc warp_all.cc ) +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC ${TEST_SRC} + warp_shfl.cc + warp_shfl_xor.cc + warp_shfl_up.cc + warp_shfl_down.cc + ) +endif() + hip_add_exe_to_target(NAME WarpTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) diff --git a/catch/unit/warp/warp_common.hh b/catch/unit/warp/warp_common.hh index d09e96837e..15818366ef 100644 --- a/catch/unit/warp/warp_common.hh +++ b/catch/unit/warp/warp_common.hh @@ -21,6 +21,7 @@ THE SOFTWARE. 
#include #include +#include static __device__ bool deactivate_thread(const uint64_t* const active_masks) { const auto warp = diff --git a/catch/unit/warp/warp_shfl.cc b/catch/unit/warp/warp_shfl.cc index babb814fe4..73913ef672 100644 --- a/catch/unit/warp/warp_shfl.cc +++ b/catch/unit/warp/warp_shfl.cc @@ -100,7 +100,7 @@ template class WarpShfl : public WarpShflTest, T> { * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); @@ -111,11 +111,7 @@ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Positive_Basic", "", int, unsigned int, long, return; } - SECTION("Shfl with specified active mask and input values") { - WarpShfl().run(false); - } + SECTION("Shfl with specified active mask and input values") { WarpShfl().run(false); } - SECTION("Shfl with random active mask and input values") { - WarpShfl().run(true); - } + SECTION("Shfl with random active mask and input values") { WarpShfl().run(true); } } diff --git a/catch/unit/warp/warp_shfl_common.hh b/catch/unit/warp/warp_shfl_common.hh index 97b2677f31..d8fc75eb01 100644 --- a/catch/unit/warp/warp_shfl_common.hh +++ b/catch/unit/warp/warp_shfl_common.hh @@ -25,6 +25,15 @@ THE SOFTWARE. 
#include #include +static bool operator==(__half x, __half y) { + // __heq doesn't have a __host__ version + return static_cast<__half_raw>(x).x == static_cast<__half_raw>(y).x; +} +static bool operator!=(__half x, __half y) { return static_cast<__half_raw>(x).x != static_cast<__half_raw>(y).x; } + +static bool operator==(__half2 x, __half2 y) { return __hbeq2(x, y); } +static bool operator!=(__half2 x, __half2 y) { return !(__hbeq2(x, y)); } + template class WarpShflTest { public: WarpShflTest() : warp_size_{get_warp_size()} {} @@ -82,6 +91,16 @@ template class WarpShflTest { return static_cast( GenerateRandomReal(std::numeric_limits().min(), std::numeric_limits().max())); }); + } else if constexpr (std::is_same_v<__half, T>) { + std::generate_n(input, grid_.thread_count_, [] { + return __float2half(GenerateRandomReal(std::numeric_limits().min(), + std::numeric_limits().max())); + }); + } else if constexpr (std::is_same_v<__half2, T>) { + std::generate_n(input, grid_.thread_count_, [] { + return __float2half2_rn(GenerateRandomReal(std::numeric_limits().min(), + std::numeric_limits().max())); + }); } else { std::generate_n(input, grid_.thread_count_, [] { return static_cast(GenerateRandomInteger(std::numeric_limits().min(), diff --git a/catch/unit/warp/warp_shfl_down.cc b/catch/unit/warp/warp_shfl_down.cc index d42a19c7d8..58ad8528e7 100644 --- a/catch/unit/warp/warp_shfl_down.cc +++ b/catch/unit/warp/warp_shfl_down.cc @@ -100,7 +100,7 @@ template class WarpShflDown : public WarpShflTest, * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Down_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); diff --git a/catch/unit/warp/warp_shfl_up.cc b/catch/unit/warp/warp_shfl_up.cc index 201289f363..5c55c8a9b9 100644 --- 
a/catch/unit/warp/warp_shfl_up.cc +++ b/catch/unit/warp/warp_shfl_up.cc @@ -99,7 +99,7 @@ template class WarpShflUp : public WarpShflTest, T> { * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Up_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); diff --git a/catch/unit/warp/warp_shfl_xor.cc b/catch/unit/warp/warp_shfl_xor.cc index 3edbca1b3a..267bc91119 100644 --- a/catch/unit/warp/warp_shfl_xor.cc +++ b/catch/unit/warp/warp_shfl_xor.cc @@ -97,7 +97,7 @@ template class WarpShflXOR : public WarpShflTest, T> * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_XOR_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); diff --git a/utils/coverage/device_api_list.txt b/utils/coverage/device_api_list.txt index 7588950886..73adb4a3d2 100644 --- a/utils/coverage/device_api_list.txt +++ b/utils/coverage/device_api_list.txt @@ -731,3 +731,9 @@ Device float16 functions [ __hisnan2 __hneg2 ] + +OpenGL Interop [ + hipGLGetDevices + hipGraphicsGLRegisterBuffer + hipGraphicsGLRegisterImage +] \ No newline at end of file diff --git a/utils/coverage/hipAPICoverageUtils.cpp b/utils/coverage/hipAPICoverageUtils.cpp index aaf19dd906..ea55351046 100644 --- a/utils/coverage/hipAPICoverageUtils.cpp +++ b/utils/coverage/hipAPICoverageUtils.cpp @@ -47,6 +47,8 @@ void findAPICallInFile(HipAPI& hip_api, std::string test_module_file) { std::string api_member{"." 
+ hip_api.getName() + "("}; std::string api_newline{" " + hip_api.getName() + "("}; std::string api_templated{" " + hip_api.getName() + "<"}; + std::string api_kernel_def_macro{"_KERNEL_DEF(" + hip_api.getName()}; + std::string api_test_def_macro{"_TEST_DEF(" + hip_api.getName()}; std::string api_restriction{hip_api.getFileRestriction()}; bool found_restriction{false}; @@ -66,7 +68,9 @@ void findAPICallInFile(HipAPI& hip_api, std::string test_module_file) { (line.find(api_member) != std::string::npos) || (line.find(api_newline) != std::string::npos) || (line.find(hip_api.getName() + "(") == 0) || - (line.find(api_templated) != std::string::npos)) { + (line.find(api_templated) != std::string::npos) || + (line.find(api_kernel_def_macro) != std::string::npos) || + (line.find(api_test_def_macro) != std::string::npos)) { if (api_restriction == "" || found_restriction) { hip_api.addFileOccurrence(FileOccurrence(test_module_file, line_number)); } @@ -135,6 +139,8 @@ void findAPITestCaseInFileByAPIName(HipAPI& hip_api, std::string test_module_fil std::string line; std::string test_case_definition{"TEST_CASE("}; + std::string test_def_macro{"_TEST_DEF("}; + std::string test_def_impl_macro{"_TEST_DEF_IMPL("}; std::string test_case{"None"}; while (std::getline(test_module_file_handler, line)) { @@ -146,6 +152,14 @@ void findAPITestCaseInFileByAPIName(HipAPI& hip_api, std::string test_module_fil if (test_case.find("_" + hip_api.getName() + "_") != std::string::npos) { hip_api.addTestCase(TestCaseOccurrence{test_case, test_module_file, line_number}); } + } else if ((line.find(test_def_macro) != std::string::npos) || + (line.find(test_def_impl_macro) != std::string::npos)) { + test_case = line.substr(line.find("(") + 1); + test_case = test_case.substr(0, test_case.find(",")); + if (test_case == hip_api.getName() || test_case == hip_api.getName() + "_wrapper") { + hip_api.addTestCase(TestCaseOccurrence{"Unit_Device_" + test_case + "_Accuracy_Positive", + test_module_file, 
line_number}); + } } } @@ -187,6 +201,7 @@ std::vector extractHipAPIs(std::string& hip_api_header_file, of code shall not be considered. */ std::string hip_api_prefix{"hip"}; + std::string hip_api_prefix_builtin{"__hip"}; std::string group_definition{"@defgroup"}; std::string add_group_definition{"@addtogroup"}; std::string start_of_api_groups{"HIP API"}; @@ -277,7 +292,11 @@ std::vector extractHipAPIs(std::string& hip_api_header_file, Remove all spaces if they exist in the parsed string, e.g., hipError_t hipDeviceSetLimit ( enum hipLimit_t limit, size_t value );. */ - std::string api_name{api_name_no_brackets.substr(api_name_no_brackets.rfind(hip_api_prefix))}; + auto api_name_pos = api_name_no_brackets.rfind(hip_api_prefix_builtin); + if (api_name_pos == std::string::npos) { + api_name_pos = api_name_no_brackets.rfind(hip_api_prefix); + } + std::string api_name{api_name_no_brackets.substr(api_name_pos)}; api_name.erase(std::remove(api_name.begin(), api_name.end(), ' '), api_name.end()); if (!api_group_names_tracker.empty()) { @@ -403,4 +422,4 @@ std::vector extractTestModuleFiles(std::string& tests_root_director std::string findAbsolutePathOfFile(std::string file_path) { return std::filesystem::canonical(std::filesystem::absolute(file_path)); -} +} \ No newline at end of file diff --git a/utils/coverage/mainCoverage.cpp b/utils/coverage/mainCoverage.cpp index cc35cab075..89f7b61652 100644 --- a/utils/coverage/mainCoverage.cpp +++ b/utils/coverage/mainCoverage.cpp @@ -47,6 +47,7 @@ int main(int argc, char** argv) { std::cout << "Number of detected HIP APIs from " << hip_api_header_file << ": " << hip_apis.size() << std::endl; + api_group_names.push_back("Runtime Compilation"); std::vector hip_rtc_apis{extractHipAPIs(hip_rtc_header_file, api_group_names, true)}; std::cout << "Number of detected HIP APIs from " << hip_rtc_header_file << ": " << hip_rtc_apis.size() << std::endl;