SWDEV-1 - Merge github PRs to amd-staging

Change-Id: I2944a63ddc2eec8dc1403d9790ffffbaec343385
Этот коммит содержится в:
Rakesh Roy
2024-03-04 11:51:34 +05:30
родитель 010f4bb0b0 147601aff2
Коммит 57bc68acb1
366 изменённых файлов: 55399 добавлений и 2073 удалений
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+969
Просмотреть файл
@@ -211,6 +211,10 @@
"Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory",
"=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===",
"Unit_hipVectorTypes_test_on_device",
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4",
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4",
"=== Below test is disabled due to defect EXSWHTEC-347 ===",
"Unit_hipPointerSetAttribute_Positive_SyncMemops",
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
"Unit_hiprtc_stdheaders",
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
@@ -222,6 +226,12 @@
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
"Unit_hipMemAddressFree_negative",
"=== Below 2 tests are disabled due to defect EXSWHTEC-369 ===",
"Unit_Device_ilogbf_Accuracy_Positive",
"Unit_Device_ilogb_Accuracy_Positive",
"NOTE: The following test is disabled due to defect - EXSWHTEC-245",
"Unit_hipFuncGetAttribute_Negative_Parameters",
"Unit_hipMemAddressFree_negative",
"Unit_hipMemAddressReserve_AlignmentTest",
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
"Unit_hipMemCreate_ChkWithKerLaunch",
@@ -387,6 +397,261 @@
"Performance_hipMemsetD32Async",
"Performance_hipMemcpy2D_HostToHost",
"Performance_hipMemcpy2DAsync_HostToHost",
"Unit_hipDeviceGetGraphMemAttribute_Positive_ReuseMemory",
"Unit_hipGraphAddNodeTypeEventWait_Positive_Basic",
"Unit_hipDrvGraphAddMemsetNode_Negative_Parameters",
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D",
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D",
"Unit_hipDrvGraphAddMemsetNode_hipMallocManaged",
"Unit_hipDrvGraphAddMemcpyNode_Negative_Parameters",
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DGrad_Positive_ReadModeElementType - char",
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DGrad_Positive_ReadModeElementType - short",
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DGrad_Positive_ReadModeElementType - int",
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DGrad_Positive_ReadModeElementType - float",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - char",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - short",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - int",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - float",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - char",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - short",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - int",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - float",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLod_Positive_ReadModeElementType - char",
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex1DLod_Positive_ReadModeElementType - short",
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex1DLod_Positive_ReadModeElementType - int",
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex1DLod_Positive_ReadModeElementType - float",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3D_Positive_ReadModeElementType - char",
"Unit_tex3D_Positive_ReadModeElementType - unsigned char",
"Unit_tex3D_Positive_ReadModeElementType - short",
"Unit_tex3D_Positive_ReadModeElementType - unsigned short",
"Unit_tex3D_Positive_ReadModeElementType - int",
"Unit_tex3D_Positive_ReadModeElementType - unsigned int",
"Unit_tex3D_Positive_ReadModeElementType - float",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DLod_Positive_ReadModeElementType - char",
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex3DLod_Positive_ReadModeElementType - short",
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex3DLod_Positive_ReadModeElementType - int",
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex3DLod_Positive_ReadModeElementType - float",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DGrad_Positive_ReadModeElementType - char",
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex3DGrad_Positive_ReadModeElementType - short",
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex3DGrad_Positive_ReadModeElementType - int",
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex3DGrad_Positive_ReadModeElementType - float",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - char",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemap_Positive_ReadModeElementType - short",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - int",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemap_Positive_ReadModeElementType - float",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2Dgather_Positive_ReadModeElementType - char",
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned char",
"Unit_tex2Dgather_Positive_ReadModeElementType - short",
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned short",
"Unit_tex2Dgather_Positive_ReadModeElementType - int",
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned int",
"Unit_tex2Dgather_Positive_ReadModeElementType - float",
"Unit_tex2D_Positive_ReadModeElementType - char",
"Unit_tex2D_Positive_ReadModeElementType - unsigned char",
"Unit_tex2D_Positive_ReadModeElementType - short",
"Unit_tex2D_Positive_ReadModeElementType - unsigned short",
"Unit_tex2D_Positive_ReadModeElementType - int",
"Unit_tex2D_Positive_ReadModeElementType - unsigned int",
"Unit_tex2D_Positive_ReadModeElementType - float",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayered_Positive_ReadModeElementType - char",
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLayered_Positive_ReadModeElementType - short",
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLayered_Positive_ReadModeElementType - int",
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLayered_Positive_ReadModeElementType - float",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DGrad_Positive_ReadModeElementType - char",
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DGrad_Positive_ReadModeElementType - short",
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DGrad_Positive_ReadModeElementType - int",
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DGrad_Positive_ReadModeElementType - float",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - char",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - short",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - int",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - float",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLod_Positive_ReadModeElementType - char",
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLod_Positive_ReadModeElementType - short",
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLod_Positive_ReadModeElementType - int",
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLod_Positive_ReadModeElementType - float",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - char",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - short",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - int",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - float",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_hipDrvGetErrorName_Positive_Basic",
"Unit_hipDrvGetErrorString_Positive_Basic",
"Unit_hipModuleLaunchKernel_Negative_Parameters",
"Unit_hipModuleGetTexRef_Positive_Basic",
"Unit_hipExtModuleLaunchKernel_Positive_Basic",
"Unit_hipExtModuleLaunchKernel_Negative_Parameters",
"Unit_hipLaunchKernel_Negative_Parameters",
"Unit_Kernel_Launch_bounds_Negative_OutOfBounds",
"Unit_Kernel_Launch_bounds_Negative_Parameters_RTC",
"Unit_AtomicBuiltins_Negative_Parameters_RTC",
"Note: Test disabled due to defect - EXSWHTEC-151",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Note: Test disabled due to defect - EXSWHTEC-152",
@@ -446,6 +711,710 @@
"Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
"Unit_hipGraphUpload_Functional_multidevice_test",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_StaticAssert_Positive_Basic_RTC",
"Unit_Assert_Positive_Basic_KernelFail",
"=== Below tests are disabled due to defect EXSWHTEC-356 ===",
"Unit_Device___hisinf2_Accuracy_Positive",
"Unit_Device___hisnan2_Accuracy_Positive",
"Unit_Device___hbequ2_Accuracy_Positive",
"Unit_Device___hne_Accuracy_Positive",
"Unit_Device___hne2_Accuracy_Positive",
"Unit_Device___hbne2_Accuracy_Positive",
"Unit_Device___hbgeu2_Accuracy_Positive",
"Unit_Device___hbgtu2_Accuracy_Positive",
"Unit_Device___hbleu2_Accuracy_Positive",
"Unit_Device___hbltu2_Accuracy_Positive",
"=== Below 4 tests are disabled due to defect EXSWHTEC-355 ===",
"Unit_Device___hadd_Sanity_Positive",
"Unit_Device___uhadd_Sanity_Positive",
"Unit_Device___rhadd_Sanity_Positive",
"Unit_Device___urhadd_Sanity_Positive",
"=== SWDEV-435667 : Below tests failed in stress test on 19/01/24 ===",
"Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - float",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - double",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - float",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - double",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned int",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long long",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - float",
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - double",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t",
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t",
"=== Below tests failed in stress test on 25/01/24 ===",
"Unit_atomicAnd_Positive_SameAddress - int",
"Unit_atomicAnd_Positive_SameAddress - unsigned int",
"Unit_atomicAnd_Positive_SameAddress - unsigned long",
"Unit_atomicAnd_Positive_SameAddress - unsigned long long",
"Unit_atomicAnd_Positive_Adjacent_Addresses - int",
"Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned int",
"Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long",
"Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long long",
"Unit_atomicAnd_Positive_Scattered_Addresses - int",
"Unit_atomicAnd_Positive_Scattered_Addresses - unsigned int",
"Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long",
"Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long long",
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - int",
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned int",
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long",
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long long",
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - int",
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - int",
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
"Unit_atomicAnd_Negative_Parameters_RTC",
"Unit_atomicOr_Positive_SameAddress - int",
"Unit_atomicOr_Positive_SameAddress - unsigned int",
"Unit_atomicOr_Positive_SameAddress - unsigned long",
"Unit_atomicOr_Positive_SameAddress - unsigned long long",
"Unit_atomicOr_Positive_Adjacent_Addresses - int",
"Unit_atomicOr_Positive_Adjacent_Addresses - unsigned int",
"Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long",
"Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long long",
"Unit_atomicOr_Positive_Scattered_Addresses - int",
"Unit_atomicOr_Positive_Scattered_Addresses - unsigned int",
"Unit_atomicOr_Positive_Scattered_Addresses - unsigned long",
"Unit_atomicOr_Positive_Scattered_Addresses - unsigned long long",
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - int",
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned int",
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long",
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long long",
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - int",
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - int",
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
"Unit_atomicOr_Negative_Parameters_RTC",
"Unit_atomicXor_Positive_SameAddress - int",
"Unit_atomicXor_Positive_SameAddress - unsigned int",
"Unit_atomicXor_Positive_SameAddress - unsigned long",
"Unit_atomicXor_Positive_SameAddress - unsigned long long",
"Unit_atomicXor_Positive_Adjacent_Addresses - int",
"Unit_atomicXor_Positive_Adjacent_Addresses - unsigned int",
"Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long",
"Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long long",
"Unit_atomicXor_Positive_Scattered_Addresses - int",
"Unit_atomicXor_Positive_Scattered_Addresses - unsigned int",
"Unit_atomicXor_Positive_Scattered_Addresses - unsigned long",
"Unit_atomicXor_Positive_Scattered_Addresses - unsigned long long",
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - int",
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned int",
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long",
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long long",
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - int",
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - int",
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
"Unit_atomicXor_Negative_Parameters_RTC",
"Unit_atomicMin_Positive_SameAddress - int",
"Unit_atomicMin_Positive_SameAddress - unsigned int",
"Unit_atomicMin_Positive_SameAddress - unsigned long",
"Unit_atomicMin_Positive_SameAddress - unsigned long long",
"Unit_atomicMin_Positive_Adjacent_Addresses - int",
"Unit_atomicMin_Positive_Adjacent_Addresses - unsigned int",
"Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long",
"Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long long",
"Unit_atomicMin_Positive_Adjacent_Addresses - float",
"Unit_atomicMin_Positive_Adjacent_Addresses - double",
"Unit_atomicMin_Positive_Scattered_Addresses - int",
"Unit_atomicMin_Positive_Scattered_Addresses - unsigned int",
"Unit_atomicMin_Positive_Scattered_Addresses - unsigned long",
"Unit_atomicMin_Positive_Scattered_Addresses - unsigned long long",
"Unit_atomicMin_Positive_Scattered_Addresses - float",
"Unit_atomicMin_Positive_Scattered_Addresses - double",
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - int",
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned int",
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long",
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long long",
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - int",
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float",
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double",
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - int",
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - float",
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - double",
"Unit_atomicMin_Negative_Parameters_RTC",
"Unit_atomicMax_Positive_SameAddress - int",
"Unit_atomicMax_Positive_SameAddress - unsigned int",
"Unit_atomicMax_Positive_SameAddress - unsigned long",
"Unit_atomicMax_Positive_SameAddress - unsigned long long",
"Unit_atomicMax_Positive_Adjacent_Addresses - int",
"Unit_atomicMax_Positive_Adjacent_Addresses - unsigned int",
"Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long",
"Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long long",
"Unit_atomicMax_Positive_Adjacent_Addresses - float",
"Unit_atomicMax_Positive_Adjacent_Addresses - double",
"Unit_atomicMax_Positive_Scattered_Addresses - int",
"Unit_atomicMax_Positive_Scattered_Addresses - unsigned int",
"Unit_atomicMax_Positive_Scattered_Addresses - unsigned long",
"Unit_atomicMax_Positive_Scattered_Addresses - unsigned long long",
"Unit_atomicMax_Positive_Scattered_Addresses - float",
"Unit_atomicMax_Positive_Scattered_Addresses - double",
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - int",
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned int",
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long",
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long long",
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - int",
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float",
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double",
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - int",
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - float",
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - double",
"Unit_atomicMax_Negative_Parameters_RTC",
"Unit_safeAtomicMin_Positive_Adjacent_Addresses - float",
"Unit_safeAtomicMin_Positive_Adjacent_Addresses - double",
"Unit_safeAtomicMin_Positive_Scattered_Addresses - float",
"Unit_safeAtomicMin_Positive_Scattered_Addresses - double",
"Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float",
"Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double",
"Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float",
"Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double",
"Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - float",
"Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - double",
"Unit_unsafeAtomicMin_Positive_Scattered_Addresses - float",
"Unit_unsafeAtomicMin_Positive_Scattered_Addresses - double",
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float",
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double",
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float",
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double",
"Unit_safeAtomicMax_Positive_Adjacent_Addresses - float",
"Unit_safeAtomicMax_Positive_Adjacent_Addresses - double",
"Unit_safeAtomicMax_Positive_Scattered_Addresses - float",
"Unit_safeAtomicMax_Positive_Scattered_Addresses - double",
"Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float",
"Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double",
"Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float",
"Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double",
"Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - float",
"Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - double",
"Unit_unsafeAtomicMax_Positive_Scattered_Addresses - float",
"Unit_unsafeAtomicMax_Positive_Scattered_Addresses - double",
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float",
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double",
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float",
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - int",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - float",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - double",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - float",
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - double",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - int",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - float",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - double",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - float",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - int",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - float",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - double",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - float",
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - double",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - int",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - float",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - double",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - float",
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - double",
"Unit_atomicExch_Positive - int",
"Unit_atomicExch_Positive - unsigned int",
"Unit_atomicExch_Positive - unsigned long",
"Unit_atomicExch_Positive - unsigned long long",
"Unit_atomicExch_Positive - float",
"Unit_atomicExch_Positive - double",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - int",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - int",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - int",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - int",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - int",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - int",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned int",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long long",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - int",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned int",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - int",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned int",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long",
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long long",
"Unit___hip_atomic_exchange_Positive_Wavefront - int",
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_exchange_Positive_Wavefront - float",
"Unit___hip_atomic_exchange_Positive_Wavefront - double",
"Unit___hip_atomic_exchange_Positive_Workgroup - int",
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_exchange_Positive_Workgroup - float",
"Unit___hip_atomic_exchange_Positive_Workgroup - double",
"=== Below tests cause timeout in stress test of 09/02/24 ===",
"Unit_Device___half2half2_Accuracy_Positive",
"Unit_Device_make_half2_Accuracy_Positive",
"Unit_Device___halves2half2_Accuracy_Positive",
"Unit_Device___low2half_Accuracy_Positive",
"Unit_Device___high2half_Accuracy_Positive",
"Unit_Device___low2half2_Accuracy_Positive",
"Unit_Device___high2half2_Accuracy_Positive",
"Unit_Device___lowhigh2highlow_Accuracy_Positive",
"Unit_Device___lows2half2_Accuracy_Positive",
"Unit_Device___highs2half2_Accuracy_Positive",
"Unit_Device___float2half2_rn_Accuracy_Positive",
"Unit_Device___floats2half2_rn_Accuracy_Positive",
"Unit_Device___float22half2_rn_Accuracy_Positive",
"Unit_Device___low2float_Accuracy_Positive",
"Unit_Device___high2float_Accuracy_Positive",
"Unit_Device___half22float2_Accuracy_Positive",
"Unit_Device_hcos_Accuracy_Positive",
"Unit_Device_h2cos_Accuracy_Positive",
"Unit_Device_hsin_Accuracy_Positive",
"Unit_Device_h2sin_Accuracy_Positive",
"Unit_Device_hexp_Accuracy_Positive",
"Unit_Device_h2exp_Accuracy_Positive",
"Unit_Device_hexp10_Accuracy_Positive",
"Unit_Device_h2exp10_Accuracy_Positive",
"Unit_Device_hexp2_Accuracy_Positive",
"Unit_Device_h2exp2_Accuracy_Positive",
"Unit_Device_hlog_Accuracy_Positive",
"Unit_Device_h2log_Accuracy_Positive",
"Unit_Device_hlog10_Accuracy_Positive",
"Unit_Device_h2log10_Accuracy_Positive",
"Unit_Device_hlog2_Accuracy_Positive",
"Unit_Device_h2log2_Accuracy_Positive",
"Unit_Device_hsqrt_Accuracy_Positive",
"Unit_Device_h2sqrt_Accuracy_Positive",
"Unit_Device_hceil_Accuracy_Positive",
"Unit_Device_h2ceil_Accuracy_Positive",
"Unit_Device_hfloor_Accuracy_Positive",
"Unit_Device_h2floor_Accuracy_Positive",
"Unit_Device_htrunc_Accuracy_Positive",
"Unit_Device_h2trunc_Accuracy_Positive",
"Unit_Device_hrcp_Accuracy_Positive",
"Unit_Device_h2rcp_Accuracy_Positive",
"Unit_Device_hrsqrt_Accuracy_Positive",
"Unit_Device_h2rsqrt_Accuracy_Positive",
"Unit_Device_hrint_Accuracy_Positive",
"Unit_Device_h2rint_Accuracy_Positive",
"Unit_Device___habs_Accuracy_Positive",
"Unit_Device___habs2_Accuracy_Positive",
"Unit_Device___hneg_Accuracy_Positive",
"Unit_Device___hneg2_Accuracy_Positive",
"Unit_Device___hadd_wrapper_Accuracy_Positive",
"Unit_Device___hadd2_Accuracy_Positive",
"Unit_Device___hadd_sat_Accuracy_Positive",
"Unit_Device___hadd2_sat_Accuracy_Positive",
"Unit_Device___hsub_Accuracy_Positive",
"Unit_Device___hsub2_Accuracy_Positive",
"Unit_Device___hsub_sat_Accuracy_Positive",
"Unit_Device___hsub2_sat_Accuracy_Positive",
"Unit_Device___hmul_Accuracy_Positive",
"Unit_Device___hmul2_Accuracy_Positive",
"Unit_Device___hmul_sat_Accuracy_Positive",
"Unit_Device___hmul2_sat_Accuracy_Positive",
"Unit_Device___hdiv_Accuracy_Positive",
"Unit_Device___h2div_Accuracy_Positive",
"Unit_Device___hfma_Accuracy_Positive",
"Unit_Device___hfma2_Accuracy_Positive",
"Unit_Device___hfma_sat_Accuracy_Positive",
"Unit_Device___hfma2_sat_Accuracy_Positive",
"Unit_Device___hisinf_Accuracy_Positive",
"Unit_Device___hisinf2_Accuracy_Positive",
"Unit_Device___hisnan_Accuracy_Positive",
"Unit_Device___hisnan2_Accuracy_Positive",
"Unit_Device___heq_Accuracy_Positive",
"Unit_Device___hbeq2_Accuracy_Positive",
"Unit_Device___hequ_Accuracy_Positive",
"Unit_Device___hbequ2_Accuracy_Positive",
"Unit_Device___heq2_Accuracy_Positive",
"Unit_Device___hequ2_Accuracy_Positive",
"Unit_Device___hne_Accuracy_Positive",
"Unit_Device___hbne2_Accuracy_Positive",
"Unit_Device___hneu_Accuracy_Positive",
"Unit_Device___hbneu2_Accuracy_Positive",
"Unit_Device___hne2_Accuracy_Positive",
"Unit_Device___hneu2_Accuracy_Positive",
"Unit_Device___hge_Accuracy_Positive",
"Unit_Device___hbge2_Accuracy_Positive",
"Unit_Device___hgeu_Accuracy_Positive",
"Unit_Device___hbgeu2_Accuracy_Positive",
"Unit_Device___hge2_Accuracy_Positive",
"Unit_Device___hgeu2_Accuracy_Positive",
"Unit_Device___hgt_Accuracy_Positive",
"Unit_Device___hbgt2_Accuracy_Positive",
"Unit_Device___hgtu_Accuracy_Positive",
"Unit_Device___hbgtu2_Accuracy_Positive",
"Unit_Device___hgt2_Accuracy_Positive",
"Unit_Device___hgtu2_Accuracy_Positive",
"Unit_Device___hle_Accuracy_Positive",
"Unit_Device___hble2_Accuracy_Positive",
"Unit_Device___hleu_Accuracy_Positive",
"Unit_Device___hbleu2_Accuracy_Positive",
"Unit_Device___hle2_Accuracy_Positive",
"Unit_Device___hleu2_Accuracy_Positive",
"Unit_Device___hlt_Accuracy_Positive",
"Unit_Device___hblt2_Accuracy_Positive",
"Unit_Device___hltu_Accuracy_Positive",
"Unit_Device___hbltu2_Accuracy_Positive",
"Unit_Device___hlt2_Accuracy_Positive",
"Unit_Device___hltu2_Accuracy_Positive",
"Unit_Device___hmax_Accuracy_Positive",
"Unit_Device___hmin_Accuracy_Positive",
"Unit_Device___hmax_nan_Accuracy_Positive",
"Unit_Device___hmin_nan_Accuracy_Positive",
"Unit_Device___half2int_rn_Accuracy_Positive",
"Unit_Device___half2int_rz_Accuracy_Positive",
"Unit_Device___half2int_rd_Accuracy_Positive",
"Unit_Device___half2int_ru_Accuracy_Positive",
"Unit_Device___half2uint_rn_Accuracy_Positive",
"Unit_Device___half2uint_rz_Accuracy_Positive",
"Unit_Device___half2uint_rd_Accuracy_Positive",
"Unit_Device___half2uint_ru_Accuracy_Positive",
"Unit_Device___half2short_rn_Accuracy_Positive",
"Unit_Device___half2short_rz_Accuracy_Positive",
"Unit_Device___half2short_rd_Accuracy_Positive",
"Unit_Device___half2short_ru_Accuracy_Positive",
"Unit_Device___half2ushort_rn_Accuracy_Positive",
"Unit_Device___half2ushort_rz_Accuracy_Positive",
"Unit_Device___half2ushort_rd_Accuracy_Positive",
"Unit_Device___half2ushort_ru_Accuracy_Positive",
"Unit_Device___half2ll_rn_Accuracy_Positive",
"Unit_Device___half2ll_rz_Accuracy_Positive",
"Unit_Device___half2ll_rd_Accuracy_Positive",
"Unit_Device___half2ll_ru_Accuracy_Positive",
"Unit_Device___half2ull_rn_Accuracy_Positive",
"Unit_Device___half2ull_rz_Accuracy_Positive",
"Unit_Device___half2ull_rd_Accuracy_Positive",
"Unit_Device___half2ull_ru_Accuracy_Positive",
"Unit_Device___half_as_short_Accuracy_Positive",
"Unit_Device___half_as_ushort_Accuracy_Positive",
"Unit_Device___int2half_rn_Accuracy_Positive",
"Unit_Device___int2half_rz_Accuracy_Positive",
"Unit_Device___int2half_rd_Accuracy_Positive",
"Unit_Device___int2half_ru_Accuracy_Positive",
"Unit_Device___uint2half_rn_Accuracy_Positive",
"Unit_Device___uint2half_rz_Accuracy_Positive",
"Unit_Device___uint2half_rd_Accuracy_Positive",
"Unit_Device___uint2half_ru_Accuracy_Positive",
"Unit_Device___short2half_rn_Accuracy_Positive",
"Unit_Device___short2half_rz_Accuracy_Positive",
"Unit_Device___short2half_rd_Accuracy_Positive",
"Unit_Device___short2half_ru_Accuracy_Positive",
"Unit_Device___ushort2half_rn_Accuracy_Positive",
"Unit_Device___ushort2half_rz_Accuracy_Positive",
"Unit_Device___ushort2half_rd_Accuracy_Positive",
"Unit_Device___ushort2half_ru_Accuracy_Positive",
"Unit_Device___ll2half_rn_Accuracy_Positive",
"Unit_Device___ll2half_rz_Accuracy_Positive",
"Unit_Device___ll2half_rd_Accuracy_Positive",
"Unit_Device___ll2half_ru_Accuracy_Positive",
"Unit_Device___ull2half_rn_Accuracy_Positive",
"Unit_Device___ull2half_rz_Accuracy_Positive",
"Unit_Device___ull2half_rd_Accuracy_Positive",
"Unit_Device___ull2half_ru_Accuracy_Positive",
"Unit_Device___short_as_half_Accuracy_Positive",
"Unit_Device___ushort_as_half_Accuracy_Positive",
"Unit_Device___float2half_rn_Accuracy_Positive",
"Unit_Device___float2half_Accuracy_Positive",
"Unit_Device___half2float_Accuracy_Positive",
"Unit_Device___frcp_rn_Accuracy_Positive",
"Unit_Device___fsqrt_rn_Accuracy_Positive",
"Unit_Device___frsqrt_rn_Accuracy_Positive",
"Unit_Device___expf_Accuracy_Positive",
"Unit_Device___exp10f_Accuracy_Positive",
"Unit_Device___logf_Accuracy_Positive",
"Unit_Device___log2f_Accuracy_Positive",
"Unit_Device___log10f_Accuracy_Positive",
"Unit_Device___sinf_Accuracy_Positive",
"Unit_Device___sincosf_sin_Accuracy_Positive",
"Unit_Device___cosf_Accuracy_Positive",
"Unit_Device___sincosf_cos_Accuracy_Positive",
"Unit_Device___fadd_rn_Accuracy_Positive",
"Unit_Device___fsub_rn_Accuracy_Positive",
"Unit_Device___fmul_rn_Accuracy_Positive",
"Unit_Device___fdiv_rn_Accuracy_Positive",
"Unit_Device___fdividef_Accuracy_Positive",
"Unit_Device___fmaf_rn_Accuracy_Positive",
"Unit_Device___drcp_rn_Accuracy_Positive",
"Unit_Device___dsqrt_rn_Accuracy_Positive",
"Unit_Device___dadd_rn_Accuracy_Positive",
"Unit_Device___dsub_rn_Accuracy_Positive",
"Unit_Device___dmul_rn_Accuracy_Positive",
"Unit_Device___ddiv_rn_Accuracy_Positive",
"Unit_Device___fma_rn_Accuracy_Positive",
"Unit___hip_atomic_load_store_Positive_Acquire_Release",
"Unit___hip_atomic_exchange_Positive_Acquire_Release",
"Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release",
"Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release",
"Unit___hip_atomic_fetch_add_Positive_Acquire_Release",
"Unit___hip_atomic_fetch_and_Positive_Acquire_Release",
"Unit___hip_atomic_fetch_or_Positive_Acquire_Release",
"Unit___hip_atomic_fetch_xor_Positive_Acquire_Release",
"Unit___hip_atomic_fetch_min_Positive_Acquire_Release",
"Unit___hip_atomic_fetch_max_Positive_Acquire_Release",
"Unit___hip_atomic_load_store_Positive_Sequential_Consistency",
"Unit___hip_atomic_exchange_Positive_Sequential_Consistency",
"Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency",
"Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - int",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - float",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - double",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - int",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - float",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - double",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double",
"Unit_atomicAdd_Positive - int",
"Unit_atomicAdd_Positive - unsigned int",
"Unit_atomicAdd_Positive - unsigned long",
"Unit_atomicAdd_Positive - unsigned long long",
"Unit_atomicAdd_Positive - float",
"Unit_atomicAdd_Positive - double",
"Unit_atomicAdd_Positive_Multi_Kernel - int",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned int",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicAdd_Positive_Multi_Kernel - float",
"Unit_atomicAdd_Positive_Multi_Kernel - double",
"Unit_atomicAdd_Negative_Parameters_RTC",
"Unit_atomicAdd_system_Positive_Peer_GPUs - int",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicAdd_system_Positive_Peer_GPUs - float",
"Unit_atomicAdd_system_Positive_Peer_GPUs - double",
"Unit_atomicAdd_system_Positive_Host_And_GPU - int",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicAdd_system_Positive_Host_And_GPU - float",
"Unit_atomicAdd_system_Positive_Host_And_GPU - double",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double",
"Unit_unsafeAtomicAdd_Positive - float",
"Unit_unsafeAtomicAdd_Positive - double",
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float",
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double",
"Unit_safeAtomicAdd_Positive - float",
"Unit_safeAtomicAdd_Positive - double",
"Unit_safeAtomicAdd_Positive_Multi_Kernel - float",
"Unit_safeAtomicAdd_Positive_Multi_Kernel - double",
"Unit_atomicSub_Positive - int",
"Unit_atomicSub_Positive - unsigned int",
"Unit_atomicSub_Positive - unsigned long",
"Unit_atomicSub_Positive - unsigned long long",
"Unit_atomicSub_Positive - float",
"Unit_atomicSub_Positive - double",
"Unit_atomicSub_Positive_Multi_Kernel - int",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned int",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicSub_Positive_Multi_Kernel - float",
"Unit_atomicSub_Positive_Multi_Kernel - double",
"Unit_atomicSub_Negative_Parameters_RTC",
"Unit_atomicSub_system_Positive_Peer_GPUs - int",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicSub_system_Positive_Peer_GPUs - float",
"Unit_atomicSub_system_Positive_Peer_GPUs - double",
"Unit_atomicSub_system_Positive_Host_And_GPU - int",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicSub_system_Positive_Host_And_GPU - float",
"Unit_atomicSub_system_Positive_Host_And_GPU - double",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double",
"Unit_atomicInc_Positive - unsigned int",
"Unit_atomicInc_Positive_Multi_Kernel - unsigned int",
"Unit_atomicInc_Negative_Parameters_RTC",
"Unit_atomicDec_Positive - unsigned int",
"Unit_atomicDec_Positive_Multi_Kernel - unsigned int",
"Unit_atomicDec_Negative_Parameters_RTC",
"Unit_atomicCAS_Positive - int",
"Unit_atomicCAS_Positive - unsigned int",
"Unit_atomicCAS_Positive - unsigned long long",
"Unit_atomicCAS_Positive_Multi_Kernel - int",
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned int",
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicCAS_Negative_Parameters_RTC",
"Unit_atomicCAS_system_Positive_Peer_GPUs - int",
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicCAS_system_Positive_Host_And_GPU - int",
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long",
#endif
"End of json"
]
+144 -1
Просмотреть файл
@@ -89,6 +89,149 @@
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - char",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemap_Positive_ReadModeElementType - short",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemap_Positive_ReadModeElementType - int",
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemap_Positive_ReadModeElementType - float",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
"Unit_hipDrvGetErrorString_Positive_Basic",
"Unit_hipLaunchKernel_Negative_Parameters",
"Unit_Assert_Positive_Basic_KernelFail",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_hipMemImportFromShareableHandle_Positive_MultiProc",
"Unit_hipMemMapArrayAsync_Positive_Basic"
]
}
+2 -1
Просмотреть файл
@@ -44,6 +44,7 @@
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
"Unit_hipMemMapArrayAsync_Positive_Basic"
]
}
+6
Просмотреть файл
@@ -36,6 +36,12 @@ int main(int argc, char** argv) {
| Opt(cmd_options.cg_iterations, "cg_iterations")
["-C"]["--cg-iterations"]
("Number of iterations used for cooperative groups sync tests (default: 5)")
| Opt(cmd_options.accuracy_iterations, "accuracy_iterations")
["-A"]["--accuracy-iterations"]
("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)")
| Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory")
["-M"]["--accuracy-max-memory"]
("Percentage of global device memory allowed for math accuracy tests (default: 80%)")
;
// clang-format on
+5
Просмотреть файл
@@ -22,6 +22,9 @@ THE SOFTWARE.
#pragma once
#include <cstdint>
#include <limits>
struct CmdOptions {
int iterations = 10;
int warmups = 100;
@@ -29,6 +32,8 @@ struct CmdOptions {
int cg_iterations = 5;
bool no_display = false;
bool progress = false;
uint64_t accuracy_iterations = std::numeric_limits<uint32_t>::max() + 1ull;
int accuracy_max_memory = 80;
};
extern CmdOptions cmd_options;
Просмотреть файл
+16 -3
Просмотреть файл
@@ -129,6 +129,19 @@ THE SOFTWARE.
} \
}
// Check that an expression, errorExpr, evaluates to the expected error code, expectedError.
// errorExpr is evaluated exactly once and its value is kept in localError. expectedError is
// expanded more than once (in the INFO message and in the comparison), so it should be a
// side-effect-free expression such as an enumerator.
// An INFO message holding the hiprtcGetErrorString() text and raw code of both values, the
// stringified expression and the file/line of the expansion is registered before the REQUIRE
// that compares the two codes.
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
{ \
auto localError = errorExpr; \
INFO("Matching Errors: " \
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
<< "\n Expected Code: " << expectedError << '\n' \
<< " Actual Error: " << hiprtcGetErrorString(localError) \
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
REQUIRE(localError == expectedError); \
}
#define HIPASSERT(condition) \
if (!(condition)) { \
printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \
@@ -165,7 +178,7 @@ static inline bool IsGfx11() {
hipDeviceProp_t props{};
HIP_CHECK(hipGetDevice(&device));
HIP_CHECK(hipGetDeviceProperties(&props, device));
// Get GCN Arch Name and compare to check if it is gfx11
// Get GCN Arch Name and compare to check if it is gfx11
std::string arch = std::string(props.gcnArchName);
auto pos = arch.find("gfx11");
if (pos != std::string::npos)
@@ -173,7 +186,7 @@ static inline bool IsGfx11() {
else
return false;
#else
std::cout<<"Have to be either Nvidia or AMD platform, asserting"<<std::endl;
std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
assert(false);
#endif
}
@@ -308,7 +321,7 @@ void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerB
launchRTCKernel<Typenames...>(kernel, numBlocks, numThreads, memPerBlock, stream,
std::forward<Args>(packedArgs)...);
#endif
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipGetLastError());
}
//---
+45 -8
Просмотреть файл
@@ -39,6 +39,13 @@ THE SOFTWARE.
* @}
*/
/**
* @defgroup AtomicsTest Device Atomics
* @{
* This section describes tests for the Device Atomic APIs.
* @}
*/
/**
* @defgroup DeviceLanguageTest Device Language
* @{
@@ -96,16 +103,23 @@ THE SOFTWARE.
*/
/**
* @defgroup KernelTest Kernel Functions Management
* @{
* This section describes the various kernel functions invocation.
* @}
*/
* @defgroup KernelTest Kernel Functions Management
* @{
* This section describes the various kernel functions invocation.
* @}
*/
/**
* @defgroup AtomicsTest Device Atomics
* @defgroup SyncthreadsTest Synchronization Functions
* @{
* This section describes tests for the Device Atomic APIs.
* This section describes tests for Synchronization Functions.
* @}
*/
/**
* @defgroup ThreadfenceTest Memory Fence Functions
* @{
* This section describes tests for Memory Fence Functions.
* @}
*/
@@ -119,7 +133,8 @@ THE SOFTWARE.
/**
* @defgroup PeerToPeerTest PeerToPeer Device Memory Access
* @{
* This section describes tests for the PeerToPeer device memory access functions of HIP runtime API.
* This section describes tests for the PeerToPeer device memory access functions of HIP runtime
* API.
* @warning PeerToPeer support is experimental.
* @}
*/
@@ -135,6 +150,7 @@ THE SOFTWARE.
* @defgroup ShflTest warp shuffle function Management
* @{
* This section describes the warp shuffle types & functions of HIP runtime API.
* @}
*/
/**
@@ -158,6 +174,13 @@ THE SOFTWARE.
* @}
*/
/**
* @defgroup ModuleTest Module Management
* @{
* This section describes the module management types & functions of HIP runtime API.
* @}
*/
/**
* @defgroup TextureTest Texture Management
* @{
@@ -172,6 +195,13 @@ THE SOFTWARE.
* @}
*/
/**
* @defgroup MathTest Math Device Functions
* @{
* This section describes tests for device math functions of HIP runtime API.
* @}
*/
/**
* @defgroup PrintfTest Printf API Management
* @{
@@ -192,3 +222,10 @@ THE SOFTWARE.
* This section describes tests for the Complex type functions.
* @}
*/
/**
* @defgroup VirtualMemoryManagementTest Virtual Memory Management APIs
* @{
* This section describes the virtual memory management types & functions of HIP runtime API.
* @}
*/
+90 -11
Просмотреть файл
@@ -23,7 +23,7 @@ THE SOFTWARE.
#pragma once
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
#pragma clang diagnostic ignored "-Wunused-lambda-capture"
#pragma clang diagnostic ignored "-Wunused-parameter"
#include <variant>
#include <hip_test_common.hh>
@@ -44,8 +44,9 @@ static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction
}
};
static hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr,
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
static inline hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos,
PtrVariant src_ptr, hipPos src_pos,
hipExtent extent, hipMemcpyKind kind) {
hipMemcpy3DParms parms = {0};
if (std::holds_alternative<hipArray_t>(dst_ptr)) {
parms.dstArray = std::get<hipArray_t>(dst_ptr);
@@ -185,7 +186,7 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
if (!can_access_peer) {
std::string msg = "Skipped as peer access cannot be enabled between devices " +
std::to_string(src_device) + " " + std::to_string(dst_device);
std::to_string(src_device) + " " + std::to_string(dst_device);
HipTest::HIP_SKIP_TEST(msg.c_str());
return;
}
@@ -205,7 +206,8 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
// Using dst_alloc width and height to set only the elements that will be copied over to
// dst_alloc
Iota<<<blocks, threads_per_block, 0, kernel_stream>>>(src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width_logical(),dst_alloc.height(), dst_alloc.depth());
dst_alloc.width_logical(),
dst_alloc.height(), dst_alloc.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
@@ -626,15 +628,14 @@ constexpr auto MemTypeUnified() {
using DrvPtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
template <bool async = false>
hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
hipStream_t stream = nullptr) {
static inline HIP_MEMCPY3D GetDrvMemcpy3DParms(DrvPtrVariant dst_ptr, hipPos dst_pos,
DrvPtrVariant src_ptr, hipPos src_pos,
hipExtent extent, hipMemcpyKind kind) {
HIP_MEMCPY3D parms = {0};
if (std::holds_alternative<hipArray_t>(dst_ptr)) {
parms.dstMemoryType = hipMemoryTypeArray;
parms.dstArray = std::get<hipArray_t>(dst_ptr);
parms.dstArray = std::get<hipArray_t>(dst_ptr);
} else {
auto ptr = std::get<hipPitchedPtr>(dst_ptr);
parms.dstPitch = ptr.pitch;
@@ -694,6 +695,84 @@ hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVaria
parms.dstY = dst_pos.y;
parms.dstZ = dst_pos.z;
return parms;
}
/**
 * Equality used by the tests to compare a HIP_MEMCPY3D descriptor against a reference one.
 *
 * Offsets and extent are always compared. The memory description is compared from the point
 * of view of lhs: for each of destination and source the memory type must match, together
 * with the array handle when lhs has one, or the pitch otherwise. The device and host
 * pointers are compared only when they are set on lhs.
 *
 * @param lhs Reference descriptor; decides which optional fields take part in the comparison.
 * @param rhs Descriptor compared against lhs.
 * @return true when every compared field matches.
 */
static inline bool operator==(const HIP_MEMCPY3D& lhs, const HIP_MEMCPY3D& rhs) {
  const bool pos_eq = lhs.dstXInBytes == rhs.dstXInBytes && lhs.dstY == rhs.dstY &&
      lhs.dstZ == rhs.dstZ && lhs.srcXInBytes == rhs.srcXInBytes && lhs.srcY == rhs.srcY &&
      lhs.srcZ == rhs.srcZ;

  const bool extent_eq =
      lhs.WidthInBytes == rhs.WidthInBytes && lhs.Height == rhs.Height && lhs.Depth == rhs.Depth;

  // Destination and source are evaluated separately and then combined, so that a mismatch on
  // the destination side is not lost when the source side happens to match.
  bool dst_mem_eq = lhs.dstMemoryType == rhs.dstMemoryType;
  if (lhs.dstArray) {
    dst_mem_eq = dst_mem_eq && (lhs.dstArray == rhs.dstArray);
  } else {
    dst_mem_eq = dst_mem_eq && (lhs.dstPitch == rhs.dstPitch);
  }

  bool src_mem_eq = lhs.srcMemoryType == rhs.srcMemoryType;
  if (lhs.srcArray) {
    src_mem_eq = src_mem_eq && (lhs.srcArray == rhs.srcArray);
  } else {
    src_mem_eq = src_mem_eq && (lhs.srcPitch == rhs.srcPitch);
  }

  bool mem_eq = dst_mem_eq && src_mem_eq;

  if (lhs.dstDevice) {
    mem_eq = mem_eq && (lhs.dstDevice == rhs.dstDevice);
  }

  // Compare the host pointer itself (not the device pointer) when the destination is host
  // memory.
  if (lhs.dstHost) {
    mem_eq = mem_eq && (lhs.dstHost == rhs.dstHost);
  }

  if (lhs.srcDevice) {
    mem_eq = mem_eq && (lhs.srcDevice == rhs.srcDevice);
  }

  if (lhs.srcHost) {
    mem_eq = mem_eq && (lhs.srcHost == rhs.srcHost);
  }

  return pos_eq && extent_eq && mem_eq;
}
// APIs hipDrvGraphMemcpyNodeGetParams, hipDrvGraphMemcpyNodeSetParams are yet to be implemented in HIP runtime.
#if 0
/**
 * Graph-based variant of the driver 3D memcpy wrapper.
 *
 * Builds a single-node graph holding a driver memcpy node, verifies that the parameters read
 * back from the node equal the requested ones, then instantiates and launches the graph and
 * waits for it to finish.
 *
 * When set_params is true the node is first created with the reversed copy description and
 * then switched to the requested one through hipDrvGraphMemcpyNodeSetParams.
 *
 * NOTE(review): the stream parameter is not used by this body; the graph is launched on
 * hipStreamPerThread.
 *
 * @return hipSuccess; failures of the individual HIP calls are reported through HIP_CHECK.
 */
template <bool set_params = false>
hipError_t DrvMemcpy3DGraphWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
                                   hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
                                   hipCtx_t context, hipStream_t stream = nullptr) {
  auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);

  hipGraph_t g = nullptr;
  HIP_CHECK(hipGraphCreate(&g, 0));

  hipGraphNode_t node = nullptr;
  if constexpr (!set_params) {
    // Plain path: the node is created directly with the requested description.
    HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &parms, context));
  } else {
    // Set-params path: start from the opposite copy, then overwrite it with the requested one.
    auto reversed_parms = GetDrvMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
                                              ReverseMemcpyDirection(kind));
    HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms, context));
    HIP_CHECK(hipDrvGraphMemcpyNodeSetParams(node, &parms));
  }

  // The node must report exactly what was requested.
  HIP_MEMCPY3D retrieved_params = {0};
  HIP_CHECK(hipDrvGraphMemcpyNodeGetParams(node, &retrieved_params));
  REQUIRE(parms == retrieved_params);

  // Execute the copy and wait for completion before tearing everything down.
  hipGraphExec_t graph_exec = nullptr;
  HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
  HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));

  HIP_CHECK(hipGraphExecDestroy(graph_exec));
  HIP_CHECK(hipGraphDestroy(g));

  return hipSuccess;
}
#endif //if 0
template <bool async = false>
hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
hipStream_t stream = nullptr) {
auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
if constexpr (async) {
return hipDrvMemcpy3DAsync(&parms, stream);
} else {
@@ -805,4 +884,4 @@ void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream
};
PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);
}
}
+83 -41
Просмотреть файл
@@ -35,15 +35,15 @@ enum class LinearAllocs {
inline std::string to_string(const LinearAllocs allocation_type) {
switch (allocation_type) {
case LinearAllocs::malloc:
return "host pageable";
return "malloc";
case LinearAllocs::mallocAndRegister:
return "registered";
return "malloc + hipHostRegister";
case LinearAllocs::hipHostMalloc:
return "host pinned";
return "hipHostMalloc";
case LinearAllocs::hipMalloc:
return "device malloc";
return "hipMalloc";
case LinearAllocs::hipMallocManaged:
return "managed";
return "hipMallocManaged";
default:
return "unknown alloc type";
}
@@ -83,24 +83,38 @@ template <typename T> class LinearAllocGuard {
LinearAllocGuard(const LinearAllocGuard&) = delete;
LinearAllocGuard(LinearAllocGuard&& o)
: allocation_type_{o.allocation_type_}, ptr_{o.ptr_}, host_ptr_{o.host_ptr_} {
o.allocation_type_ = LinearAllocs::noAlloc;
o.ptr_ = nullptr;
o.host_ptr_ = nullptr;
}
LinearAllocGuard(LinearAllocGuard&& o) { *this = std::move(o); }
LinearAllocGuard& operator=(LinearAllocGuard&& o) {
allocation_type_ = o.allocation_type_;
ptr_ = o.ptr_;
host_ptr_ = o.host_ptr_;
if (this != &o) {
dealloc();
o.allocation_type_ = LinearAllocs::noAlloc;
o.ptr_ = nullptr;
o.host_ptr_ = nullptr;
allocation_type_ = o.allocation_type_;
ptr_ = o.ptr_;
host_ptr_ = o.host_ptr_;
o.allocation_type_ = LinearAllocs::noAlloc;
o.ptr_ = nullptr;
o.host_ptr_ = nullptr;
}
return *this;
}
~LinearAllocGuard() {
~LinearAllocGuard() { dealloc(); }
T* ptr() const { return ptr_; };
T* host_ptr() const { return host_ptr_; }
private:
LinearAllocs allocation_type_ = LinearAllocs::noAlloc;
T* ptr_ = nullptr;
T* host_ptr_ = nullptr;
void dealloc() {
if (ptr_ == nullptr) {
return;
}
// No Catch macros, don't want to possibly throw in the destructor
if (ptr_ != nullptr) {
switch (allocation_type_) {
@@ -123,14 +137,6 @@ template <typename T> class LinearAllocGuard {
}
}
}
T* ptr() const { return ptr_; };
T* host_ptr() const { return host_ptr_; }
private:
LinearAllocs allocation_type_ = LinearAllocs::noAlloc;
T* ptr_ = nullptr;
T* host_ptr_ = nullptr;
};
template <typename T> class LinearAllocGuardMultiDim {
@@ -210,6 +216,42 @@ template <typename T> class ArrayAllocGuard {
const hipExtent extent_;
};
/**
 * Scope guard for a mipmapped array whose channel format is derived from T.
 *
 * The array is allocated in the constructor with hipMallocMipmappedArray and released in the
 * destructor with hipFreeMipmappedArray. The guard can be neither copied nor moved.
 */
template <typename T> class MipmappedArrayAllocGuard {
 public:
  MipmappedArrayAllocGuard(const MipmappedArrayAllocGuard&) = delete;
  MipmappedArrayAllocGuard(MipmappedArrayAllocGuard&&) = delete;

  // extent should contain logical width
  MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int levels,
                           const unsigned int flags)
      : extent_(extent), levels_(levels) {
    hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
    HIP_CHECK(hipMallocMipmappedArray(&ptr_, &desc, extent_, levels_, flags));
  }

  // Convenience overload that allocates a single mipmap level.
  MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u)
      : MipmappedArrayAllocGuard(extent, 1u, flags) {}

  // The result of the free call is deliberately discarded.
  ~MipmappedArrayAllocGuard() { static_cast<void>(hipFreeMipmappedArray(ptr_)); }

  // Extent the array was allocated with.
  hipExtent extent() const { return extent_; }

  // Number of mipmap levels the array was allocated with.
  unsigned int levels() const { return levels_; }

  // Handle of the whole mipmapped array.
  hipMipmappedArray_t ptr() const { return ptr_; }

  // Returns the array handle of one mipmap level, as reported by hipGetMipmappedArrayLevel.
  hipArray_t GetLevel(unsigned int level) {
    hipArray_t ret;
    HIP_CHECK(hipGetMipmappedArrayLevel(&ret, ptr_, level));
    return ret;
  }

 private:
  hipMipmappedArray_t ptr_ = nullptr;
  const hipExtent extent_;
  const unsigned int levels_;
};
template <typename T> class DrvArrayAllocGuard {
public:
// extent should contain width in bytes
@@ -266,24 +308,24 @@ class StreamGuard {
StreamGuard(const StreamGuard&) = delete;
StreamGuard(StreamGuard&& o)
: stream_type_{o.stream_type_}, flags_{o.flags_}, priority_{o.priority_}, stream_{o.stream_} {
o.stream_type_ = Streams::nullstream;
o.flags_ = 0u;
o.priority_ = 0;
o.stream_ = nullptr;
}
StreamGuard(StreamGuard&& o) { *this = std::move(o); }
StreamGuard& operator=(StreamGuard&& o) {
stream_type_ = o.stream_type_;
flags_ = o.flags_;
priority_ = o.priority_;
stream_ = o.stream_;
if (this != &o) {
if (stream_type_ == Streams::created) {
static_cast<void>(hipStreamDestroy(stream_));
}
o.stream_type_ = Streams::nullstream;
o.flags_ = 0u;
o.priority_ = 0;
o.stream_ = nullptr;
stream_type_ = o.stream_type_;
flags_ = o.flags_;
priority_ = o.priority_;
stream_ = o.stream_;
o.stream_type_ = Streams::nullstream;
o.flags_ = 0u;
o.priority_ = 0;
o.stream_ = nullptr;
}
return *this;
}
+1 -1
Просмотреть файл
@@ -170,7 +170,7 @@ inline bool DeviceAttributesSupport(const int device, Attributes... attributes)
return (... && DeviceAttributeSupport(device, attributes));
}
inline int GetDeviceAttribute(int device, const hipDeviceAttribute_t attr) {
inline int GetDeviceAttribute(const hipDeviceAttribute_t attr, int device) {
int value = 0;
HIP_CHECK(hipDeviceGetAttribute(&value, attr, device));
return value;
+7
Просмотреть файл
@@ -22,6 +22,7 @@ add_subdirectory(rtc)
add_subdirectory(deviceLib)
add_subdirectory(graph)
add_subdirectory(memory)
add_subdirectory(stream_ordered)
add_subdirectory(stream)
add_subdirectory(event)
add_subdirectory(occupancy)
@@ -43,11 +44,15 @@ add_subdirectory(g++)
add_subdirectory(module)
add_subdirectory(channelDescriptor)
add_subdirectory(executionControl)
add_subdirectory(math)
add_subdirectory(vector_types)
add_subdirectory(atomics)
add_subdirectory(complex)
add_subdirectory(p2p)
add_subdirectory(gcc)
add_subdirectory(syncthreads)
add_subdirectory(threadfence)
add_subdirectory(virtualMemoryManagement)
if(HIP_PLATFORM STREQUAL "amd")
add_subdirectory(callback)
@@ -58,3 +63,5 @@ add_subdirectory(vulkan_interop)
add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246
endif()
add_subdirectory(synchronization)
add_subdirectory(launchBounds)
add_subdirectory(assertion)
+49
Просмотреть файл
@@ -0,0 +1,49 @@
# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Build the AssertionTest executable as part of build_tests. The source list and the
# runtime-compilation library it links against are selected per HIP platform.
if(HIP_PLATFORM MATCHES "nvidia")
# NVIDIA: only the device assert test is built, linked against nvrtc.
set(TEST_SRC
assert.cc
)
hip_add_exe_to_target(NAME AssertionTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS nvrtc)
elseif(HIP_PLATFORM MATCHES "amd")
# AMD: the static_assert test is built in addition to the device assert test, linked
# against hiprtc.
set(TEST_SRC
static_assert.cc
assert.cc
)
hip_add_exe_to_target(NAME AssertionTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS hiprtc)
endif()
# Below tests fail in PSDB
#add_test(NAME Unit_StaticAssert_Positive_Basic
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# static_assert_kernels_positive.cc 2)
#
#add_test(NAME Unit_StaticAssert_Negative_Basic
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# static_assert_kernels_negative.cc 2)
+124
Просмотреть файл
@@ -0,0 +1,124 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <csetjmp>
#include <csignal>
/**
* @addtogroup assert assert
* @{
* @ingroup DeviceLanguageTest
* `void assert(int expression)` -
* Stops the kernel execution if expression is equal to zero.
*/
// Jump target that try_and_catch_abort() arms and on_sigabrt() jumps back to.
jmp_buf env_ignore_abort;
// Set to 1 by on_sigabrt(). It is written from a signal handler, hence
// volatile std::sig_atomic_t rather than a plain int.
volatile std::sig_atomic_t abort_raised_flag = 0;

// Signal handler: restores the default disposition for the signal, records that it fired and
// resumes execution at the setjmp() call in try_and_catch_abort().
void on_sigabrt(int signum) {
  signal(signum, SIG_DFL);
  abort_raised_flag = 1;
  longjmp(env_ignore_abort, 1);
}

// Runs func with a SIGABRT handler installed so that an abort raised while func executes does
// not terminate the process. After the call abort_raised_flag is 1 if SIGABRT was intercepted
// during this invocation and 0 otherwise.
void try_and_catch_abort(void (*func)()) {
  // Start from a clean state so the result never depends on an earlier invocation.
  abort_raised_flag = 0;
  if (!setjmp(env_ignore_abort)) {
    signal(SIGABRT, &on_sigabrt);
    (*func)();
    // Normal return: nothing was raised, so the handler is still installed and is removed here.
    signal(SIGABRT, SIG_DFL);
  }
}
// Kernel whose device-side assert holds for every thread: each thread stores its flattened
// global index through x and then asserts that the index is non-negative.
__global__ void AssertPassKernel(int* x) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  *x = tid;

  // expected always to be true
  assert(tid >= 0);
}
// Kernel whose device-side assert is violated by part of the threads: each thread stores its
// flattened global index through x and then asserts that the index is odd.
__global__ void AssertFailKernel(int* x) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  *x = tid;

  // expected to fail for the even thread indices
  assert(tid % 2 == 1);
}
// Allocates one device int, launches the passing or the failing assert kernel on the null
// stream with 2 blocks of 16 threads, synchronizes and frees the allocation.
// For the failing kernel the synchronize call is checked for success when HT_AMD is set and
// for hipErrorAssert otherwise.
template <bool should_abort> void LaunchAssertKernel() {
  constexpr int kGridSize = 2;
  constexpr int kBlockSize = 16;

  int* d_a;
  HIP_CHECK(hipMalloc(&d_a, sizeof(int)));

  if constexpr (!should_abort) {
    AssertPassKernel<<<kGridSize, kBlockSize, 0, 0>>>(d_a);
    HIP_CHECK(hipDeviceSynchronize());
  } else {
    AssertFailKernel<<<kGridSize, kBlockSize, 0, 0>>>(d_a);
#if HT_AMD
    HIP_CHECK(hipDeviceSynchronize());
#else
    HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert);
#endif
  }

  HIP_CHECK(hipFree(d_a));
}
/**
* Test Description
* ------------------------
* - Launches kernels with asserts that have an expression equal to 1.
* - Expects that SIGABRT is not raised and kernels have executed successfully.
* Test source
* ------------------------
* - unit/assertion/assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") {
  // Host launcher for the kernel whose assert always holds.
  void (*const launch_passing)() = &LaunchAssertKernel<false>;

  try_and_catch_abort(launch_passing);

  // No SIGABRT may have been intercepted.
  REQUIRE(abort_raised_flag == 0);
}
/**
* Test Description
* ------------------------
* - Launches kernels with asserts that have an expression equal to 0.
* - Expects that SIGABRT is raised and kernels have been stopped on AMD.
* - The HIP runtime also aborts the host process, so this test case installs a SIGABRT
* handler to keep the host code running.
* - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA.
* - The host code is not aborted.
* Test source
* ------------------------
* - unit/assertion/assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") {
  // Host launcher for the kernel whose assert fails on part of the threads.
  void (*const launch_failing)() = &LaunchAssertKernel<true>;

  try_and_catch_abort(launch_failing);

#if HT_AMD
  // AMD: the abort must have been intercepted by the signal handler.
  REQUIRE(abort_raised_flag == 1);
#else
  // Otherwise the failure is reported through the error code only; no abort is expected.
  REQUIRE(abort_raised_flag == 0);
#endif
}
+88
Просмотреть файл
@@ -0,0 +1,88 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "static_assert_kernels_rtc.hh"
/**
* @addtogroup static_assert static_assert
* @{
* @ingroup DeviceLanguageTest
* `void static_assert(constexpr expression, const char* message)` -
* Stops the compilation if expression is equal to zero, and displays the specified message.
*/
/**
 * Compiles program_source with hipRTC and verifies that compilation fails with
 * exactly two "error:" entries in the compile log.
 *
 * @param program_source Source text handed to hiprtcCreateProgram.
 */
void StaticAssertWrapper(const char* program_source) {
  // Create the RTC program and attempt to compile it; the compile result is
  // checked only after the log has been collected and the program destroyed.
  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr));
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a buffer of the size reported by hipRTC.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of the error marker in the log.
  const std::string marker{"error:"};
  int error_count{0};
  for (size_t hit = log.find(marker, 0); hit != std::string::npos;
       hit = log.find(marker, hit + 1)) {
    ++error_count;
  }
  int expected_error_count{2};

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));

  // Compilation itself must have failed, with the expected number of errors.
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
/**
* Test Description
* ------------------------
* - Compiles kernels with static_assert calls:
* -# Expected that static_assert passes and compilation is successful.
* -# Expected that static_assert fails and compilation has errors.
* - Uses RTC to perform compilation.
* Test source
* ------------------------
* - unit/assertion/static_assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
// Feeds kStaticAssert_Positive to StaticAssertWrapper, which requires the RTC
// compilation to fail with exactly two "error:" entries in the log.
TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Positive); }
/**
* Test Description
* ------------------------
* - Passes invalidly formed expressions to static_assert calls.
* - Uses expressions that are not constexpr and values that are not known during compilation.
* - Uses RTC to perform compilation.
* Test source
* ------------------------
* - unit/assertion/static_assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
// Feeds kStaticAssert_Negative to StaticAssertWrapper, which requires the RTC
// compilation to fail with exactly two "error:" entries in the log.
TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Negative); }
+30
Просмотреть файл
@@ -0,0 +1,30 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Negative fixture: the static_assert condition depends on tid, which is
// computed from the built-in thread/block indices inside the kernel.
// NOTE(review): presumably this file is only compiled to confirm that the
// compiler rejects it - confirm against the build script that consumes it.
__global__ void StaticAssertErrorKernel1() {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
}
// Negative fixture: the static_assert condition increments the non-const local
// tid, which is computed from the built-in thread/block indices.
__global__ void StaticAssertErrorKernel2() {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
}
+32
Просмотреть файл
@@ -0,0 +1,32 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Fixture whose static_assert compares type sizes and is meant to pass.
// NOTE(review): the condition holds only where long is wider than int -
// confirm for every platform this file is built on.
__global__ void StaticAssertPassKernel1() {
static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
}
// Fixture whose static_assert compares two integer literals and is true (10 > 5).
__global__ void StaticAssertPassKernel2() { static_assert(10 > 5, "[StaticAssertPassKernel2]"); }
// Fixture whose static_assert is meant to fail: it requires int to be strictly
// larger than long.
__global__ void StaticAssertFailKernel1() {
static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
}
// Fixture whose static_assert compares two integer literals and is false (10 < 5).
__global__ void StaticAssertFailKernel2() { static_assert(10 < 5, "[StaticAssertFailKernel2]"); }
+56
Просмотреть файл
@@ -0,0 +1,56 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Positive and negative kernels used for the static_assert Test Cases that are using RTC.
*/
// RTC source text containing four kernels with constant-expression
// static_asserts: two "Pass" kernels (sizeof(int) < sizeof(long), 10 > 5) and
// two "Fail" kernels with the opposite comparisons.
// NOTE(review): sizeof(int) < sizeof(long) holds only where long is wider than
// int - confirm for every platform the RTC test runs on.
static constexpr auto kStaticAssert_Positive{
R"(
__global__ void StaticAssertPassKernel1() {
static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
}
__global__ void StaticAssertPassKernel2() {
static_assert(10 > 5, "[StaticAssertPassKernel2]");
}
__global__ void StaticAssertFailKernel1() {
static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
}
__global__ void StaticAssertFailKernel2() {
static_assert(10 < 5, "[StaticAssertFailKernel2]");
}
)"};
// RTC source text containing two kernels whose static_assert conditions use
// tid, a value computed from the built-in thread/block indices inside the
// kernel (one reads a const local, the other increments a non-const local).
static constexpr auto kStaticAssert_Negative{
R"(
__global__ void StaticAssertErrorKernel1() {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
}
__global__ void StaticAssertErrorKernel2() {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
}
)"};
+137 -23
Просмотреть файл
@@ -18,31 +18,145 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
set(TEST_SRC
atomicExch.cc
atomicExch_system.cc
)
if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC
atomicAnd.cc
atomicAnd_system.cc
atomicOr.cc
atomicOr_system.cc
atomicXor.cc
atomicXor_system.cc
atomicMin.cc
atomicMin_system.cc
atomicMax.cc
atomicMax_system.cc
safeAtomicMin.cc
unsafeAtomicMin.cc
safeAtomicMax.cc
unsafeAtomicMax.cc
__hip_atomic_fetch_min.cc
__hip_atomic_fetch_max.cc
atomic_builtins.cc
acquire_release.cc
sequential_consistency.cc
atomicAdd.cc
atomicAdd_system.cc
unsafeAtomicAdd.cc
safeAtomicAdd.cc
atomicSub.cc
atomicSub_system.cc
atomicCAS.cc
atomicCAS_system.cc
__hip_atomic_fetch_add.cc
__hip_atomic_compare_exchange_strong.cc
atomicExch.cc
atomicExch_system.cc
__hip_atomic_fetch_and.cc
__hip_atomic_fetch_or.cc
__hip_atomic_fetch_xor.cc
__hip_atomic_exchange.cc
)
if(HIP_PLATFORM MATCHES "nvidia")
set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
hip_add_exe_to_target(NAME AtomicsTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS "nvrtc -rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
elseif(HIP_PLATFORM MATCHES "amd")
hip_add_exe_to_target(NAME AtomicsTest
# atomicInc and atomicDec tests are disabled on MI300X (SWDEV-440688), so their
# sources are kept in a separate list that is appended to TEST_SRC only when
# the architecture check does not flag an MI300X target.
set(NOT_FOR_MI300X_TEST atomicInc.cc atomicDec.cc)
# Architecture name(s) that the check treats as MI300X.
set(MI300X_TARGET gfx941)
# CheckRejectedArchs(<offload-arch string>)
#
# Sets ARCH_CHECK in the caller's scope:
#    1  if any "--offload-arch=gfx..." entry in the input names an architecture
#       listed in MI300X_TARGET (read from the calling scope),
#   -1  otherwise.
#
# NOTE(review): only entries carrying the "--offload-arch=" prefix are
# recognised; confirm that every caller supplies the value in that form.
function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL)
  set(ARCH_CHECK -1 PARENT_SCOPE)
  # Use the quoted full argument list: an unquoted expansion is a hard error
  # for empty input, and when the caller passes a ;-separated list only its
  # first element would be bound to OFFLOAD_ARCH_STR_LOCAL.
  string(REGEX MATCHALL "--offload-arch=gfx[0-9a-z]+" OFFLOAD_ARCH_LIST "${ARGV}")
  foreach(OFFLOAD_ARCH IN LISTS OFFLOAD_ARCH_LIST)
    # if(MATCHES) fills CMAKE_MATCH_1 with the captured architecture name.
    if(OFFLOAD_ARCH MATCHES "^--offload-arch=(gfx[0-9a-z]+)$")
      if(CMAKE_MATCH_1 IN_LIST MI300X_TARGET)
        set(ARCH_CHECK 1 PARENT_SCOPE)
      endif() # CMAKE_MATCH_1
    endif() # MATCHES
  endforeach() # OFFLOAD_ARCH_LIST
endfunction() # CheckRejectedArchs
# Work out whether an MI300X target was requested, first from OFFLOAD_ARCH_STR
# and otherwise from the HCC_AMDGPU_TARGET environment variable. Empty values
# are treated as "not provided".
# NOTE(review): CheckRejectedArchs only recognises "--offload-arch=gfx..."
# entries; confirm the environment variable is supplied in that form.
if(DEFINED OFFLOAD_ARCH_STR AND NOT "${OFFLOAD_ARCH_STR}" STREQUAL "")
  CheckRejectedArchs("${OFFLOAD_ARCH_STR}")
# DEFINED ENV{...} tests the environment variable itself; DEFINED $ENV{...}
# would test a CMake variable named after the environment variable's value.
elseif(DEFINED ENV{HCC_AMDGPU_TARGET} AND NOT "$ENV{HCC_AMDGPU_TARGET}" STREQUAL "")
  CheckRejectedArchs("$ENV{HCC_AMDGPU_TARGET}")
else()
  set(ARCH_CHECK -1)
endif()

# ARCH_CHECK is -1 when no MI300X target was detected: only then are the
# MI300X-excluded sources built.
if(ARCH_CHECK EQUAL -1)
  message(STATUS "Adding test: ${NOT_FOR_MI300X_TEST}")
  set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI300X_TEST})
else()
  message(STATUS "Removing test: ${NOT_FOR_MI300X_TEST}")
endif()
hip_add_exe_to_target(NAME AtomicsTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS hiprtc)
endif()
set(EXPECTED_ERRORS 48)
# SWDEV-435667: Below 2 tests failed in stress test on 01/12/23
#add_test(NAME Unit_atomicExch_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicExch_negative_kernels.cc 40)
#
#add_test(NAME Unit_atomicExch_system_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicExch_system_negative_kernels.cc 40)
# Below tests fail in PSDB
#add_test(NAME Unit_atomicAnd_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicAnd_negative_kernels.cc ${EXPECTED_ERRORS})
#
#add_test(NAME Unit_atomicOr_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicOr_negative_kernels.cc ${EXPECTED_ERRORS})
#
#add_test(NAME Unit_atomicXor_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicXor_negative_kernels.cc ${EXPECTED_ERRORS})
#
#add_test(NAME Unit_atomicMin_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicMin_negative_kernels.cc ${EXPECTED_ERRORS})
#
#add_test(NAME Unit_atomicMax_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicMax_negative_kernels.cc ${EXPECTED_ERRORS})
#add_test(NAME Unit_AtomicBuiltins_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomic_builtins_kernels.cc 60 27) # Should be 35 warnings, see EXSWHTEC-309
#add_test(NAME Unit_atomicAdd_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicAdd_negative_kernels.cc 48)
#add_test(NAME Unit_atomicSub_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicSub_negative_kernels.cc 48)
#add_test(NAME Unit_atomicInc_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicInc_negative_kernels.cc 8)
#
#add_test(NAME Unit_atomicDec_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicDec_negative_kernels.cc 8)
#
#add_test(NAME Unit_atomicCAS_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicCAS_negative_kernels.cc 48)
#
# SWDEV-435667: Below 2 tests failed in stress test on 01/12/23
#add_test(NAME Unit_atomicExch_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicExch_negative_kernels.cc 40)
#
#add_test(NAME Unit_atomicExch_system_Negative_Parameters
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
# atomicExch_system_negative_kernels.cc 40)
endif()
+129
Просмотреть файл
@@ -0,0 +1,129 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup __hip_atomic_compare_exchange_strong __hip_atomic_compare_exchange_strong
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition on a target memory location. Each thread will add the same value to the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of __hip_atomic_compare_exchange_strong
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - WAVEFRONT memory scope.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_compare_exchange_strong.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation and memory scope shared by every section of this test case.
  constexpr auto kOperation = AtomicOperation::kBuiltinCAS;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;
  // Second argument of the "scattered" pattern, in bytes.
  constexpr auto kCacheLineBytes = 128u;

  // Warp size of device 0 is the first argument of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Repeat the three access patterns once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition on a target memory location. Each thread will add the same value to the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of __hip_atomic_compare_exchange_strong
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - WORKGROUP memory scope.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_compare_exchange_strong.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation and memory scope shared by every section of this test case.
  constexpr auto kOperation = AtomicOperation::kBuiltinCAS;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WORKGROUP;
  // Second argument of the "scattered" pattern, in bytes.
  constexpr auto kCacheLineBytes = 128u;

  // Warp size of device 0 is the first argument of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Repeat the three access patterns once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, kCacheLineBytes);
    }
  }
}
+136
Просмотреть файл
@@ -0,0 +1,136 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "atomicExch_common.hh"
/**
* @addtogroup __hip_atomic_exchange __hip_atomic_exchange
* @{
* @ingroup AtomicsTest
* ________________________
* Test cases from other modules:
* - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* exchange into a runtime determined memory location. Each thread will exchange its own grid wide
* linear index + offset into the memory location, storing the return value into a separate output
* array slot corresponding to it. Once complete, the union of output array and exchange memory is
* validated to contain all values in the range [0, number_of_threads +
* number_of_exchange_memory_slots). Several memory access patterns are tested:
* -# All threads exchange to a single memory location
* -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of __hip_atomic_exchange
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory
* - Exchange memory located in shared memory
* - WAVEFRONT memory scope
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_exchange.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Wavefront", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Atomic flavour and memory scope shared by every section of this test case.
  constexpr auto kFlavour = AtomicScopes::builtin;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;
  // Second argument of the "scattered" pattern, in bytes.
  constexpr auto kCacheLineBytes = 128u;

  // Warp size of device 0 is the first argument of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Repeat the three access patterns once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, kFlavour, kScope>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, kFlavour, kScope>(warp_size,
                                                                         sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, kFlavour, kScope>(warp_size,
                                                                         kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* exchange into a runtime determined memory location. Each thread will exchange its own grid wide
* linear index + offset into the memory location, storing the return value into a separate output
* array slot corresponding to it. Once complete, the union of output array and exchange memory is
* validated to contain all values in the range [0, number_of_threads +
* number_of_exchange_memory_slots). Several memory access patterns are tested:
* -# All threads exchange to a single memory location
* -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of __hip_atomic_exchange
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory
* - Exchange memory located in shared memory
* - WORKGROUP memory scope
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_exchange.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Workgroup", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Atomic flavour and memory scope shared by every section of this test case.
  constexpr auto kFlavour = AtomicScopes::builtin;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WORKGROUP;
  // Second argument of the "scattered" pattern, in bytes.
  constexpr auto kCacheLineBytes = 128u;

  // Warp size of device 0 is the first argument of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Repeat the three access patterns once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, kFlavour, kScope>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, kFlavour, kScope>(warp_size,
                                                                         sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, kFlavour, kScope>(warp_size,
                                                                         kCacheLineBytes);
    }
  }
}
+132
Просмотреть файл
@@ -0,0 +1,132 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add
* @{
* @ingroup AtomicsTest
* ________________________
* Test cases from other modules:
* - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition on a target memory location. Each thread will add the same value to the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of __hip_atomic_fetch_add
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - WAVEFRONT memory scope.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_add.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Wavefront", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Operation and memory scope shared by every section of this test case.
  constexpr auto kOperation = AtomicOperation::kBuiltinAdd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;
  // Second argument of the "scattered" pattern, in bytes.
  constexpr auto kCacheLineBytes = 128u;

  // Warp size of device 0 is the first argument of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Repeat the three access patterns once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition on a target memory location. Each thread will add the same value to the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of __hip_atomic_fetch_add
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - WORKGROUP memory scope.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_add.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Workgroup", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Operation and memory scope shared by every section of this test case.
  constexpr auto kOperation = AtomicOperation::kBuiltinAdd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WORKGROUP;
  // Second argument of the "scattered" pattern, in bytes.
  constexpr auto kCacheLineBytes = 128u;

  // Warp size of device 0 is the first argument of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Repeat the three access patterns once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_size, kCacheLineBytes);
    }
  }
}
+187
Просмотреть файл
@@ -0,0 +1,187 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup __hip_atomic_fetch_and __hip_atomic_fetch_and
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_and.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_and.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_and.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_and.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_and.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_and.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kCacheLineSize);
    }
  }
}

/**
 * End doxygen group __hip_atomic_fetch_and.
 * @}
 */
+187
Просмотреть файл
@@ -0,0 +1,187 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup __hip_atomic_fetch_max __hip_atomic_fetch_max
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_max.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMax;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_max.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMax;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_max.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMax;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_max.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMax;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_max.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMax;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_max.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMax;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kCacheLineSize);
    }
  }
}

/**
 * End doxygen group __hip_atomic_fetch_max.
 * @}
 */
+187
Просмотреть файл
@@ -0,0 +1,187 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup __hip_atomic_fetch_min __hip_atomic_fetch_min
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_min.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_min.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_min.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_min.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_min.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_min.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kCacheLineSize);
    }
  }
}

/**
 * End doxygen group __hip_atomic_fetch_min.
 * @}
 */
+187
Просмотреть файл
@@ -0,0 +1,187 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup __hip_atomic_fetch_or __hip_atomic_fetch_or
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_or.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinOr;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_or.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinOr;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_or.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinOr;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_or.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinOr;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_or.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinOr;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_or.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinOr;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kCacheLineSize);
    }
  }
}

/**
 * End doxygen group __hip_atomic_fetch_or.
 * @}
 */
+187
Просмотреть файл
@@ -0,0 +1,187 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup __hip_atomic_fetch_xor __hip_atomic_fetch_xor
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_xor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinXor;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_xor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinXor;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_xor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinXor;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          device_warp_size, kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on the same
* address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_xor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinXor;
  constexpr auto kElementSize = sizeof(TestType);

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on adjacent
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_xor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  // Operation under test and the element size forwarded to the shared helper.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinXor;
  constexpr auto kElementSize = sizeof(TestType);

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kElementSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on scattered
* addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/__hip_atomic_fetch_xor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses", "",
                   int, unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinXor;
  // Hard-coded 128 rather than a value queried from the device; presumably a cache-line size in
  // bytes, going by the original variable name -- TODO confirm against the helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size reported for device ordinal 0; passed as the helper's first argument.
  int device_warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration; the counter keeps section names unique.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          device_warp_size, kCacheLineSize);
    }
  }
}

/**
 * End doxygen group __hip_atomic_fetch_xor.
 * @}
 */
+551
Просмотреть файл
@@ -0,0 +1,551 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "memory_order_common.hh"
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin load/store
// operation, once per memory order (ACQUIRE, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_load_store_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kLoadStore;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin exchange
// operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_exchange_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kExchange;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin strong
// compare-exchange operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per scope.
TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kCompareExchangeStrong;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin weak
// compare-exchange operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per scope.
TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kCompareExchangeWeak;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin fetch-add
// operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kAdd;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin fetch-and
// operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kAnd;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin fetch-or
// operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kOr;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin fetch-xor
// operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kXor;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin fetch-min
// operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kMin;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
// Instantiates AcquireRelease::Test / AcquireRelease::SystemTest for the builtin fetch-max
// operation, once per memory order (ACQUIRE, ACQ_REL, SEQ_CST) and per memory scope.
TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Acquire_Release") {
  constexpr auto kOp = BuiltinAtomicOperation::kMax;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>(); }
    SECTION("WORKGROUP") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>(); }
    SECTION("AGENT") { AcquireRelease::Test<kOp, kOrder, __HIP_MEMORY_SCOPE_AGENT>(); }
    SECTION("SYSTEM") { AcquireRelease::SystemTest<kOp, kOrder>(); }
  }
}
+577
Просмотреть файл
@@ -0,0 +1,577 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
#include <hip/hip_cooperative_groups.h>
#include <resource_guards.hh>
#include <cmd_options.hh>
namespace cg = cooperative_groups;
// Atomic operations to which the tests in this file apply. The enumerator selects which device
// (and, where supported, host) atomic is exercised.
enum class AtomicOperation {
  kAdd = 0,           // atomicAdd
  kAddSystem = 1,     // atomicAdd_system
  kSub = 2,           // atomicSub
  kSubSystem = 3,     // atomicSub_system
  kInc = 4,           // atomicInc
  kDec = 5,           // atomicDec
  kUnsafeAdd = 6,     // unsafeAtomicAdd
  kSafeAdd = 7,       // safeAtomicAdd
  kCASAdd = 8,        // addition built on atomicCAS
  kCASAddSystem = 9,  // addition built on atomicCAS_system
  kBuiltinAdd = 10,   // __hip_atomic_fetch_add
  kBuiltinCAS = 11    // addition built on __hip_atomic_compare_exchange_strong
};
// Operands handed to the atomic operations under test:
//  - integer test types use kIntegerTestValue,
//  - floating point test types use kFloatingPointTestValue,
//  - increment/decrement use kIncDecWraparoundValue as their wraparound bound.
constexpr int kIntegerTestValue = 7;
constexpr double kFloatingPointTestValue = 3.125;
constexpr int kIncDecWraparoundValue = 1023;
// Selects the operand used by the atomic operation under test, converted to TestType:
// - kIncDecWraparoundValue when `operation` is kInc or kDec,
// - kFloatingPointTestValue when TestType is a floating point type,
// - kIntegerTestValue otherwise.
template <typename TestType, AtomicOperation operation>
__host__ __device__ TestType GetTestValue() {
  if constexpr (operation == AtomicOperation::kInc || operation == AtomicOperation::kDec) {
    return kIncDecWraparoundValue;
  } else if constexpr (std::is_floating_point_v<TestType>) {
    return kFloatingPointTestValue;
  } else {
    return kIntegerTestValue;
  }
}
// Atomic addition built on atomicCAS: keeps retrying the compare-and-swap until the value it
// expected at `address` is the value that was actually there. Returns the value that was stored
// at `address` immediately before the successful update.
template <typename TestType> __device__ TestType CASAtomicAdd(TestType* address, TestType val) {
  TestType observed = *address;
  while (true) {
    const TestType expected = observed;
    observed = atomicCAS(address, expected, val + expected);
    if (!(expected != observed)) {
      return observed;
    }
  }
}
// Atomic addition built on atomicCAS_system: keeps retrying the compare-and-swap until the value
// it expected at `address` is the value that was actually there. Returns the value that was
// stored at `address` immediately before the successful update.
template <typename TestType>
__device__ TestType CASAtomicAddSystem(TestType* address, TestType val) {
  TestType observed = *address;
  while (true) {
    const TestType expected = observed;
    observed = atomicCAS_system(address, expected, val + expected);
    if (!(expected != observed)) {
      return observed;
    }
  }
}
// Atomic addition built on __hip_atomic_compare_exchange_strong (relaxed ordering for both the
// success and the failure case, scope given by `memory_scope`). Retries until the value expected
// at `address` matches the value the builtin reports, and returns that value.
template <typename TestType, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__device__ TestType BuiltinCASAtomicAdd(TestType* address, TestType val) {
  TestType observed = *address;
  while (true) {
    const TestType expected = observed;
    // The builtin takes the expected value by pointer; the retry decision below is made by
    // comparing `witness` after the call with the value we expected, not by the builtin's result.
    TestType witness = expected;
    __hip_atomic_compare_exchange_strong(address, &witness, val + expected, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, memory_scope);
    observed = witness;
    if (!(expected != observed)) {
      return observed;
    }
  }
}
// Performs the atomic operation selected by `operation` on `mem`, using the operand returned by
// GetTestValue<TestType, operation>(), and returns whatever the selected atomic returns.
// `memory_scope` is forwarded to the builtin operations only (kBuiltinAdd, kBuiltinCAS) and
// defaults to __HIP_MEMORY_SCOPE_AGENT; the other operations ignore it.
// An enumerator without a matching branch is rejected at compile time instead of silently falling
// off the end of a non-void function.
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__device__ TestType PerformAtomicOperation(TestType* const mem) {
  const auto val = GetTestValue<TestType, operation>();

  if constexpr (operation == AtomicOperation::kAdd) {
    return atomicAdd(mem, val);
  } else if constexpr (operation == AtomicOperation::kAddSystem) {
    return atomicAdd_system(mem, val);
  } else if constexpr (operation == AtomicOperation::kSub) {
    return atomicSub(mem, val);
  } else if constexpr (operation == AtomicOperation::kSubSystem) {
    return atomicSub_system(mem, val);
  } else if constexpr (operation == AtomicOperation::kInc) {
    return atomicInc(mem, val);
  } else if constexpr (operation == AtomicOperation::kDec) {
    return atomicDec(mem, val);
  } else if constexpr (operation == AtomicOperation::kUnsafeAdd) {
    return unsafeAtomicAdd(mem, val);
  } else if constexpr (operation == AtomicOperation::kSafeAdd) {
    return safeAtomicAdd(mem, val);
  } else if constexpr (operation == AtomicOperation::kCASAdd) {
    return CASAtomicAdd(mem, val);
  } else if constexpr (operation == AtomicOperation::kCASAddSystem) {
    return CASAtomicAddSystem(mem, val);
  } else if constexpr (operation == AtomicOperation::kBuiltinAdd) {
    return __hip_atomic_fetch_add(mem, val, __ATOMIC_RELAXED, memory_scope);
  } else if constexpr (operation == AtomicOperation::kBuiltinCAS) {
    return BuiltinCASAtomicAdd<TestType, memory_scope>(mem, val);
  } else {
    // Dependent condition: only evaluated if this branch is instantiated, i.e. when a new
    // AtomicOperation enumerator was added without a branch above.
    static_assert(sizeof(TestType) == 0, "PerformAtomicOperation: unhandled AtomicOperation");
  }
}
// Single-address test kernel: every thread of the grid performs the atomic operation selected by
// `operation` on the same location and stores the value returned by the atomic in
// `old_vals[<grid-wide thread rank>]`.
// With `use_shared_mem` the operation targets a __shared__ variable instead of `global_mem`: the
// thread with grid rank 0 seeds it from `global_mem[0]` before the atomics and copies it back to
// `global_mem[0]` afterwards.
// NOTE(review): only the thread with grid rank 0 seeds/writes back the shared variable, so the
// shared-memory variant looks like it expects a single-block launch - confirm against callers.
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) {
  __shared__ TestType shared_mem;

  const auto grid_rank = cg::this_grid().thread_rank();
  TestType* const target = use_shared_mem ? &shared_mem : global_mem;

  if constexpr (use_shared_mem) {
    if (grid_rank == 0) {
      *target = *global_mem;
    }
    __syncthreads();
  }

  old_vals[grid_rank] = PerformAtomicOperation<TestType, operation, memory_scope>(target);

  if constexpr (use_shared_mem) {
    __syncthreads();
    if (grid_rank == 0) {
      *global_mem = *target;
    }
  }
}
// Returns the address of element `idx` of the array starting at `ptr`, where consecutive elements
// are `pitch` bytes apart (i.e. `ptr` advanced by `idx * pitch` bytes).
template <typename TestType>
__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch,
                                            const unsigned int idx) {
  uint8_t* const base = reinterpret_cast<uint8_t*>(ptr);
  const unsigned int byte_offset = idx * pitch;
  return reinterpret_cast<TestType*>(base + byte_offset);
}
// Executes load-store operations on every byte of the half-open range [begin_addr, end_addr):
// each byte is read, XOR-ed with 0xAB and written back through a volatile pointer so the accesses
// are not optimized away.
// An empty or inverted range (begin_addr >= end_addr, e.g. when the pitch is not larger than the
// element size) performs no accesses.
// Declared inline, like the other non-template functions in this header, because the header can
// be included from more than one translation unit.
inline __device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  for (volatile uint8_t* addr = begin_addr; addr < end_addr; ++addr) {
    uint8_t val = *addr;
    val ^= 0xAB;
    *addr = val;
  }
}
// Pitched test kernel: `global_mem` holds `width` elements of TestType that are `pitch` bytes
// apart. `pitch` may be larger than sizeof(TestType) so that the targeted locations can be
// scattered over different cache lines.
//
// With N = <grid size> - `width`:
//  - threads with grid rank < N perform the atomic operation selected by `operation` on element
//    (rank % width) and store the value returned by the atomic in `old_vals[rank]`;
//  - the remaining `width` threads perform no atomic and write nothing to `old_vals`; instead they
//    generate plain load/store traffic on the padding bytes that follow element (rank % width),
//    so the atomics are exercised with some interference next to them.
//
// If `use_shared_mem` is true, the elements are first copied into dynamic shared memory, the
// atomics run on shared memory, and the elements are copied back to `global_mem` afterwards. The
// copies are done by the threads with grid rank < `width`.
//
// For example, given that sizeof(TestType) is 1, `width` is 3, and `pitch` is 4:
//
//               0   1   2   3   4   5   6   7   8   9   10  11
// global_mem -> | x |   |   |   | x |   |   |   | x |   |   |   |
//               |     pitch     |     pitch     |     pitch     |
//
// The atomic operations target the elements denoted with `x` (addresses 0, 4, 8); the extra
// memory traffic is generated on the addresses in between (1, 2, 3, 5, 6, 7, 9, 10, 11).
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
                           const unsigned int width, const unsigned int pitch) {
  extern __shared__ uint8_t shared_mem[];

  const auto tid = cg::this_grid().thread_rank();
  TestType* const mem = use_shared_mem ? reinterpret_cast<TestType*>(shared_mem) : global_mem;

  if constexpr (use_shared_mem) {
    if (tid < width) {
      *PitchedOffset(mem, pitch, tid) = *PitchedOffset(global_mem, pitch, tid);
    }
    __syncthreads();
  }

  // Number of threads that perform the atomic; the last `width` threads generate traffic instead.
  const auto atomic_thread_count = cg::this_grid().size() - width;
  TestType* const slot = PitchedOffset(mem, pitch, tid % width);

  if (tid >= atomic_thread_count) {
    // Padding bytes between the end of this element and the start of the next one.
    uint8_t* const gap_begin = reinterpret_cast<uint8_t*>(slot + 1);
    uint8_t* const gap_end = reinterpret_cast<uint8_t*>(slot) + pitch;
    GenerateMemoryTraffic(gap_begin, gap_end);
  } else {
    old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(slot);
  }

  if constexpr (use_shared_mem) {
    __syncthreads();
    if (tid < width) {
      *PitchedOffset(global_mem, pitch, tid) = *PitchedOffset(mem, pitch, tid);
    }
  }
}
// Configuration of a single test run.
struct TestParams {
  dim3 blocks;                          // number of blocks per kernel launch
  dim3 threads;                         // number of threads per kernel launch
  unsigned int num_devices = 1u;        // number of devices used
  unsigned int kernel_count = 1u;       // number of kernels launched per device
  unsigned int width = 1u;              // number of memory locations targeted
  unsigned int pitch = 0u;              // defines spacing between memory locations
  unsigned int host_thread_count = 0u;  // number of host threads launched
  LinearAllocs alloc_type;              // type of allocation used

  // Total number of device threads in one kernel launch (grid volume times block volume).
  auto ThreadCount() const {
    const auto blocks_per_grid = blocks.x * blocks.y * blocks.z;
    const auto threads_per_block = threads.x * threads.y * threads.z;
    return blocks_per_grid * threads_per_block;
  }

  // Number of iterations each host thread performs: one twentieth of all device threads across
  // every device and kernel, but never fewer than `width`.
  auto HostIterationsPerThread() const {
    const auto device_share = num_devices * kernel_count * ThreadCount() / 20;
    return std::max(device_share, width);
  }
};
// Reference implementation used to verify results
template <typename TestType, AtomicOperation operation>
std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const TestParams& p) {
const auto val = GetTestValue<TestType, operation>();
const auto total_thread_count = p.num_devices * p.kernel_count * p.ThreadCount() +
p.host_thread_count * p.HostIterationsPerThread();
std::vector<TestType> res_vals(p.width);
std::vector<TestType> old_vals;
old_vals.reserve(total_thread_count);
auto perform_op = [&](unsigned id) {
auto& res = res_vals[id % p.width];
old_vals.push_back(res);
if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem ||
operation == AtomicOperation::kUnsafeAdd ||
operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd ||
operation == AtomicOperation::kCASAddSystem ||
operation == AtomicOperation::kBuiltinAdd ||
operation == AtomicOperation::kBuiltinCAS) {
res = res + val;
} else if constexpr (operation == AtomicOperation::kSub ||
operation == AtomicOperation::kSubSystem) {
res = res - val;
} else if constexpr (operation == AtomicOperation::kInc) {
res = (res >= val) ? 0 : res + 1;
} else if constexpr (operation == AtomicOperation::kDec) {
res = ((res == 0) || (res > val)) ? val : res - 1;
}
};
for (auto i = 0u; i < p.num_devices; ++i) {
for (auto j = 0u; j < p.kernel_count; ++j) {
for (auto tid = 0u; tid < p.ThreadCount() - p.width; ++tid) {
perform_op(tid);
}
}
}
for (auto i = 0u; i < p.host_thread_count; ++i) {
for (auto j = 0u; j < p.HostIterationsPerThread(); ++j) {
perform_op(j);
}
}
return {res_vals, old_vals};
}
// Checks the test results against the host reference model (TestKernelHostRef):
//  - `res_vals` (final value of each location) must match element by element;
//  - `old_vals` (values returned by the individual atomics) are compared after sorting both
//    sides, since the order in which concurrent operations complete is not fixed.
// `old_vals` is sorted in place.
// NOTE(review): both loops index the expected vectors using the size of the actual vectors; this
// assumes the reference produces at least as many entries as the test collected - confirm.
template <typename TestType, AtomicOperation operation>
void Verify(const TestParams& p, std::vector<TestType>& res_vals, std::vector<TestType>& old_vals) {
  auto reference = TestKernelHostRef<TestType, operation>(p);
  auto& expected_res_vals = std::get<0>(reference);
  auto& expected_old_vals = std::get<1>(reference);

  for (unsigned int i = 0; i < res_vals.size(); ++i) {
    INFO("Results index: " << i);
    REQUIRE(expected_res_vals[i] == res_vals[i]);
  }

  std::sort(old_vals.begin(), old_vals.end());
  std::sort(expected_old_vals.begin(), expected_old_vals.end());

  for (unsigned int i = 0; i < old_vals.size(); ++i) {
    INFO("Old values index: " << i);
    REQUIRE(expected_old_vals[i] == old_vals[i]);
  }
}
// Launches one test kernel on `stream` with the grid/block dimensions from `p`.
// The single-address kernel is used when exactly one tightly packed element is targeted
// (width == 1 and pitch == sizeof(TestType)); otherwise the pitched kernel is launched.
// When `use_shared_mem` is set, width * pitch bytes of dynamic shared memory are requested.
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr,
                  TestType* const old_vals) {
  const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u;
  const bool single_address = (p.width == 1) && (p.pitch == sizeof(TestType));

  if (!single_address) {
    TestKernel<TestType, operation, use_shared_mem, memory_scope>
        <<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals, p.width, p.pitch);
    return;
  }

  TestKernel<TestType, operation, use_shared_mem, memory_scope>
      <<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals);
}
// Body of one host thread: performs `iterations` relaxed host atomics on the pitched array `mem`
// (iteration i targets element i % width) and stores the value returned by each atomic in
// `old_vals[i]`.
// System-scope and builtin add flavours map to __atomic_fetch_add, kSubSystem maps to
// __atomic_fetch_sub; for every other operation the loop does nothing.
// The last parameter is unused.
template <typename TestType, AtomicOperation operation>
void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* const old_vals,
                         const unsigned int width, const unsigned pitch, TestType /*base_val*/) {
  constexpr bool kIsHostAdd =
      operation == AtomicOperation::kAddSystem || operation == AtomicOperation::kCASAddSystem ||
      operation == AtomicOperation::kBuiltinAdd || operation == AtomicOperation::kBuiltinCAS;
  constexpr bool kIsHostSub = operation == AtomicOperation::kSubSystem;

  const auto val = GetTestValue<TestType, operation>();

  for (unsigned int i = 0; i < iterations; ++i) {
    if constexpr (kIsHostAdd) {
      TestType* const target = PitchedOffset(mem, pitch, i % width);
      old_vals[i] = __atomic_fetch_add(target, val, __ATOMIC_RELAXED);
    } else if constexpr (kIsHostSub) {
      TestType* const target = PitchedOffset(mem, pitch, i % width);
      old_vals[i] = __atomic_fetch_sub(target, val, __ATOMIC_RELAXED);
    }
  }
}
// Starts TestParams::host_thread_count host threads that run HostAtomicOperation on `mem`, i.e.
// on the same locations the test kernels target, and waits for all of them to finish.
// Host thread i writes its observed values to the slice of `old_vals` that starts after the
// entries reserved for the device threads (num_devices * kernel_count * ThreadCount()) plus
// i * HostIterationsPerThread(). Does nothing when no host threads are requested.
template <typename TestType, AtomicOperation operation>
void PerformHostAtomicOperation(const TestParams& p, TestType* mem, TestType* const old_vals) {
  if (p.host_thread_count == 0) {
    return;
  }

  const auto device_entries = p.num_devices * p.kernel_count * p.ThreadCount();
  const auto iterations = p.HostIterationsPerThread();

  std::vector<std::thread> workers;
  for (auto worker = 0u; worker < p.host_thread_count; ++worker) {
    const auto first_entry = device_entries + worker * iterations;
    workers.emplace_back(HostAtomicOperation<TestType, operation>, iterations, mem,
                         old_vals + first_entry, p.width, p.pitch, first_entry);
  }

  for (auto& worker : workers) {
    worker.join();
  }
}
// This is the main body of the test:
// 1. Allocate memory based on TestParams::alloc_type
// 2. Launch kernels based on TestParams::num_devices and TestParams::kernel_count
// 3. Launch host threads based on TestParams::host_thread_count
// 4. Verify the results
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void TestCore(const TestParams& p) {
const unsigned int flags =
p.alloc_type == LinearAllocs::mallocAndRegister ? hipHostRegisterMapped : 0u;
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
std::vector<StreamGuard> streams;
for (auto i = 0; i < p.num_devices; ++i) {
HIP_CHECK(hipSetDevice(i));
old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size);
for (auto j = 0; j < p.kernel_count; ++j) {
streams.emplace_back(Streams::created);
}
}
const auto mem_alloc_size = p.width * p.pitch;
LinearAllocGuard<TestType> mem_dev(p.alloc_type, mem_alloc_size, flags);
std::vector<TestType> old_vals(p.num_devices * p.kernel_count * p.ThreadCount() +
p.host_thread_count * p.HostIterationsPerThread());
std::vector<TestType> res_vals(p.width);
TestType* const mem_ptr =
p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr();
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
for (auto i = 0u; i < p.num_devices; ++i) {
for (auto j = 0u; j < p.kernel_count; ++j) {
const auto& stream = streams[i * p.kernel_count + j].stream();
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
old_vals);
}
}
PerformHostAtomicOperation<TestType, operation>(p, mem_dev.host_ptr(), old_vals.data());
for (auto i = 0u; i < p.num_devices; ++i) {
const auto device_offset = i * p.kernel_count * p.ThreadCount();
HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(),
old_vals_alloc_size, hipMemcpyDeviceToHost));
}
HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType),
p.width, hipMemcpyDeviceToHost));
Verify<TestType, operation>(p, res_vals, old_vals);
}
// Catch2 generator for the thread dimensions the tests are repeated with: dim3(16) and
// dim3(1024).
inline dim3 GenerateThreadDimensions() {
  const dim3 thread_dims = GENERATE(dim3(16), dim3(1024));
  return thread_dims;
}
// Catch2 generator for the block dimensions the tests are repeated with. Both values are derived
// from the multiprocessor count of device 0: the count itself, and the count plus half of it
// (integer division).
inline dim3 GenerateBlockDimensions() {
  int multiprocessors{0};
  HIP_CHECK(hipDeviceGetAttribute(&multiprocessors, hipDeviceAttributeMultiprocessorCount, 0));
  const int one_and_a_half = multiprocessors + multiprocessors / 2;
  return GENERATE_COPY(dim3(multiprocessors), dim3(one_and_a_half));
}
// Configures and creates the TestCore for a single device, and a single kernel launch
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
TestParams params;
params.num_devices = 1;
params.kernel_count = 1;
if constexpr ((operation == AtomicOperation::kBuiltinAdd ||
operation == AtomicOperation::kBuiltinCAS) &&
memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
params.threads = 1;
} else if constexpr ((operation == AtomicOperation::kBuiltinAdd ||
operation == AtomicOperation::kBuiltinCAS) &&
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
params.threads = dim3(warp_size);
} else {
params.threads = GenerateThreadDimensions();
}
params.width = width;
params.pitch = pitch;
SECTION("Global memory") {
if constexpr ((operation == AtomicOperation::kBuiltinAdd ||
operation == AtomicOperation::kBuiltinCAS) &&
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
params.blocks = dim3(1);
} else {
params.blocks = GenerateBlockDimensions();
}
using LA = LinearAllocs;
for (const auto alloc_type :
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false, memory_scope>(params);
}
}
}
SECTION("Shared memory") {
params.blocks = dim3(1);
params.alloc_type = LinearAllocs::hipMalloc;
TestCore<TestType, operation, true, memory_scope>(params);
}
}
// Configures and creates the TestCore for a single device, and multiple kernel launches
template <typename TestType, AtomicOperation operation>
void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width,
const unsigned int pitch) {
int concurrent_kernels = 0;
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0));
if (!concurrent_kernels) {
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
return;
}
TestParams params;
params.num_devices = 1;
params.kernel_count = kernel_count;
params.blocks = GenerateBlockDimensions();
params.threads = GenerateThreadDimensions();
params.width = width;
params.pitch = pitch;
using LA = LinearAllocs;
for (const auto alloc_type :
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false>(params);
}
}
}
// Configures and creates the TestCore for a multiple devices (and host), and multiple kernel
// launches
template <typename TestType, AtomicOperation operation>
void MultipleDeviceMultipleKernelAndHostTest(const unsigned int num_devices,
const unsigned int kernel_count,
const unsigned int width, const unsigned int pitch,
const unsigned int host_thread_count = 0u) {
if (num_devices > 1) {
if (HipTest::getDeviceCount() < num_devices) {
std::string msg = std::to_string(num_devices) + " devices are required";
HipTest::HIP_SKIP_TEST(msg.c_str());
return;
}
}
if (kernel_count > 1) {
for (auto i = 0u; i < num_devices; ++i) {
int concurrent_kernels = 0;
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i));
if (!concurrent_kernels) {
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
return;
}
}
}
TestParams params;
params.num_devices = num_devices;
params.kernel_count = kernel_count;
params.blocks = GenerateBlockDimensions();
params.threads = GenerateThreadDimensions();
params.width = width;
params.pitch = pitch;
params.host_thread_count = host_thread_count;
using LA = LinearAllocs;
for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
}
}
}
+167
Просмотреть файл
@@ -0,0 +1,167 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include "atomicAdd_negative_kernels_rtc.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicAdd atomicAdd
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition on a target memory location. Each thread will add the same value to the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicAdd
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - Several grid and block dimension combinations (only one block is used for shared memory).
* Test source
* ------------------------
* - unit/atomics/atomicAdd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // The multi-element access patterns use one target element per lane of a warp on device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // Pitch, in bytes, between target elements.
  constexpr auto kPackedPitch = sizeof(TestType);  // elements directly next to each other
  constexpr auto kScatteredPitch = 128u;           // elements spread out

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kAdd>(1, kPackedPitch);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kAdd>(lanes_per_warp, kPackedPitch);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kAdd>(lanes_per_warp,
                                                                    kScatteredPitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
* an atomic addition on a target memory location. Each thread will add the same value to the memory
* location, storing the return value into a separate output array slot corresponding to it. Once
* complete, the output array and target memory is validated to contain all the expected values.
* Several memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicAdd
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicAdd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive_Multi_Kernel", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // The multi-element access patterns use one target element per lane of a warp on device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  constexpr auto kKernelCount = 2u;                // kernels launched per run
  constexpr auto kPackedPitch = sizeof(TestType);  // byte pitch: elements next to each other
  constexpr auto kScatteredPitch = 128u;           // byte pitch: elements spread out

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kAdd>(kKernelCount, 1,
                                                                      kPackedPitch);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kAdd>(kKernelCount, lanes_per_warp,
                                                                      kPackedPitch);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kAdd>(kKernelCount, lanes_per_warp,
                                                                      kScatteredPitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
* atomicAdd.
* Test source
* ------------------------
* - unit/atomics/atomicAdd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicAdd_Negative_Parameters_RTC") {
  // One run per atomicAdd overload; every source contains the negative kernels for that overload.
  const auto program_source = GENERATE(kAtomicAdd_int, kAtomicAdd_uint, kAtomicAdd_ulong,
                                       kAtomicAdd_ulonglong, kAtomicAdd_float, kAtomicAdd_double);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicAdd_negative.cc", 0, nullptr, nullptr));

  // The compilation result is only checked after the log has been read and the program destroyed.
  const hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count the compiler error messages in the log.
  const std::string error_message{"error:"};
  int error_count{0};
  for (size_t pos = log.find(error_message, 0); pos != std::string::npos;
       pos = log.find(error_message, pos + 1)) {
    ++error_count;
  }
  const int expected_error_count{8};

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+219
Просмотреть файл
@@ -0,0 +1,219 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Empty user-defined type with device-side constructor and destructor. The negative kernels
// below take a pointer to it as the atomicAdd address argument.
struct Dummy {
  __device__ Dummy() {}
  __device__ ~Dummy() {}
};
/* int atomicAdd(int* address, int val) */
// Negative kernels for the int overload: each one calls atomicAdd with a different unsuitable
// argument combination. NOTE(review): presumably every kernel is expected to be rejected by the
// compiler - confirm against the build/test harness that consumes this file.
// v1: the address argument is the address of the pointer parameter (int**).
__global__ void atomicAdd_int_v1(int* address, int* result) { *result = atomicAdd(&address, 1234); }
// v2: the value argument is the pointer itself.
__global__ void atomicAdd_int_v2(int* address, int* result) {
*result = atomicAdd(address, address);
}
// v3: the address argument is an integer literal.
__global__ void atomicAdd_int_v3(int* address, int* result) { *result = atomicAdd(1234, 1234); }
// v4: the address argument points to a user-defined class.
__global__ void atomicAdd_int_v4(Dummy* address, int* result) {
*result = atomicAdd(address, 1234);
}
// v5: the address argument is a char pointer.
__global__ void atomicAdd_int_v5(char* address, int* result) { *result = atomicAdd(address, 1234); }
// v6: the address argument is a short pointer.
__global__ void atomicAdd_int_v6(short* address, int* result) {
*result = atomicAdd(address, 1234);
}
// v7: the address argument is a long pointer.
__global__ void atomicAdd_int_v7(long* address, int* result) { *result = atomicAdd(address, 1234); }
// v8: the address argument is a long long pointer.
__global__ void atomicAdd_int_v8(long long* address, int* result) {
*result = atomicAdd(address, 1234);
}
/* unsigned int atomicAdd(unsigned int* address, unsigned int val) */
// Negative kernels for the unsigned int overload: each one calls atomicAdd with a different
// unsuitable argument combination. NOTE(review): presumably every kernel is expected to be
// rejected by the compiler - confirm against the build/test harness that consumes this file.
// v1: the address argument is the address of the pointer parameter (unsigned int**).
__global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicAdd(&address, 1234);
}
// v2: the value argument is the pointer itself.
__global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicAdd(address, address);
}
// v3: the address argument is an integer literal.
__global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicAdd(1234, 1234);
}
// v4: the address argument points to a user-defined class.
__global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
// v5: the address argument is a char pointer.
__global__ void atomicAdd_uint_v5(char* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
// v6: the address argument is a short pointer.
__global__ void atomicAdd_uint_v6(short* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
// v7: the address argument is a long pointer.
__global__ void atomicAdd_uint_v7(long* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
// v8: the address argument is a long long pointer.
__global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
/* atomicAdd(unsigned long* address, unsigned long val) */
// Negative kernels for the unsigned long overload: each one calls atomicAdd with a different
// unsuitable argument combination. NOTE(review): presumably every kernel is expected to be
// rejected by the compiler - confirm against the build/test harness that consumes this file.
// v1: the address argument is the address of the pointer parameter (unsigned long**).
__global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicAdd(&address, 1234);
}
// v2: the value argument is the pointer itself.
__global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicAdd(address, address);
}
// v3: the address argument is an integer literal.
__global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicAdd(1234, 1234);
}
// v4: the address argument points to a user-defined class.
__global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
// v5: the address argument is a char pointer.
__global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
// v6: the address argument is a short pointer.
__global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
// v7: the address argument is a (signed) long pointer.
__global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
// v8: the address argument is a long long pointer.
__global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
/* atomicAdd(unsigned long long* address, unsigned long long val) */
// Negative kernels for the unsigned long long overload: each one calls atomicAdd with a
// different unsuitable argument combination. NOTE(review): presumably every kernel is expected
// to be rejected by the compiler - confirm against the build/test harness that consumes this file.
// v1: the address argument is the address of the pointer parameter (unsigned long long**).
__global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(&address, 1234);
}
// v2: the value argument is the pointer itself.
__global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(address, address);
}
// v3: the address argument is an integer literal.
__global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(1234, 1234);
}
// v4: the address argument points to a user-defined class.
__global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
// v5: the address argument is a char pointer.
__global__ void atomicAdd_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
// v6: the address argument is a short pointer.
__global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
// v7: the address argument is a long pointer.
__global__ void atomicAdd_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
// v8: the address argument is a (signed) long long pointer.
__global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
/* atomicAdd(float* address, float val) */
// Negative kernels for the float overload: each one calls atomicAdd with a different unsuitable
// argument combination. NOTE(review): presumably every kernel is expected to be rejected by the
// compiler - confirm against the build/test harness that consumes this file.
// v1: the address argument is the address of the pointer parameter (float**).
__global__ void atomicAdd_float_v1(float* address, float* result) {
  *result = atomicAdd(&address, 1234.f);
}
// v2: the value argument is the pointer itself.
__global__ void atomicAdd_float_v2(float* address, float* result) {
  *result = atomicAdd(address, address);
}
// v3: the address argument is a float literal.
__global__ void atomicAdd_float_v3(float* address, float* result) {
  *result = atomicAdd(1234.f, 1234.f);
}
// v4: the address argument points to a user-defined class.
__global__ void atomicAdd_float_v4(Dummy* address, float* result) {
  *result = atomicAdd(address, 1234.f);
}
// v5: the address argument is a char pointer.
__global__ void atomicAdd_float_v5(char* address, float* result) {
  *result = atomicAdd(address, 1234.f);
}
// v6: the address argument is a short pointer.
__global__ void atomicAdd_float_v6(short* address, float* result) {
  *result = atomicAdd(address, 1234.f);
}
// v7: the address argument is a long pointer.
__global__ void atomicAdd_float_v7(long* address, float* result) {
  *result = atomicAdd(address, 1234.f);
}
// v8: the address argument is a long long pointer. The value is a float literal, like in the
// other kernels of this group, so that the call is made with a float value.
__global__ void atomicAdd_float_v8(long long* address, float* result) {
  *result = atomicAdd(address, 1234.f);
}
/* atomicAdd(double* address, double val) */
// Negative kernels for the double overload: each one calls atomicAdd with a different unsuitable
// argument combination. NOTE(review): presumably every kernel is expected to be rejected by the
// compiler - confirm against the build/test harness that consumes this file.
// v1: the address argument is the address of the pointer parameter (double**).
__global__ void atomicAdd_double_v1(double* address, double* result) {
*result = atomicAdd(&address, 1234.0);
}
// v2: the value argument is the pointer itself.
__global__ void atomicAdd_double_v2(double* address, double* result) {
*result = atomicAdd(address, address);
}
// v3: the address argument is a double literal.
__global__ void atomicAdd_double_v3(double* address, double* result) {
*result = atomicAdd(1234.0, 1234.0);
}
// v4: the address argument points to a user-defined class.
__global__ void atomicAdd_double_v4(Dummy* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
// v5: the address argument is a char pointer.
__global__ void atomicAdd_double_v5(char* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
// v6: the address argument is a short pointer.
__global__ void atomicAdd_double_v6(short* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
// v7: the address argument is a long pointer.
__global__ void atomicAdd_double_v7(long* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
// v8: the address argument is a long long pointer.
__global__ void atomicAdd_double_v8(long long* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
+273
Просмотреть файл
@@ -0,0 +1,273 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
// Runtime-compiled source with eight kernels that call atomicAdd for the int overload, each
// with a different unsuitable argument combination (pointer-to-pointer address, pointer as
// value, literal as address, and Dummy/char/short/long/long long pointers as address).
// Unit_atomicAdd_Negative_Parameters_RTC compiles it and expects eight "error:" entries in the
// compile log.
static constexpr auto kAtomicAdd_int{
R"(
__global__ void atomicAdd_int_v1(int* address, int* result) {
*result = atomicAdd(&address, 1234);
}
__global__ void atomicAdd_int_v2(int* address, int* result) {
*result = atomicAdd(address, address);
}
__global__ void atomicAdd_int_v3(int* address, int* result) {
*result = atomicAdd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAdd_int_v4(Dummy* address, int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_int_v5(char* address, int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_int_v6(short* address, int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_int_v7(long* address, int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_int_v8(long long* address, int* result) {
*result = atomicAdd(address, 1234);
}
)"};
// Runtime-compiled source with eight kernels that call atomicAdd for the unsigned int overload,
// each with a different unsuitable argument combination (pointer-to-pointer address, pointer as
// value, literal as address, and Dummy/char/short/long/long long pointers as address).
// Unit_atomicAdd_Negative_Parameters_RTC compiles it and expects eight "error:" entries in the
// compile log.
static constexpr auto kAtomicAdd_uint{
R"(
__global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicAdd(&address, 1234);
}
__global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicAdd(address, address);
}
__global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicAdd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_uint_v5(char* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_uint_v6(short* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_uint_v7(long* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
)"};
// Runtime-compiled source with eight kernels that call atomicAdd for the unsigned long overload,
// each with a different unsuitable argument combination (pointer-to-pointer address, pointer as
// value, literal as address, and Dummy/char/short/long/long long pointers as address).
// Unit_atomicAdd_Negative_Parameters_RTC compiles it and expects eight "error:" entries in the
// compile log.
static constexpr auto kAtomicAdd_ulong{
R"(
__global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicAdd(&address, 1234);
}
__global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicAdd(address, address);
}
__global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicAdd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
)"};
// Runtime-compiled source with eight kernels that call atomicAdd for the unsigned long long
// overload, each with a different unsuitable argument combination (pointer-to-pointer address,
// pointer as value, literal as address, and Dummy/char/short/long/long long pointers as address).
// Unit_atomicAdd_Negative_Parameters_RTC compiles it and expects eight "error:" entries in the
// compile log.
static constexpr auto kAtomicAdd_ulonglong{
R"(
__global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(&address, 1234);
}
__global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(address, address);
}
__global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
__global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
)"};
// Runtime-compiled source with eight kernels that call atomicAdd for the float overload, each
// with a different unsuitable argument combination (pointer-to-pointer address, pointer as
// value, literal as address, and Dummy/char/short/long/long long pointers as address).
// Unit_atomicAdd_Negative_Parameters_RTC compiles it and expects eight "error:" entries in the
// compile log. All kernels that pass a literal value pass a float literal.
static constexpr auto kAtomicAdd_float{
R"(
__global__ void atomicAdd_float_v1(float* address, float* result) {
*result = atomicAdd(&address, 1234.f);
}
__global__ void atomicAdd_float_v2(float* address, float* result) {
*result = atomicAdd(address, address);
}
__global__ void atomicAdd_float_v3(float* address, float* result) {
*result = atomicAdd(1234.f, 1234.f);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAdd_float_v4(Dummy* address, float* result) {
*result = atomicAdd(address, 1234.f);
}
__global__ void atomicAdd_float_v5(char* address, float* result) {
*result = atomicAdd(address, 1234.f);
}
__global__ void atomicAdd_float_v6(short* address, float* result) {
*result = atomicAdd(address, 1234.f);
}
__global__ void atomicAdd_float_v7(long* address, float* result) {
*result = atomicAdd(address, 1234.f);
}
__global__ void atomicAdd_float_v8(long long* address, float* result) {
*result = atomicAdd(address, 1234.f);
}
)"};
// Runtime-compiled source with eight kernels that call atomicAdd for the double overload, each
// with a different unsuitable argument combination (pointer-to-pointer address, pointer as
// value, literal as address, and Dummy/char/short/long/long long pointers as address).
// Unit_atomicAdd_Negative_Parameters_RTC compiles it and expects eight "error:" entries in the
// compile log.
static constexpr auto kAtomicAdd_double{
R"(
__global__ void atomicAdd_double_v1(double* address, double* result) {
*result = atomicAdd(&address, 1234.0);
}
__global__ void atomicAdd_double_v2(double* address, double* result) {
*result = atomicAdd(address, address);
}
__global__ void atomicAdd_double_v3(double* address, double* result) {
*result = atomicAdd(1234.0, 1234.0);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAdd_double_v4(Dummy* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
__global__ void atomicAdd_double_v5(char* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
__global__ void atomicAdd_double_v6(short* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
__global__ void atomicAdd_double_v7(long* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
__global__ void atomicAdd_double_v8(long long* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
)"};
+177
Просмотреть файл
@@ -0,0 +1,177 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicAdd_system atomicAdd_system
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on two devices wherein all threads will perform
* an atomic addition on a target memory location. Each thread will add the same value to the memory
* location, storing the return value into a separate output array slot corresponding to it. Once
* complete, the output array and target memory is validated to contain all the expected values.
* Several memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicAdd_system
* - hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicAdd_system.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // The multi-element access patterns use one target element per lane of a warp on device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  constexpr auto kDeviceCount = 2u;
  constexpr auto kKernelsPerDevice = 2u;
  constexpr auto kPackedPitch = sizeof(TestType);  // byte pitch: elements next to each other
  constexpr auto kScatteredPitch = 128u;           // byte pitch: elements spread out

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, 1, kPackedPitch);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, lanes_per_warp, kPackedPitch);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, lanes_per_warp, kScatteredPitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel on a single device wherein all threads will perform
* an atomic addition on a target memory location. Each thread will add the same value to the memory
* location, storing the return value into a separate output array slot corresponding to it. While
* the kernel is running, the host performs atomic additions, in 4 threads, on the same memory
* location(s). Once complete, the output array and target memory is validated to contain all the
* expected values. Several memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicAdd_system
* - hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicAdd_system.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // The multi-element access patterns use one target element per lane of a warp on device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  constexpr auto kDeviceCount = 1u;
  constexpr auto kKernelsPerDevice = 1u;
  constexpr auto kHostThreads = 4u;
  constexpr auto kPackedPitch = sizeof(TestType);  // byte pitch: elements next to each other
  constexpr auto kScatteredPitch = 128u;           // byte pitch: elements spread out

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, 1, kPackedPitch, kHostThreads);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, lanes_per_warp, kPackedPitch, kHostThreads);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, lanes_per_warp, kScatteredPitch, kHostThreads);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times on two devices wherein all threads will perform
* an atomic addition on a target memory location. Each thread will add the same value to the memory
* location, storing the return value into a separate output array slot corresponding to it. While
* the kernel is running, the host performs atomic additions, in 4 threads, on the same memory
* location(s). Once complete, the output array and target memory is validated to contain all the
* expected values. Several memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicAdd_system
* - hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicAdd_system.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // The multi-element access patterns use one target element per lane of a warp on device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  constexpr auto kDeviceCount = 2u;
  constexpr auto kKernelsPerDevice = 2u;
  constexpr auto kHostThreads = 4u;
  constexpr auto kPackedPitch = sizeof(TestType);  // byte pitch: elements next to each other
  constexpr auto kScatteredPitch = 128u;           // byte pitch: elements spread out

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, 1, kPackedPitch, kHostThreads);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, lanes_per_warp, kPackedPitch, kHostThreads);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kAddSystem>(
          kDeviceCount, kKernelsPerDevice, lanes_per_warp, kScatteredPitch, kHostThreads);
    }
  }
}
+222
Просмотреть файл
@@ -0,0 +1,222 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "atomicAnd_negative_kernels_rtc.hh"
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicAnd atomicAnd
* @{
* @ingroup AtomicsTest
* `atomicAnd(TestType* address, TestType val)` -
* performs an atomic bitwise AND between the value stored at address and val, returns old value.
*/
/**
* Test Description
* ------------------------
* - Performs atomicAnd from multiple threads on the same address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicAnd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long) {
  // Repeat the single-target scenario once per requested iteration, each run in its own
  // uniquely named section.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          1, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicAnd from multiple threads on adjacent addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicAnd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Warp size of device 0; passed on as the number of adjacent targets.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One uniquely named section per requested iteration.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          warp_size, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicAnd from multiple threads on scattered addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicAnd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Byte distance between neighbouring targets.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0; passed on as the number of scattered targets.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One uniquely named section per requested iteration.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          warp_size, kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicAnd from multiple threads on the same address.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicAnd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Repeat the multi-kernel, single-target scenario once per requested iteration, each run in
  // its own uniquely named section.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          2, 1, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicAnd from multiple threads on adjacent addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicAnd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Warp size of device 0; passed on as the number of adjacent targets.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One uniquely named section per requested iteration.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          2, warp_size, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicAnd from multiple threads on scattered addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicAnd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Byte distance between neighbouring targets.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0; passed on as the number of scattered targets.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One uniquely named section per requested iteration.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          2, warp_size, kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Compiles atomicAnd with invalid parameters.
* - Compiles the source with RTC.
* Test source
* ------------------------
* - unit/atomics/atomicAnd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicAnd_Negative_Parameters_RTC") {
  // Each generated value is the source text of one RTC program; the strings are declared in
  // atomicAnd_negative_kernels_rtc.hh.
  const auto program_source =
      GENERATE(kAtomicAnd_int, kAtomicAnd_uint, kAtomicAnd_ulong, kAtomicAnd_ulonglong);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicAnd_negative.cc", 0, nullptr, nullptr));

  // The compile status is only captured here; it is verified at the end, after the log has been
  // inspected and the program has been destroyed.
  const hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  // Fetch the compile log into a buffer of the size reported by the runtime.
  size_t log_size = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Tally the "error:" diagnostics found in the log.
  const int expected_error_count = 9;
  const std::string marker = "error:";
  int error_count = 0;
  for (size_t pos = log.find(marker, 0); pos != std::string::npos;
       pos = log.find(marker, pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+185
Просмотреть файл
@@ -0,0 +1,185 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Class type with a user-provided device-side constructor and destructor. The kernels in this
// file use it only as the pointee type of the `address` parameter in the *_v4 variants.
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
/* int atomicAnd(int* address, int val) */
// Negative kernels for the int flavour: every kernel calls atomicAnd with arguments that do not
// match the signature quoted above and stores the call's result through an int* `result`.
// NOTE(review): presumably each call is expected to be rejected at compile time - confirm
// against the harness that builds this file.

// v1: passes &address (the address of the pointer parameter) as the address argument.
__global__ void atomicAnd_int_v1(int* address, int* result) { *result = atomicAnd(&address, 1234); }
// v2: passes the pointer `address` as the value operand.
__global__ void atomicAnd_int_v2(int* address, int* result) {
*result = atomicAnd(address, address);
}
// v3: passes an integer literal where the address is expected.
__global__ void atomicAnd_int_v3(int* address, int* result) { *result = atomicAnd(1234, 1234); }
// v4: address points to the class type Dummy.
__global__ void atomicAnd_int_v4(Dummy* address, int* result) {
*result = atomicAnd(address, 1234);
}
// v5: address is a char*.
__global__ void atomicAnd_int_v5(char* address, int* result) { *result = atomicAnd(address, 1234); }
// v6: address is a short*.
__global__ void atomicAnd_int_v6(short* address, int* result) {
*result = atomicAnd(address, 1234);
}
// v7: address is a long*.
__global__ void atomicAnd_int_v7(long* address, int* result) { *result = atomicAnd(address, 1234); }
// v8: address is a long long*.
__global__ void atomicAnd_int_v8(long long* address, int* result) {
*result = atomicAnd(address, 1234);
}
// v9: address is a float*.
__global__ void atomicAnd_int_v9(float* address, int* result) {
*result = atomicAnd(address, 1234);
}
// v10: address is a double*.
__global__ void atomicAnd_int_v10(double* address, int* result) {
*result = atomicAnd(address, 1234);
}
/* unsigned int atomicAnd(unsigned int* address, unsigned int val) */
// Negative kernels for the unsigned int flavour: every kernel calls atomicAnd with arguments that
// do not match the signature quoted above and stores the call's result through an
// unsigned int* `result`.
// NOTE(review): presumably each call is expected to be rejected at compile time - confirm
// against the harness that builds this file.

// v1: passes &address (the address of the pointer parameter) as the address argument.
__global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicAnd(&address, 1234);
}
// v2: passes the pointer `address` as the value operand.
__global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicAnd(address, address);
}
// v3: passes an integer literal where the address is expected.
__global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicAnd(1234, 1234);
}
// v4: address points to the class type Dummy.
__global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
// v5: address is a char*.
__global__ void atomicAnd_uint_v5(char* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
// v6: address is a short*.
__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
// v7: address is a long*.
__global__ void atomicAnd_uint_v7(long* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
// v8: address is a long long*.
__global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
// v9: address is a float*.
__global__ void atomicAnd_uint_v9(float* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
// v10: address is a double*.
__global__ void atomicAnd_uint_v10(double* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
/* atomicAnd(unsigned long* address, unsigned long val) */
// Negative kernels for the unsigned long flavour: every kernel calls atomicAnd with arguments
// that do not match the signature quoted above and stores the call's result through an
// unsigned long* `result`.
// NOTE(review): presumably each call is expected to be rejected at compile time - confirm
// against the harness that builds this file.

// v1: passes &address (the address of the pointer parameter) as the address argument.
__global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicAnd(&address, 1234);
}
// v2: passes the pointer `address` as the value operand.
__global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicAnd(address, address);
}
// v3: passes an integer literal where the address is expected.
__global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicAnd(1234, 1234);
}
// v4: address points to the class type Dummy.
__global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
// v5: address is a char*.
__global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
// v6: address is a short*.
__global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
// v7: address is a (signed) long*.
__global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
// v8: address is a long long*.
__global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
// v9: address is a float*.
__global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
// v10: address is a double*.
__global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
/* atomicAnd(unsigned long long* address, unsigned long long val) */
// Negative kernels for the unsigned long long flavour: every kernel calls atomicAnd with
// arguments that do not match the signature quoted above and stores the call's result through an
// unsigned long long* `result`.
// NOTE(review): presumably each call is expected to be rejected at compile time - confirm
// against the harness that builds this file.

// v1: passes &address (the address of the pointer parameter) as the address argument.
__global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicAnd(&address, 1234);
}
// v2: passes the pointer `address` as the value operand.
__global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicAnd(address, address);
}
// v3: passes an integer literal where the address is expected.
__global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicAnd(1234, 1234);
}
// v4: address points to the class type Dummy.
__global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
// v5: address is a char*.
__global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
// v6: address is a short*.
__global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
// v7: address is a long*.
__global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
// v8: address is a (signed) long long*.
__global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
// v9: address is a float*.
__global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
// v10: address is a double*.
__global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
+223
Просмотреть файл
@@ -0,0 +1,223 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
// RTC program text for the int flavour of the atomicAnd negative test: ten kernels
// (atomicAnd_int_v1 .. atomicAnd_int_v10) plus the Dummy class used by v4. Each kernel calls
// atomicAnd with a mismatched address or value argument.
// The text is a raw string literal, so everything between R"( and )" - including any comment -
// becomes part of the program source; re-check the error count expected by the consuming test
// before editing it.
static constexpr auto kAtomicAnd_int{
R"(
__global__ void atomicAnd_int_v1(int* address, int* result) {
*result = atomicAnd(&address, 1234);
}
__global__ void atomicAnd_int_v2(int* address, int* result) {
*result = atomicAnd(address, address);
}
__global__ void atomicAnd_int_v3(int* address, int* result) {
*result = atomicAnd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAnd_int_v4(Dummy* address, int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_int_v5(char* address, int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_int_v6(short* address, int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_int_v7(long* address, int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_int_v8(long long* address, int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_int_v9(float* address, int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_int_v10(double* address, int* result) {
*result = atomicAnd(address, 1234);
}
)"};
// RTC program text for the unsigned int flavour of the atomicAnd negative test: ten kernels
// (atomicAnd_uint_v1 .. atomicAnd_uint_v10) plus the Dummy class used by v4. Each kernel calls
// atomicAnd with a mismatched address or value argument.
// The text is a raw string literal, so everything between R"( and )" - including any comment -
// becomes part of the program source; re-check the error count expected by the consuming test
// before editing it.
static constexpr auto kAtomicAnd_uint{
R"(
__global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicAnd(&address, 1234);
}
__global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicAnd(address, address);
}
__global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicAnd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_uint_v5(char* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_uint_v7(long* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_uint_v9(float* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_uint_v10(double* address, unsigned int* result) {
*result = atomicAnd(address, 1234);
}
)"};
// RTC program text for the unsigned long flavour of the atomicAnd negative test: ten kernels
// (atomicAnd_ulong_v1 .. atomicAnd_ulong_v10) plus the Dummy class used by v4. Each kernel calls
// atomicAnd with a mismatched address or value argument.
// The text is a raw string literal, so everything between R"( and )" - including any comment -
// becomes part of the program source; re-check the error count expected by the consuming test
// before editing it.
static constexpr auto kAtomicAnd_ulong{
R"(
__global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicAnd(&address, 1234);
}
__global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicAnd(address, address);
}
__global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicAnd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) {
*result = atomicAnd(address, 1234);
}
)"};
// RTC program text for the unsigned long long flavour of the atomicAnd negative test: ten
// kernels (atomicAnd_ulonglong_v1 .. atomicAnd_ulonglong_v10) plus the Dummy class used by v4.
// Each kernel calls atomicAnd with a mismatched address or value argument.
// The text is a raw string literal, so everything between R"( and )" - including any comment -
// becomes part of the program source; re-check the error count expected by the consuming test
// before editing it.
static constexpr auto kAtomicAnd_ulonglong{
R"(
__global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicAnd(&address, 1234);
}
__global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicAnd(address, address);
}
__global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicAnd(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
__global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) {
*result = atomicAnd(address, 1234);
}
)"};
+109
Просмотреть файл
@@ -0,0 +1,109 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicAnd_system atomicAnd_system
* @{
* @ingroup AtomicsTest
* `atomicAnd_system(TestType* address, TestType val)` -
* performs a system-wide atomic bitwise AND between the value stored at address and val, returns old value.
*/
/**
* Test Description
* ------------------------
* - Performs atomicAnd_system from multiple threads on the same address.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicAnd_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Repeat the multi-device, single-target scenario once per requested iteration, each run in
  // its own uniquely named section.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
          2, 2, 1, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicAnd_system from multiple threads on adjacent addresses.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicAnd_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Warp size of device 0; passed on as the number of adjacent targets.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One uniquely named section per requested iteration.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
          2, 2, warp_size, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicAnd_system from multiple threads on scattered addresses.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicAnd_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Byte distance between neighbouring targets.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0; passed on as the number of scattered targets.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One uniquely named section per requested iteration.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
          2, 2, warp_size, kCacheLineBytes);
    }
  }
}
+172
Просмотреть файл
@@ -0,0 +1,172 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include "atomicCAS_negative_kernels_rtc.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicCAS atomicCAS
* @{
* @ingroup AtomicsTest
*/
// TYPES is appended to the type list of the positive atomicCAS TEMPLATE_TEST_CASEs in this file.
// When HT_NVIDIA is defined it expands to nothing, so only the integer types are instantiated;
// otherwise it adds float and double to the list.
// NOTE(review): presumably the NVIDIA platform offers no float/double atomicCAS overloads -
// confirm.
#ifdef HT_NVIDIA
#define TYPES
#else
#define TYPES , float, double
#endif
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition, implemented using an atomic CAS operation, on a target memory location. Each thread
* will add the same value to the memory location, storing the return value into a separate output
* array slot corresponding to it. Once complete, the output array and target memory is validated to
* contain all the expected values. Several memory access patterns are tested:
* -# All threads exchange to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicCAS
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - Several grid and block dimension combinations (only one block is used for shared memory).
* Test source
* ------------------------
* - unit/atomics/atomicCAS.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive", "", int, unsigned int, unsigned long long TYPES) {
  // Byte distance between neighbouring targets in the scattered-address pattern.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0; used as the element count of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every requested iteration gets its own set of uniquely named sections.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    // Pattern 1: a single target location.
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(1, sizeof(TestType));
    }

    // Pattern 2: warp_size targets laid out back to back.
    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(warp_size,
                                                                       sizeof(TestType));
    }

    // Pattern 3: warp_size targets, kCacheLineBytes apart.
    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(warp_size,
                                                                       kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
* an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each
* thread will add the same value to the memory location, storing the return value into a separate
* output array slot corresponding to it. Once complete, the output array and target memory is
* validated to contain all the expected values. Several memory access patterns are tested:
* -# All threads exchange to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicCAS
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicCAS.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive_Multi_Kernel", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Byte distance between neighbouring targets in the scattered-address pattern.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0; used as the element count of the multi-address patterns.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every requested iteration gets its own set of uniquely named sections.
  const auto iteration_count = cmd_options.iterations;
  for (auto iteration = 0; iteration < iteration_count; ++iteration) {
    // Pattern 1: a single target location.
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, 1, sizeof(TestType));
    }

    // Pattern 2: warp_size targets laid out back to back.
    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, warp_size,
                                                                         sizeof(TestType));
    }

    // Pattern 3: warp_size targets, kCacheLineBytes apart.
    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, warp_size,
                                                                         kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
* atomicCAS.
* Test source
* ------------------------
* - unit/atomics/atomicCAS.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicCAS_Negative_Parameters_RTC") {
  // Each generated value is the source text of one RTC program; the file includes
  // atomicCAS_negative_kernels_rtc.hh for these strings.
  const auto program_source = GENERATE(kAtomicCAS_int, kAtomicCAS_uint, kAtomicCAS_ulong,
                                       kAtomicCAS_ulonglong, kAtomicCAS_float, kAtomicCAS_double);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicCAS_negative.cc", 0, nullptr, nullptr));

  // The compile status is only captured here; it is verified at the end, after the log has been
  // inspected and the program has been destroyed.
  const hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  // Fetch the compile log into a buffer of the size reported by the runtime.
  size_t log_size = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Tally the "error:" diagnostics found in the log.
  const int expected_error_count = 8;
  const std::string marker = "error:";
  int error_count = 0;
  for (size_t pos = log.find(marker, 0); pos != std::string::npos;
       pos = log.find(marker, pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+62
Просмотреть файл
@@ -0,0 +1,62 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Class type with a user-provided device-side constructor and destructor. In this file it is
// used only as the pointee type of `address` in the atomicCAS_v4 kernel generated by
// ATOMIC_CAS_NEGATIVE_KERNEL.
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
// Expands to eight kernels, atomicCAS_v1 .. atomicCAS_v8, that each store the result of a
// single atomicCAS call into *result, where `result` is a type_name*:
//   v1      - passes &address (a type_name**) as the first argument
//   v2      - passes the pointer `address` as the second argument
//   v3      - passes the pointer `address` as the third argument
//   v4 - v8 - take Dummy*, char*, short*, long* and long long* as `address`
// NOTE(review): presumably every one of these calls is meant to be rejected by the
// compiler (this looks like a negative-compilation fixture) - confirm against the build setup.
#define ATOMIC_CAS_NEGATIVE_KERNEL(type_name) \
__global__ void atomicCAS_v1(type_name* address, type_name* result) { \
*result = atomicCAS(&address, 12, 13); \
} \
__global__ void atomicCAS_v2(type_name* address, type_name* result) { \
*result = atomicCAS(address, address, 13); \
} \
__global__ void atomicCAS_v3(type_name* address, type_name* result) { \
*result = atomicCAS(address, 12, address); \
} \
__global__ void atomicCAS_v4(Dummy* address, type_name* result) { \
*result = atomicCAS(address, 12, 13); \
} \
__global__ void atomicCAS_v5(char* address, type_name* result) { \
*result = atomicCAS(address, 12, 13); \
} \
__global__ void atomicCAS_v6(short* address, type_name* result) { \
*result = atomicCAS(address, 12, 13); \
} \
__global__ void atomicCAS_v7(long* address, type_name* result) { \
*result = atomicCAS(address, 12, 13); \
} \
__global__ void atomicCAS_v8(long long* address, type_name* result) { \
*result = atomicCAS(address, 12, 13); \
}
// Expand the kernel set once per element type; the expansions share the names
// atomicCAS_v1 .. atomicCAS_v8 and differ only in their parameter types.
ATOMIC_CAS_NEGATIVE_KERNEL(int)
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned int)
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long)
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long long)
ATOMIC_CAS_NEGATIVE_KERNEL(float)
ATOMIC_CAS_NEGATIVE_KERNEL(double)
+273
Просмотреть файл
@@ -0,0 +1,273 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
// Program text (raw string) for the `int` case: declares Dummy plus the kernels
// atomicCAS_int_v1 .. atomicCAS_int_v8, each making one atomicCAS call whose result is
// stored through an int* `result`:
//   v1 passes &address, v2 passes `address` as the 2nd argument, v3 passes `address` as the
//   3rd argument, v4 - v8 take Dummy*, char*, short*, long* and long long* as `address`.
// NOTE(review): presumably each call is expected to fail to compile under hiprtc - confirm
// against the RTC test that consumes this string.
static constexpr auto kAtomicCAS_int{
R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicCAS_int_v1(int* address, int* result) {
*result = atomicCAS(&address, 12, 13);
}
__global__ void atomicCAS_int_v2(int* address, int* result) {
*result = atomicCAS(address, address, 13);
}
__global__ void atomicCAS_int_v3(int* address, int* result) {
*result = atomicCAS(address, 12, address);
}
__global__ void atomicCAS_int_v4(Dummy* address, int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_int_v5(char* address, int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_int_v6(short* address, int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_int_v7(long* address, int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_int_v8(long long* address, int* result) {
*result = atomicCAS(address, 12, 13);
}
)"};
// Program text (raw string) for the `unsigned int` case: declares Dummy plus the kernels
// atomicCAS_uint_v1 .. atomicCAS_uint_v8, each making one atomicCAS call whose result is
// stored through an unsigned int* `result`:
//   v1 passes &address, v2 passes `address` as the 2nd argument, v3 passes `address` as the
//   3rd argument, v4 - v8 take Dummy*, char*, short*, long* and long long* as `address`.
// NOTE(review): presumably each call is expected to fail to compile under hiprtc - confirm
// against the RTC test that consumes this string.
static constexpr auto kAtomicCAS_uint{
R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicCAS_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicCAS(&address, 12, 13);
}
__global__ void atomicCAS_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicCAS(address, address, 13);
}
__global__ void atomicCAS_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicCAS(address, 12, address);
}
__global__ void atomicCAS_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_uint_v5(char* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_uint_v6(short* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_uint_v7(long* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_uint_v8(long long* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}
)"};
// Program text (raw string) for the `unsigned long` case: declares Dummy plus the kernels
// atomicCAS_ulong_v1 .. atomicCAS_ulong_v8, each making one atomicCAS call whose result is
// stored through an unsigned long* `result`:
//   v1 passes &address, v2 passes `address` as the 2nd argument, v3 passes `address` as the
//   3rd argument, v4 - v8 take Dummy*, char*, short*, long* and long long* as `address`.
// NOTE(review): presumably each call is expected to fail to compile under hiprtc - confirm
// against the RTC test that consumes this string.
static constexpr auto kAtomicCAS_ulong{
R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicCAS_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicCAS(&address, 12, 13);
}
__global__ void atomicCAS_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicCAS(address, address, 13);
}
__global__ void atomicCAS_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicCAS(address, 12, address);
}
__global__ void atomicCAS_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulong_v5(char* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulong_v6(short* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulong_v7(long* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulong_v8(long long* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}
)"};
// Program text (raw string) for the `unsigned long long` case: declares Dummy plus the
// kernels atomicCAS_ulonglong_v1 .. atomicCAS_ulonglong_v8, each making one atomicCAS call
// whose result is stored through an unsigned long long* `result`:
//   v1 passes &address, v2 passes `address` as the 2nd argument, v3 passes `address` as the
//   3rd argument, v4 - v8 take Dummy*, char*, short*, long* and long long* as `address`.
// NOTE(review): presumably each call is expected to fail to compile under hiprtc - confirm
// against the RTC test that consumes this string.
static constexpr auto kAtomicCAS_ulonglong{
R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicCAS_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicCAS(&address, 12, 13);
}
__global__ void atomicCAS_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicCAS(address, address, 13);
}
__global__ void atomicCAS_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicCAS(address, 12, address);
}
__global__ void atomicCAS_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}
)"};
// Program text (raw string) for the `float` case: declares Dummy plus the kernels
// atomicCAS_float_v1 .. atomicCAS_float_v8, each making one atomicCAS call whose result is
// stored through a float* `result`:
//   v1 passes &address, v2 passes `address` as the 2nd argument, v3 passes `address` as the
//   3rd argument, v4 - v8 take Dummy*, char*, short*, long* and long long* as `address`.
// NOTE(review): presumably each call is expected to fail to compile under hiprtc - confirm
// against the RTC test that consumes this string.
static constexpr auto kAtomicCAS_float{
R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicCAS_float_v1(float* address, float* result) {
*result = atomicCAS(&address, 12, 13);
}
__global__ void atomicCAS_float_v2(float* address, float* result) {
*result = atomicCAS(address, address, 13);
}
__global__ void atomicCAS_float_v3(float* address, float* result) {
*result = atomicCAS(address, 12, address);
}
__global__ void atomicCAS_float_v4(Dummy* address, float* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_float_v5(char* address, float* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_float_v6(short* address, float* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_float_v7(long* address, float* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_float_v8(long long* address, float* result) {
*result = atomicCAS(address, 12, 13);
}
)"};
// Program text (raw string) for the `double` case: declares Dummy plus the kernels
// atomicCAS_double_v1 .. atomicCAS_double_v8, each making one atomicCAS call whose result is
// stored through a double* `result`:
//   v1 passes &address, v2 passes `address` as the 2nd argument, v3 passes `address` as the
//   3rd argument, v4 - v8 take Dummy*, char*, short*, long* and long long* as `address`.
// NOTE(review): presumably each call is expected to fail to compile under hiprtc - confirm
// against the RTC test that consumes this string.
static constexpr auto kAtomicCAS_double{
R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicCAS_double_v1(double* address, double* result) {
*result = atomicCAS(&address, 12, 13);
}
__global__ void atomicCAS_double_v2(double* address, double* result) {
*result = atomicCAS(address, address, 13);
}
__global__ void atomicCAS_double_v3(double* address, double* result) {
*result = atomicCAS(address, 12, address);
}
__global__ void atomicCAS_double_v4(Dummy* address, double* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_double_v5(char* address, double* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_double_v6(short* address, double* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_double_v7(long* address, double* result) {
*result = atomicCAS(address, 12, 13);
}
__global__ void atomicCAS_double_v8(long long* address, double* result) {
*result = atomicCAS(address, 12, 13);
}
)"};
+185
Просмотреть файл
@@ -0,0 +1,185 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicCAS_system atomicCAS_system
* @{
* @ingroup AtomicsTest
*/
// Extra element types appended to the TEMPLATE_TEST_CASE type lists in this file.
// Expands to nothing when HT_NVIDIA is defined; otherwise adds float and double.
// The leading comma lets the macro follow the last fixed type without its own separator.
#ifdef HT_NVIDIA
#define TYPES
#else
#define TYPES , float, double
#endif
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on two devices wherein all threads will perform
* an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each
* thread will add the same value to the memory location, storing the return value into a separate
* output array slot corresponding to it. Once complete, the output array and target memory is
* validated to contain all the expected values. Several memory access patterns are tested:
* -# All threads exchange to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicCAS_system
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicCAS_system.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Operation exercised by every section of this test.
  constexpr auto kOperation = AtomicOperation::kCASAddSystem;
  // Byte distance between targeted elements in the "scattered" pattern.
  const unsigned int scatter_stride = 128u;

  // The warp size of device 0 is the element count used by the multi-address patterns.
  int lanes = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes, hipDeviceAttributeWarpSize, 0));

  // Each pass registers three sections; the pass index keeps their names unique.
  // The two leading 2s are presumably the device and kernel counts - see arithmetic_common.hh.
  int pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, lanes,
                                                                    sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, lanes,
                                                                    scatter_stride);
    }

    ++pass;
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel on a single device wherein all threads will perform
* an atomic addition, implemented using an atomic CAS operation, on a target memory location.
* Each thread will add the same value to the memory location, storing the return value into a
* separate output array slot corresponding to it. While the kernel is running, the host
* performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the
* output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads exchange to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicCAS_system
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicCAS_system.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Operation exercised by every section of this test.
  constexpr auto kOperation = AtomicOperation::kCASAddSystem;
  // Byte distance between targeted elements in the "scattered" pattern.
  const unsigned int scatter_stride = 128u;

  // The warp size of device 0 is the element count used by the multi-address patterns.
  int lanes = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes, hipDeviceAttributeWarpSize, 0));

  // The trailing 4 is presumably the host thread count described in the test
  // documentation - see arithmetic_common.hh for the helper's parameter list.
  int pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, 1, sizeof(TestType),
                                                                    4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, lanes,
                                                                    sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, lanes,
                                                                    scatter_stride, 4);
    }

    ++pass;
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times on two devices wherein all threads will perform
* an atomic addition, implemented using an atomic CAS operation, on a target memory location.
* Each thread will add the same value to the memory location, storing the return value into a
* separate output array slot corresponding to it. While the kernel is running, the host
* performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the
* output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads exchange to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicCAS_system
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicCAS_system.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Operation exercised by every section of this test.
  constexpr auto kOperation = AtomicOperation::kCASAddSystem;
  // Byte distance between targeted elements in the "scattered" pattern.
  const unsigned int scatter_stride = 128u;

  // The warp size of device 0 is the element count used by the multi-address patterns.
  int lanes = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes, hipDeviceAttributeWarpSize, 0));

  // Leading 2s: presumably device and kernel counts; trailing 4: presumably the host
  // thread count - see arithmetic_common.hh for the helper's parameter list.
  int pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, 1, sizeof(TestType),
                                                                    4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, lanes,
                                                                    sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, lanes,
                                                                    scatter_stride, 4);
    }

    ++pass;
  }
}
+164
Просмотреть файл
@@ -0,0 +1,164 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include "atomicDec_negative_kernels_rtc.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicDec atomicDec
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* decrement on a target memory location. Each thread will decrement the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads decrement a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicDec
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - Several grid and block dimension combinations (only one block is used for shared memory).
* Test source
* ------------------------
* - unit/atomics/atomicDec.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicDec_Positive", "", unsigned int) {
  // Operation exercised by every section of this test.
  constexpr auto kOperation = AtomicOperation::kDec;
  // Byte distance between targeted elements in the "scattered" pattern.
  const unsigned int scatter_stride = 128u;

  // The warp size of device 0 is the element count used by the multi-address patterns.
  int lanes = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes, hipDeviceAttributeWarpSize, 0));

  // Each pass registers three sections; the pass index keeps their names unique.
  int pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(lanes, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(lanes, scatter_stride);
    }

    ++pass;
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
* an atomic decrement on a target memory location. Each thread will decrement the memory
* location, storing the return value into a separate output array slot corresponding to it. Once
* complete, the output array and target memory is validated to contain all the expected values.
* Several memory access patterns are tested:
* -# All threads decrement a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicDec
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicDec.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicDec_Positive_Multi_Kernel", "", unsigned int) {
  // Operation exercised by every section of this test.
  constexpr auto kOperation = AtomicOperation::kDec;
  // Byte distance between targeted elements in the "scattered" pattern.
  const unsigned int scatter_stride = 128u;

  // The warp size of device 0 is the element count used by the multi-address patterns.
  int lanes = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes, hipDeviceAttributeWarpSize, 0));

  // The leading 2 is presumably the number of kernels launched concurrently - see
  // arithmetic_common.hh for the helper's parameter list.
  int pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << pass) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, lanes, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, lanes, scatter_stride);
    }

    ++pass;
  }
}
/**
* Test Description
* ------------------------
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
* atomicDec.
* Test source
* ------------------------
* - unit/atomics/atomicDec.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicDec_Negative_Parameters_RTC") {
  hiprtcProgram program{};
  const auto program_source = GENERATE(kAtomicDec_uint);

  // Build the program; the compile result is only checked after the log has been read
  // and the program destroyed.
  HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "atomicDec_negative.cc", 0,
                                   nullptr, nullptr));
  hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  // Fetch the compiler log into a buffer of exactly the reported size.
  size_t log_bytes = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_bytes));
  std::string log(log_bytes, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of the diagnostic marker in the log.
  const std::string marker{"error:"};
  const int expected_error_count = 8;
  int error_count = 0;
  for (size_t at = log.find(marker, 0); at != std::string::npos;
       at = log.find(marker, at + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));

  // Compilation must have failed, with exactly the expected number of errors.
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+62
Просмотреть файл
@@ -0,0 +1,62 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Empty class with a user-provided __device__ constructor and destructor.
// Dummy* is the type of the `address` parameter of atomicDec_uint_v4 below.
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
/* unsigned int atomicDec(unsigned int* address, unsigned int val) */
// v1: passes &address (an unsigned int**) as the first argument of atomicDec.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicDec(&address, 1234);
}
// v2: passes the pointer `address` as the second (value) argument of atomicDec.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicDec(address, address);
}
// v3: passes the integer literal 1234 as the first argument, where the signature quoted
// above takes a pointer; `address` itself is unused.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicDec(1234, 1234);
}
// v4: calls atomicDec with a Dummy* address.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
// v5: calls atomicDec with a char* address.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v5(char* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
// v6: calls atomicDec with a short* address.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v6(short* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
// v7: calls atomicDec with a long* address.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v7(long* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
// v8: calls atomicDec with a long long* address.
// NOTE(review): presumably expected to be rejected by the compiler - confirm.
__global__ void atomicDec_uint_v8(long long* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
+68
Просмотреть файл
@@ -0,0 +1,68 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
// Program text (raw string) holding the kernels atomicDec_uint_v1 .. atomicDec_uint_v8 and
// the Dummy class used by v4. Each kernel makes one atomicDec call and stores the result
// through an unsigned int* `result`:
//   v1 passes &address, v2 passes `address` as the 2nd argument, v3 passes the literal 1234
//   as the 1st argument, v4 - v8 take Dummy*, char*, short*, long* and long long* as `address`.
// NOTE(review): presumably each call is expected to fail to compile under hiprtc - confirm
// against the RTC test that consumes this string.
static constexpr auto kAtomicDec_uint{
R"(
__global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicDec(&address, 1234);
}
__global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicDec(address, address);
}
__global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicDec(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
__global__ void atomicDec_uint_v5(char* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
__global__ void atomicDec_uint_v6(short* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
__global__ void atomicDec_uint_v7(long* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
__global__ void atomicDec_uint_v8(long long* address, unsigned int* result) {
*result = atomicDec(address, 1234);
}
)"};
+57 -16
Просмотреть файл
@@ -24,22 +24,26 @@ THE SOFTWARE.
#include <numeric>
#include <cmd_options.hh>
#include <hip_test_common.hh>
#include <resource_guards.hh>
#include <hip/hip_cooperative_groups.h>
#include <cmd_options.hh>
enum class AtomicScopes { device, system };
enum class AtomicScopes { device, system, builtin };
template <typename T, AtomicScopes scope> __device__ T perform_atomic_exch(T* address, T val) {
/**
 * Performs one atomic exchange on *address, selecting the flavour of the call at compile
 * time from the template arguments.
 *
 * @tparam T            Type of the exchanged value.
 * @tparam scope        AtomicScopes::device  -> atomicExch,
 *                      AtomicScopes::system  -> atomicExch_system,
 *                      AtomicScopes::builtin -> __hip_atomic_exchange with __ATOMIC_RELAXED.
 * @tparam memory_scope Scope argument forwarded to __hip_atomic_exchange; only used when
 *                      scope is AtomicScopes::builtin.
 * @param address Location to exchange.
 * @param val     Value passed to the exchange call.
 * @return Whatever the selected exchange call returns.
 */
template <typename T, AtomicScopes scope, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__device__ T perform_atomic_exch(T* address, T val) {
  // Every branch is `if constexpr`, so only the selected call is instantiated for T. With a
  // plain `else if`, the system and builtin calls were both compiled for any non-device
  // scope, and control could formally reach the end of this non-void function.
  if constexpr (scope == AtomicScopes::device) {
    return atomicExch(address, val);
  } else if constexpr (scope == AtomicScopes::system) {
    return atomicExch_system(address, val);
  } else {
    // Fail the build, rather than fall through, if a new enumerator is added without a branch.
    static_assert(scope == AtomicScopes::builtin, "Unhandled AtomicScopes value");
    return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, memory_scope);
  }
}
template <typename T, bool use_shared_mem, AtomicScopes scope>
template <typename T, bool use_shared_mem, AtomicScopes scope,
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const old_vals) {
__shared__ T shared_mem;
@@ -52,7 +56,7 @@ __global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const ol
__syncthreads();
}
old_vals[tid] = perform_atomic_exch<T, scope>(mem, static_cast<T>(tid + 1));
old_vals[tid] = perform_atomic_exch<T, scope, memory_scope>(mem, static_cast<T>(tid + 1));
if constexpr (use_shared_mem) {
__syncthreads();
@@ -67,7 +71,16 @@ __host__ __device__ T* pitched_offset(T* const ptr, const unsigned int pitch,
return reinterpret_cast<T*>(byte_ptr + idx * pitch);
}
template <typename T, bool use_shared_mem, AtomicScopes scope>
/**
 * Touches every byte in [begin_addr, end_addr): reads it, XORs it with 0xAB and writes it
 * back. Accesses go through a volatile pointer so the compiler may not elide them.
 *
 * @param begin_addr First byte to touch.
 * @param end_addr   One past the last byte to touch. An empty or inverted range
 *                   (begin_addr >= end_addr) is a no-op.
 */
__device__ void generate_memory_traffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  // `<` instead of `!=`: if begin_addr ever lies past end_addr (e.g. a pitch smaller than
  // one element at the call site), a `!=` bound would walk off the end of the buffer.
  for (volatile uint8_t* addr = begin_addr; addr < end_addr; ++addr) {
    uint8_t val = *addr;
    val ^= 0xAB;
    *addr = val;
  }
}
template <typename T, bool use_shared_mem, AtomicScopes scope,
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const unsigned int width,
const unsigned pitch, const T base_val = 0) {
extern __shared__ uint8_t shared_mem[];
@@ -84,8 +97,18 @@ __global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const
__syncthreads();
}
old_vals[tid] = perform_atomic_exch<T, scope>(pitched_offset(mem, pitch, tid % width),
base_val + static_cast<T>(tid + width));
const auto n = cooperative_groups::this_grid().size() - width;
T* atomic_addr = pitched_offset(mem, pitch, tid % width);
if (tid < n) {
old_vals[tid] = perform_atomic_exch<T, scope, memory_scope>(
pitched_offset(mem, pitch, tid % width), base_val + static_cast<T>(tid + width));
} else {
uint8_t* const begin_addr = reinterpret_cast<uint8_t*>(atomic_addr + 1);
uint8_t* const end_addr = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
generate_memory_traffic(begin_addr, end_addr);
}
if constexpr (use_shared_mem) {
__syncthreads();
@@ -255,14 +278,16 @@ class AtomicExchCRTP {
}
};
template <typename T, bool use_shared_mem, AtomicScopes scope>
template <typename T, bool use_shared_mem, AtomicScopes scope,
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
class AtomicExch
: public AtomicExchCRTP<AtomicExch<T, use_shared_mem, scope>, T, use_shared_mem, scope> {
public:
void LaunchKernel(const unsigned int shared_mem_size, const hipStream_t stream, T* const mem,
T* const old_vals, const T base_val, const AtomicExchParams& p) const {
atomic_exch_kernel<T, use_shared_mem, scope><<<p.blocks, p.threads, shared_mem_size, stream>>>(
mem, old_vals, p.width, p.pitch, base_val);
atomic_exch_kernel<T, use_shared_mem, scope, memory_scope>
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem, old_vals, p.width, p.pitch,
base_val);
}
void ValidateResults(std::vector<T>& old_vals) const {
@@ -281,23 +306,39 @@ inline dim3 GenerateAtomicExchBlockDimensions() {
return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2));
}
template <typename TestType, AtomicScopes scope>
template <typename TestType, AtomicScopes scope, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
AtomicExchParams params;
params.num_devices = 1;
params.kernel_count = 1;
params.threads = GenerateAtomicExchThreadDimensions();
if constexpr (scope == AtomicScopes::builtin && memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
params.threads = 1;
} else if constexpr (scope == AtomicScopes::builtin &&
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
params.threads = dim3(warp_size);
} else {
params.threads = GenerateAtomicExchThreadDimensions();
}
params.width = width;
params.pitch = pitch;
SECTION("Global memory") {
params.blocks = GenerateAtomicExchBlockDimensions();
if constexpr (scope == AtomicScopes::builtin &&
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
params.blocks = dim3(1);
} else {
params.blocks = GenerateAtomicExchBlockDimensions();
}
using LA = LinearAllocs;
for (const auto alloc_type :
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
AtomicExch<TestType, false, scope>().run(params);
AtomicExch<TestType, false, scope, memory_scope>().run(params);
}
}
}
@@ -305,7 +346,7 @@ void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsi
SECTION("Shared memory") {
params.blocks = dim3(1);
params.alloc_type = LinearAllocs::hipMalloc;
AtomicExch<TestType, true, scope>().run(params);
AtomicExch<TestType, true, scope, memory_scope>().run(params);
}
}
+164
Просмотреть файл
@@ -0,0 +1,164 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include "atomicInc_negative_kernels_rtc.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicInc atomicInc
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* increment on a target memory location. Each thread will increment the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads increment a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicInc
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - Several grid and block dimension combinations (only one block is used for shared memory).
* Test source
* ------------------------
* - unit/atomics/atomicInc.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicInc_Positive", "", unsigned int) {
  constexpr auto kOperation = AtomicOperation::kInc;
  // Spacing, in bytes, used by the "Scattered addresses" pattern.
  constexpr unsigned int kCacheLineSize = 128u;
  const auto element_size = sizeof(TestType);

  // The multi-address patterns use warp_size elements, queried from device 0.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of every section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(1, element_size);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, element_size);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on a single device wherein all threads will
* perform an atomic increment on a target memory location. Each thread will increment the memory
* location, storing the return value into a separate output array slot corresponding to it. Once
* complete, the output array and target memory is validated to contain all the expected values.
* Several memory access patterns are tested:
* -# All threads increment a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicInc
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicInc.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicInc_Positive_Multi_Kernel", "", unsigned int) {
  constexpr auto kOperation = AtomicOperation::kInc;
  // The kernel is executed two times concurrently on the same device.
  constexpr int kKernelCount = 2;
  // Spacing, in bytes, used by the "Scattered addresses" pattern.
  constexpr unsigned int kCacheLineSize = 128u;
  const auto element_size = sizeof(TestType);

  // The multi-address patterns use warp_size elements, queried from device 0.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of every section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1, element_size);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size, element_size);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                           kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Compiles, at runtime via hiprtc, kernels that pass combinations of arguments of invalid types
* to all overloads of atomicInc, and verifies that the compilation fails.
* Test source
* ------------------------
* - unit/atomics/atomicInc.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicInc_Negative_Parameters_RTC") {
  hiprtcProgram program{};

  const auto program_source = GENERATE(kAtomicInc_uint);
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicInc_negative.cc", 0, nullptr, nullptr));
  // The compile status is only asserted after the program has been destroyed.
  const hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compiler log into a buffer of the size reported by hiprtc.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of the diagnostic marker; kAtomicInc_uint defines eight kernels.
  const std::string marker{"error:"};
  const int expected_error_count{8};
  int error_count{0};
  for (size_t hit = log.find(marker); hit != std::string::npos; hit = log.find(marker, hit + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+62
Просмотреть файл
@@ -0,0 +1,62 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
/* Empty user-defined class; atomicInc_uint_v4 takes a pointer to it as the atomic target. */
class Dummy {
public:
/* Device-side constructor and destructor, both with empty bodies. */
__device__ Dummy() {}
__device__ ~Dummy() {}
};
/* unsigned int atomicInc(unsigned int* address, unsigned int val) */
/*
 * Negative kernels for the overload above: every kernel passes an argument combination that
 * differs from that signature.
 * NOTE(review): presumably each call is expected to fail to compile (the RTC copy of these
 * kernels is checked for one compiler error per kernel) - confirm.
 */
/* v1: the target is the address of the pointer parameter (unsigned int**). */
__global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicInc(&address, 1234);
}
/* v2: a pointer is passed where the value is expected. */
__global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicInc(address, address);
}
/* v3: an integer literal is passed where the target pointer is expected. */
__global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicInc(1234, 1234);
}
/* v4: the target is a pointer to the user-defined Dummy class. */
__global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
/* v5: the target is a char*. */
__global__ void atomicInc_uint_v5(char* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
/* v6: the target is a short*. */
__global__ void atomicInc_uint_v6(short* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
/* v7: the target is a long*. */
__global__ void atomicInc_uint_v7(long* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
/* v8: the target is a long long*. */
__global__ void atomicInc_uint_v8(long long* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
+68
Просмотреть файл
@@ -0,0 +1,68 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
/*
 * Program source, kept as a raw string, that defines the Dummy class and eight kernels
 * (atomicInc_uint_v1 .. atomicInc_uint_v8). Each kernel calls atomicInc with a different
 * argument combination: v1 address of the pointer parameter, v2 pointer as value, v3 literal as
 * target, v4 Dummy*, v5 char*, v6 short*, v7 long*, v8 long long*.
 * NOTE(review): the atomicInc RTC negative test expects 8 "error:" diagnostics when compiling
 * this source - keep the kernel count and that expectation in sync.
 */
static constexpr auto kAtomicInc_uint{
R"(
__global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicInc(&address, 1234);
}
__global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicInc(address, address);
}
__global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicInc(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
__global__ void atomicInc_uint_v5(char* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
__global__ void atomicInc_uint_v6(short* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
__global__ void atomicInc_uint_v7(long* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
__global__ void atomicInc_uint_v8(long long* address, unsigned int* result) {
*result = atomicInc(address, 1234);
}
)"};
+222
Просмотреть файл
@@ -0,0 +1,222 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "atomicMax_negative_kernels_rtc.hh"
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicMax atomicMax
* @{
* @ingroup AtomicsTest
* `atomicMax(TestType* address, TestType val)` -
* computes the maximum of the value stored at address and val, stores it at address and returns
* the old value.
*/
/**
* Test Description
* ------------------------
* - Performs atomicMax from multiple threads on the same address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMax;
  const auto element_size = sizeof(TestType);

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, element_size);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicMax from multiple threads on adjacent addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMax;
  const auto element_size = sizeof(TestType);

  // Warp size of device 0; forwarded as the first argument of the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, element_size);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicMax from multiple threads on scattered addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMax;
  // Fixed 128-byte value forwarded as the second argument of the test helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size of device 0; forwarded as the first argument of the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicMax from multiple threads on the same address.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMax;
  // NOTE(review): the first helper argument is assumed to be the number of kernels launched,
  // going by the test description ("launches multiple kernels") - confirm in min_max_common.hh.
  constexpr int kKernelCount = 2;
  const auto element_size = sizeof(TestType);

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1, element_size);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicMax from multiple threads on adjacent addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMax;
  // NOTE(review): the first helper argument is assumed to be the number of kernels launched,
  // going by the test description ("launches multiple kernels") - confirm in min_max_common.hh.
  constexpr int kKernelCount = 2;
  const auto element_size = sizeof(TestType);

  // Warp size of device 0; forwarded as the second argument of the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                                   element_size);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicMax from multiple threads on scattered addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMax;
  // NOTE(review): the first helper argument is assumed to be the number of kernels launched,
  // going by the test description ("launches multiple kernels") - confirm in min_max_common.hh.
  constexpr int kKernelCount = 2;
  // Fixed 128-byte value forwarded as the last argument of the test helper.
  constexpr unsigned int kCacheLineSize = 128u;

  // Warp size of device 0; forwarded as the second argument of the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                                   kCacheLineSize);
    }
  }
}
/**
* Test Description
* ------------------------
* - Compiles atomicMax with invalid parameters.
* - Compiles the source with RTC.
* Test source
* ------------------------
* - unit/atomics/atomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicMax_Negative_Parameters_RTC") {
  hiprtcProgram program{};

  // One program source per atomicMax overload; see atomicMax_negative_kernels_rtc.hh.
  const auto program_source = GENERATE(kAtomicMax_int, kAtomicMax_uint, kAtomicMax_ulong,
                                       kAtomicMax_ulonglong, kAtomicMax_float, kAtomicMax_double);
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicMax_negative.cc", 0, nullptr, nullptr));
  // The compile status is only asserted after the program has been destroyed.
  const hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compiler log into a buffer of the size reported by hiprtc.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of the diagnostic marker; each source defines eight kernels.
  const std::string marker{"error:"};
  const int expected_error_count{8};
  int error_count{0};
  for (size_t hit = log.find(marker); hit != std::string::npos; hit = log.find(marker, hit + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+219
Просмотреть файл
@@ -0,0 +1,219 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
/* Empty user-defined class; the *_v4 kernels take a pointer to it as the atomic target. */
class Dummy {
public:
/* Device-side constructor and destructor, both with empty bodies. */
__device__ Dummy() {}
__device__ ~Dummy() {}
};
/* int atomicMax(int* address, int val) */
/*
 * Negative kernels for the overload above: every kernel passes an argument combination that
 * differs from that signature.
 * NOTE(review): presumably each call is expected to fail to compile (the RTC copy of these
 * kernels is checked for one compiler error per kernel) - confirm.
 */
/* v1: the target is the address of the pointer parameter (int**). */
__global__ void atomicMax_int_v1(int* address, int* result) { *result = atomicMax(&address, 1234); }
/* v2: a pointer is passed where the value is expected. */
__global__ void atomicMax_int_v2(int* address, int* result) {
*result = atomicMax(address, address);
}
/* v3: an integer literal is passed where the target pointer is expected. */
__global__ void atomicMax_int_v3(int* address, int* result) { *result = atomicMax(1234, 1234); }
/* v4: the target is a pointer to the user-defined Dummy class. */
__global__ void atomicMax_int_v4(Dummy* address, int* result) {
*result = atomicMax(address, 1234);
}
/* v5: the target is a char*. */
__global__ void atomicMax_int_v5(char* address, int* result) { *result = atomicMax(address, 1234); }
/* v6: the target is a short*. */
__global__ void atomicMax_int_v6(short* address, int* result) {
*result = atomicMax(address, 1234);
}
/* v7: the target is a long*. */
__global__ void atomicMax_int_v7(long* address, int* result) { *result = atomicMax(address, 1234); }
/* v8: the target is a long long*. */
__global__ void atomicMax_int_v8(long long* address, int* result) {
*result = atomicMax(address, 1234);
}
/* unsigned int atomicMax(unsigned int* address, unsigned int val) */
/*
 * Negative kernels for the overload above: every kernel passes an argument combination that
 * differs from that signature.
 * NOTE(review): presumably each call is expected to fail to compile (the RTC copy of these
 * kernels is checked for one compiler error per kernel) - confirm.
 */
/* v1: the target is the address of the pointer parameter (unsigned int**). */
__global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicMax(&address, 1234);
}
/* v2: a pointer is passed where the value is expected. */
__global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicMax(address, address);
}
/* v3: an integer literal is passed where the target pointer is expected. */
__global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicMax(1234, 1234);
}
/* v4: the target is a pointer to the user-defined Dummy class. */
__global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
/* v5: the target is a char*. */
__global__ void atomicMax_uint_v5(char* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
/* v6: the target is a short*. */
__global__ void atomicMax_uint_v6(short* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
/* v7: the target is a long*. */
__global__ void atomicMax_uint_v7(long* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
/* v8: the target is a long long*. */
__global__ void atomicMax_uint_v8(long long* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
/* atomicMax(unsigned long* address, unsigned long val) */
/*
 * Negative kernels for the overload above: every kernel passes an argument combination that
 * differs from that signature.
 * NOTE(review): presumably each call is expected to fail to compile (the RTC copy of these
 * kernels is checked for one compiler error per kernel) - confirm.
 */
/* v1: the target is the address of the pointer parameter (unsigned long**). */
__global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicMax(&address, 1234);
}
/* v2: a pointer is passed where the value is expected. */
__global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicMax(address, address);
}
/* v3: an integer literal is passed where the target pointer is expected. */
__global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicMax(1234, 1234);
}
/* v4: the target is a pointer to the user-defined Dummy class. */
__global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
/* v5: the target is a char*. */
__global__ void atomicMax_ulong_v5(char* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
/* v6: the target is a short*. */
__global__ void atomicMax_ulong_v6(short* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
/* v7: the target is a (signed) long*. */
__global__ void atomicMax_ulong_v7(long* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
/* v8: the target is a long long*. */
__global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
/* atomicMax(unsigned long long* address, unsigned long long val) */
/*
 * Negative kernels for the overload above: every kernel passes an argument combination that
 * differs from that signature.
 * NOTE(review): presumably each call is expected to fail to compile (the RTC copy of these
 * kernels is checked for one compiler error per kernel) - confirm.
 */
/* v1: the target is the address of the pointer parameter (unsigned long long**). */
__global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicMax(&address, 1234);
}
/* v2: a pointer is passed where the value is expected. */
__global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicMax(address, address);
}
/* v3: an integer literal is passed where the target pointer is expected. */
__global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicMax(1234, 1234);
}
/* v4: the target is a pointer to the user-defined Dummy class. */
__global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
/* v5: the target is a char*. */
__global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
/* v6: the target is a short*. */
__global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
/* v7: the target is a long*. */
__global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
/* v8: the target is a (signed) long long*. */
__global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
/* atomicMax(float* address, float val) */
/*
 * Negative kernels for the overload above: every kernel passes an argument combination that
 * differs from that signature.
 * NOTE(review): presumably each call is expected to fail to compile (the RTC copy of these
 * kernels is checked for one compiler error per kernel) - confirm.
 */
/* v1: the target is the address of the pointer parameter (float**). */
__global__ void atomicMax_float_v1(float* address, float* result) {
*result = atomicMax(&address, 1234.f);
}
/* v2: a pointer is passed where the value is expected. */
__global__ void atomicMax_float_v2(float* address, float* result) {
*result = atomicMax(address, address);
}
/* v3: a float literal is passed where the target pointer is expected. */
__global__ void atomicMax_float_v3(float* address, float* result) {
*result = atomicMax(1234.f, 1234.f);
}
/* v4: the target is a pointer to the user-defined Dummy class. */
__global__ void atomicMax_float_v4(Dummy* address, float* result) {
*result = atomicMax(address, 1234.f);
}
/* v5: the target is a char*. */
__global__ void atomicMax_float_v5(char* address, float* result) {
*result = atomicMax(address, 1234.f);
}
/* v6: the target is a short*. */
__global__ void atomicMax_float_v6(short* address, float* result) {
*result = atomicMax(address, 1234.f);
}
/* v7: the target is a long*. */
__global__ void atomicMax_float_v7(long* address, float* result) {
*result = atomicMax(address, 1234.f);
}
/*
 * v8: the target is a long long*.
 * NOTE(review): unlike v1 and v4-v7 this passes the integer literal 1234 rather than 1234.f -
 * confirm this is intentional.
 */
__global__ void atomicMax_float_v8(long long* address, float* result) {
*result = atomicMax(address, 1234);
}
/* atomicMax(double* address, double val) */
/*
 * Negative kernels for the overload above: every kernel passes an argument combination that
 * differs from that signature.
 * NOTE(review): presumably each call is expected to fail to compile (the RTC copy of these
 * kernels is checked for one compiler error per kernel) - confirm.
 */
/* v1: the target is the address of the pointer parameter (double**). */
__global__ void atomicMax_double_v1(double* address, double* result) {
*result = atomicMax(&address, 1234.0);
}
/* v2: a pointer is passed where the value is expected. */
__global__ void atomicMax_double_v2(double* address, double* result) {
*result = atomicMax(address, address);
}
/* v3: a double literal is passed where the target pointer is expected. */
__global__ void atomicMax_double_v3(double* address, double* result) {
*result = atomicMax(1234.0, 1234.0);
}
/* v4: the target is a pointer to the user-defined Dummy class. */
__global__ void atomicMax_double_v4(Dummy* address, double* result) {
*result = atomicMax(address, 1234.0);
}
/* v5: the target is a char*. */
__global__ void atomicMax_double_v5(char* address, double* result) {
*result = atomicMax(address, 1234.0);
}
/* v6: the target is a short*. */
__global__ void atomicMax_double_v6(short* address, double* result) {
*result = atomicMax(address, 1234.0);
}
/* v7: the target is a long*. */
__global__ void atomicMax_double_v7(long* address, double* result) {
*result = atomicMax(address, 1234.0);
}
/* v8: the target is a long long*. */
__global__ void atomicMax_double_v8(long long* address, double* result) {
*result = atomicMax(address, 1234.0);
}
+273
Просмотреть файл
@@ -0,0 +1,273 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
/*
 * Program source, kept as a raw string, that defines the Dummy class and eight kernels
 * (atomicMax_int_v1 .. atomicMax_int_v8) whose results are int. Each kernel calls atomicMax with
 * a different argument combination: v1 address of the pointer parameter, v2 pointer as value,
 * v3 literal as target, v4 Dummy*, v5 char*, v6 short*, v7 long*, v8 long long*.
 * NOTE(review): the atomicMax RTC negative test expects 8 "error:" diagnostics per source - keep
 * the kernel count and that expectation in sync.
 */
static constexpr auto kAtomicMax_int{
R"(
__global__ void atomicMax_int_v1(int* address, int* result) {
*result = atomicMax(&address, 1234);
}
__global__ void atomicMax_int_v2(int* address, int* result) {
*result = atomicMax(address, address);
}
__global__ void atomicMax_int_v3(int* address, int* result) {
*result = atomicMax(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMax_int_v4(Dummy* address, int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_int_v5(char* address, int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_int_v6(short* address, int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_int_v7(long* address, int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_int_v8(long long* address, int* result) {
*result = atomicMax(address, 1234);
}
)"};
/*
 * Program source, kept as a raw string, that defines the Dummy class and eight kernels
 * (atomicMax_uint_v1 .. atomicMax_uint_v8) whose results are unsigned int. Each kernel calls
 * atomicMax with a different argument combination: v1 address of the pointer parameter,
 * v2 pointer as value, v3 literal as target, v4 Dummy*, v5 char*, v6 short*, v7 long*,
 * v8 long long*.
 * NOTE(review): the atomicMax RTC negative test expects 8 "error:" diagnostics per source - keep
 * the kernel count and that expectation in sync.
 */
static constexpr auto kAtomicMax_uint{
R"(
__global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicMax(&address, 1234);
}
__global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicMax(address, address);
}
__global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicMax(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_uint_v5(char* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_uint_v6(short* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_uint_v7(long* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_uint_v8(long long* address, unsigned int* result) {
*result = atomicMax(address, 1234);
}
)"};
/*
 * Program source, kept as a raw string, that defines the Dummy class and eight kernels
 * (atomicMax_ulong_v1 .. atomicMax_ulong_v8) whose results are unsigned long. Each kernel calls
 * atomicMax with a different argument combination: v1 address of the pointer parameter,
 * v2 pointer as value, v3 literal as target, v4 Dummy*, v5 char*, v6 short*, v7 long*,
 * v8 long long*.
 * NOTE(review): the atomicMax RTC negative test expects 8 "error:" diagnostics per source - keep
 * the kernel count and that expectation in sync.
 */
static constexpr auto kAtomicMax_ulong{
R"(
__global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicMax(&address, 1234);
}
__global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicMax(address, address);
}
__global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicMax(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulong_v5(char* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulong_v6(short* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulong_v7(long* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) {
*result = atomicMax(address, 1234);
}
)"};
/*
 * Program source, kept as a raw string, that defines the Dummy class and eight kernels
 * (atomicMax_ulonglong_v1 .. atomicMax_ulonglong_v8) whose results are unsigned long long. Each
 * kernel calls atomicMax with a different argument combination: v1 address of the pointer
 * parameter, v2 pointer as value, v3 literal as target, v4 Dummy*, v5 char*, v6 short*,
 * v7 long*, v8 long long*.
 * NOTE(review): the atomicMax RTC negative test expects 8 "error:" diagnostics per source - keep
 * the kernel count and that expectation in sync.
 */
static constexpr auto kAtomicMax_ulonglong{
R"(
__global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicMax(&address, 1234);
}
__global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicMax(address, address);
}
__global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicMax(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
__global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicMax(address, 1234);
}
)"};
/*
 * Program source, kept as a raw string, that defines the Dummy class and eight kernels
 * (atomicMax_float_v1 .. atomicMax_float_v8) whose results are float. Each kernel calls atomicMax
 * with a different argument combination: v1 address of the pointer parameter, v2 pointer as
 * value, v3 literal as target, v4 Dummy*, v5 char*, v6 short*, v7 long*, v8 long long*.
 * NOTE(review): the atomicMax RTC negative test expects 8 "error:" diagnostics per source - keep
 * the kernel count and that expectation in sync.
 * NOTE(review): v8 passes the integer literal 1234 while v1 and v4-v7 pass 1234.f - confirm this
 * is intentional.
 */
static constexpr auto kAtomicMax_float{
R"(
__global__ void atomicMax_float_v1(float* address, float* result) {
*result = atomicMax(&address, 1234.f);
}
__global__ void atomicMax_float_v2(float* address, float* result) {
*result = atomicMax(address, address);
}
__global__ void atomicMax_float_v3(float* address, float* result) {
*result = atomicMax(1234.f, 1234.f);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMax_float_v4(Dummy* address, float* result) {
*result = atomicMax(address, 1234.f);
}
__global__ void atomicMax_float_v5(char* address, float* result) {
*result = atomicMax(address, 1234.f);
}
__global__ void atomicMax_float_v6(short* address, float* result) {
*result = atomicMax(address, 1234.f);
}
__global__ void atomicMax_float_v7(long* address, float* result) {
*result = atomicMax(address, 1234.f);
}
__global__ void atomicMax_float_v8(long long* address, float* result) {
*result = atomicMax(address, 1234);
}
)"};
/*
Source text holding eight kernels (atomicMax_double_v1 .. v8). Every kernel calls atomicMax with
an argument combination that differs from (double* address, double val):
  v1 - address-of the pointer, v2 - pointer passed as the value, v3 - literals for both arguments,
  v4 - Dummy*, v5 - char*, v6 - short*, v7 - long*, v8 - long long*.
NOTE(review): presumably this string is compiled at run time by the atomicMax negative RTC test
and each kernel is expected to yield one compile error - the consumer is not visible here, confirm.
*/
static constexpr auto kAtomicMax_double{
R"(
__global__ void atomicMax_double_v1(double* address, double* result) {
*result = atomicMax(&address, 1234.0);
}
__global__ void atomicMax_double_v2(double* address, double* result) {
*result = atomicMax(address, address);
}
__global__ void atomicMax_double_v3(double* address, double* result) {
*result = atomicMax(1234.0, 1234.0);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMax_double_v4(Dummy* address, double* result) {
*result = atomicMax(address, 1234.0);
}
__global__ void atomicMax_double_v5(char* address, double* result) {
*result = atomicMax(address, 1234.0);
}
__global__ void atomicMax_double_v6(short* address, double* result) {
*result = atomicMax(address, 1234.0);
}
__global__ void atomicMax_double_v7(long* address, double* result) {
*result = atomicMax(address, 1234.0);
}
__global__ void atomicMax_double_v8(long long* address, double* result) {
*result = atomicMax(address, 1234.0);
}
)"};
+124
Просмотреть файл
@@ -0,0 +1,124 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicMax_system atomicMax_system
* @{
* @ingroup AtomicsTest
* `atomicMax_system(TestType* address, TestType val)` -
* performs a system-wide atomic maximum between the value stored at address and val, returns the
* old value.
*/
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMax_system from multiple threads that all target the same address.
 *  - Runs on multiple devices and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMax_system.cc
 * Test requirements
 * ------------------------
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
// The float and double instantiations are only built when HT_AMD is set.
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
#endif
  constexpr auto kOperation = MinMax::AtomicOperation::kMaxSystem;

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      // NOTE(review): the leading 2, 2 presumably select device and kernel counts and the
      // trailing pair the address count and spacing - confirm against min_max_common.hh.
      MinMax::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, 1, sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMax_system from multiple threads that target adjacent addresses.
 *  - Runs on multiple devices and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMax_system.cc
 * Test requirements
 * ------------------------
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
// The float and double instantiations are only built when HT_AMD is set.
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
#endif
  constexpr auto kOperation = MinMax::AtomicOperation::kMaxSystem;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      // NOTE(review): sizeof(TestType) presumably makes consecutive elements contiguous -
      // confirm against min_max_common.hh.
      MinMax::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, warp_size,
                                                                     sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMax_system from multiple threads that target scattered addresses.
 *  - Runs on multiple devices and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMax_system.cc
 * Test requirements
 * ------------------------
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
// The float and double instantiations are only built when HT_AMD is set.
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
#endif
  constexpr auto kOperation = MinMax::AtomicOperation::kMaxSystem;
  // Fixed value passed where the adjacent-address variant passes sizeof(TestType).
  constexpr unsigned int kCacheLineSize = 128u;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, warp_size,
                                                                     kCacheLineSize);
    }
  }
}
+222
Просмотреть файл
@@ -0,0 +1,222 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "atomicMin_negative_kernels_rtc.hh"
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicMin atomicMin
* @{
* @ingroup AtomicsTest
* `atomicMin(TestType* address, TestType val)` -
* calculates the minimum of the value stored at address and val, returns the old value.
*/
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin from multiple threads that all target the same address.
 *  - Runs on a single device with a single kernel launch.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMin;

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      // NOTE(review): the arguments presumably are the address count and spacing - confirm
      // against min_max_common.hh.
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin from multiple threads that target adjacent addresses.
 *  - Runs on a single device with a single kernel launch.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMin;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      // NOTE(review): sizeof(TestType) presumably makes consecutive elements contiguous -
      // confirm against min_max_common.hh.
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin from multiple threads that target scattered addresses.
 *  - Runs on a single device with a single kernel launch.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMin;
  // Fixed value passed where the adjacent-address variant passes sizeof(TestType).
  constexpr unsigned int kCacheLineSize = 128u;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineSize);
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin from multiple threads that all target the same address.
 *  - Runs on a single device and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMin;

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      // NOTE(review): the leading 2 presumably is the number of kernels launched - confirm
      // against min_max_common.hh.
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin from multiple threads that target adjacent addresses.
 *  - Runs on a single device and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMin;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size,
                                                                   sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin from multiple threads that target scattered addresses.
 *  - Runs on a single device and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kMin;
  // Fixed value passed where the adjacent-address variant passes sizeof(TestType).
  constexpr unsigned int kCacheLineSize = 128u;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size, kCacheLineSize);
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Compiles atomicMin calls that use invalid parameters.
 *  - The sources are compiled at run time with RTC and the reported errors are counted.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEST_CASE("Unit_atomicMin_Negative_Parameters_RTC") {
  // Counts occurrences of `needle` in `text`, resuming one character after each match.
  const auto count_occurrences = [](const std::string& text, const std::string& needle) {
    int hits = 0;
    for (size_t at = text.find(needle, 0); at != std::string::npos;
         at = text.find(needle, at + 1)) {
      ++hits;
    }
    return hits;
  };

  // One source string per atomicMin overload; see atomicMin_negative_kernels_rtc.hh.
  const auto program_source = GENERATE(kAtomicMin_int, kAtomicMin_uint, kAtomicMin_ulong,
                                       kAtomicMin_ulonglong, kAtomicMin_float, kAtomicMin_double);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicMin_negative.cc", 0, nullptr, nullptr));

  // The compile status is only verified after the log was read and the program destroyed.
  hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  // Fetch the compile log so that the compiler error messages can be counted.
  size_t log_size = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Every source string in atomicMin_negative_kernels_rtc.hh defines eight kernels.
  const int expected_error_count = 8;
  const int error_count = count_occurrences(log, "error:");

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+219
Просмотреть файл
@@ -0,0 +1,219 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
/**
 * Empty placeholder type with user-provided __device__ constructor and destructor.
 * The *_v4 kernels in this file take a Dummy* as their address parameter.
 */
struct Dummy {
  __device__ Dummy() {}
  __device__ ~Dummy() {}
};
/*
 * Kernels for: int atomicMin(int* address, int val)
 * Each kernel calls atomicMin with arguments that differ from the signature above.
 * NOTE(review): presumably every kernel is expected to be rejected by the compiler - confirm
 * against the build step that consumes this file.
 */
// v1: passes &address (an int**) as the address argument.
__global__ void atomicMin_int_v1(int* address, int* result) { *result = atomicMin(&address, 1234); }
// v2: passes the pointer itself as the value argument.
__global__ void atomicMin_int_v2(int* address, int* result) {
*result = atomicMin(address, address);
}
// v3: passes an integer literal in place of the address pointer.
__global__ void atomicMin_int_v3(int* address, int* result) { *result = atomicMin(1234, 1234); }
// v4: address is a pointer to the class type Dummy.
__global__ void atomicMin_int_v4(Dummy* address, int* result) {
*result = atomicMin(address, 1234);
}
// v5: address is a char*.
__global__ void atomicMin_int_v5(char* address, int* result) { *result = atomicMin(address, 1234); }
// v6: address is a short*.
__global__ void atomicMin_int_v6(short* address, int* result) {
*result = atomicMin(address, 1234);
}
// v7: address is a long*.
__global__ void atomicMin_int_v7(long* address, int* result) { *result = atomicMin(address, 1234); }
// v8: address is a long long*.
__global__ void atomicMin_int_v8(long long* address, int* result) {
*result = atomicMin(address, 1234);
}
/*
 * Kernels for: unsigned int atomicMin(unsigned int* address, unsigned int val)
 * Each kernel calls atomicMin with arguments that differ from the signature above.
 * NOTE(review): presumably every kernel is expected to be rejected by the compiler - confirm
 * against the build step that consumes this file.
 */
// v1: passes &address (an unsigned int**) as the address argument.
__global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicMin(&address, 1234);
}
// v2: passes the pointer itself as the value argument.
__global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicMin(address, address);
}
// v3: passes an integer literal in place of the address pointer.
__global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicMin(1234, 1234);
}
// v4: address is a pointer to the class type Dummy.
__global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
// v5: address is a char*.
__global__ void atomicMin_uint_v5(char* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
// v6: address is a short*.
__global__ void atomicMin_uint_v6(short* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
// v7: address is a long*.
__global__ void atomicMin_uint_v7(long* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
// v8: address is a long long*.
__global__ void atomicMin_uint_v8(long long* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
/*
 * Kernels for: atomicMin(unsigned long* address, unsigned long val)
 * Each kernel calls atomicMin with arguments that differ from the signature above.
 * NOTE(review): presumably every kernel is expected to be rejected by the compiler - confirm
 * against the build step that consumes this file.
 */
// v1: passes &address (an unsigned long**) as the address argument.
__global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicMin(&address, 1234);
}
// v2: passes the pointer itself as the value argument.
__global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicMin(address, address);
}
// v3: passes an integer literal in place of the address pointer.
__global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicMin(1234, 1234);
}
// v4: address is a pointer to the class type Dummy.
__global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
// v5: address is a char*.
__global__ void atomicMin_ulong_v5(char* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
// v6: address is a short*.
__global__ void atomicMin_ulong_v6(short* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
// v7: address is a (signed) long*.
__global__ void atomicMin_ulong_v7(long* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
// v8: address is a long long*.
__global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
/*
 * Kernels for: atomicMin(unsigned long long* address, unsigned long long val)
 * Each kernel calls atomicMin with arguments that differ from the signature above.
 * NOTE(review): presumably every kernel is expected to be rejected by the compiler - confirm
 * against the build step that consumes this file.
 */
// v1: passes &address (an unsigned long long**) as the address argument.
__global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicMin(&address, 1234);
}
// v2: passes the pointer itself as the value argument.
__global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicMin(address, address);
}
// v3: passes an integer literal in place of the address pointer.
__global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicMin(1234, 1234);
}
// v4: address is a pointer to the class type Dummy.
__global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
// v5: address is a char*.
__global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
// v6: address is a short*.
__global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
// v7: address is a long*.
__global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
// v8: address is a (signed) long long*.
__global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
/*
 * Kernels for: atomicMin(float* address, float val)
 * Each kernel calls atomicMin with arguments that differ from the signature above.
 * NOTE(review): presumably every kernel is expected to be rejected by the compiler - confirm
 * against the build step that consumes this file.
 */
// v1: passes &address (a float**) as the address argument.
__global__ void atomicMin_float_v1(float* address, float* result) {
*result = atomicMin(&address, 1234.f);
}
// v2: passes the pointer itself as the value argument.
__global__ void atomicMin_float_v2(float* address, float* result) {
*result = atomicMin(address, address);
}
// v3: passes a float literal in place of the address pointer.
__global__ void atomicMin_float_v3(float* address, float* result) {
*result = atomicMin(1234.f, 1234.f);
}
// v4: address is a pointer to the class type Dummy.
__global__ void atomicMin_float_v4(Dummy* address, float* result) {
*result = atomicMin(address, 1234.f);
}
// v5: address is a char*.
__global__ void atomicMin_float_v5(char* address, float* result) {
*result = atomicMin(address, 1234.f);
}
// v6: address is a short*.
__global__ void atomicMin_float_v6(short* address, float* result) {
*result = atomicMin(address, 1234.f);
}
// v7: address is a long*.
__global__ void atomicMin_float_v7(long* address, float* result) {
*result = atomicMin(address, 1234.f);
}
// v8: address is a long long*.
// NOTE(review): unlike v1-v7 the value here is the int literal 1234, not 1234.f - confirm
// that this is intentional.
__global__ void atomicMin_float_v8(long long* address, float* result) {
*result = atomicMin(address, 1234);
}
/*
 * Kernels for: atomicMin(double* address, double val)
 * Each kernel calls atomicMin with arguments that differ from the signature above.
 * NOTE(review): presumably every kernel is expected to be rejected by the compiler - confirm
 * against the build step that consumes this file.
 */
// v1: passes &address (a double**) as the address argument.
__global__ void atomicMin_double_v1(double* address, double* result) {
*result = atomicMin(&address, 1234.0);
}
// v2: passes the pointer itself as the value argument.
__global__ void atomicMin_double_v2(double* address, double* result) {
*result = atomicMin(address, address);
}
// v3: passes a double literal in place of the address pointer.
__global__ void atomicMin_double_v3(double* address, double* result) {
*result = atomicMin(1234.0, 1234.0);
}
// v4: address is a pointer to the class type Dummy.
__global__ void atomicMin_double_v4(Dummy* address, double* result) {
*result = atomicMin(address, 1234.0);
}
// v5: address is a char*.
__global__ void atomicMin_double_v5(char* address, double* result) {
*result = atomicMin(address, 1234.0);
}
// v6: address is a short*.
__global__ void atomicMin_double_v6(short* address, double* result) {
*result = atomicMin(address, 1234.0);
}
// v7: address is a long*.
__global__ void atomicMin_double_v7(long* address, double* result) {
*result = atomicMin(address, 1234.0);
}
// v8: address is a long long*.
__global__ void atomicMin_double_v8(long long* address, double* result) {
*result = atomicMin(address, 1234.0);
}
+273
Просмотреть файл
@@ -0,0 +1,273 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
/*
Source text holding eight kernels (atomicMin_int_v1 .. v8). Every kernel calls atomicMin with
an argument combination that differs from (int* address, int val):
  v1 - address-of the pointer, v2 - pointer passed as the value, v3 - literals for both arguments,
  v4 - Dummy*, v5 - char*, v6 - short*, v7 - long*, v8 - long long*.
Dummy is declared inside the string, so the source text does not rely on a declaration from the
including file.
*/
static constexpr auto kAtomicMin_int{
R"(
__global__ void atomicMin_int_v1(int* address, int* result) {
*result = atomicMin(&address, 1234);
}
__global__ void atomicMin_int_v2(int* address, int* result) {
*result = atomicMin(address, address);
}
__global__ void atomicMin_int_v3(int* address, int* result) {
*result = atomicMin(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMin_int_v4(Dummy* address, int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_int_v5(char* address, int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_int_v6(short* address, int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_int_v7(long* address, int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_int_v8(long long* address, int* result) {
*result = atomicMin(address, 1234);
}
)"};
/*
Source text holding eight kernels (atomicMin_uint_v1 .. v8). Every kernel calls atomicMin with
an argument combination that differs from (unsigned int* address, unsigned int val):
  v1 - address-of the pointer, v2 - pointer passed as the value, v3 - literals for both arguments,
  v4 - Dummy*, v5 - char*, v6 - short*, v7 - long*, v8 - long long*.
Dummy is declared inside the string, so the source text does not rely on a declaration from the
including file.
*/
static constexpr auto kAtomicMin_uint{
R"(
__global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicMin(&address, 1234);
}
__global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicMin(address, address);
}
__global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicMin(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_uint_v5(char* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_uint_v6(short* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_uint_v7(long* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_uint_v8(long long* address, unsigned int* result) {
*result = atomicMin(address, 1234);
}
)"};
/*
Source text holding eight kernels (atomicMin_ulong_v1 .. v8). Every kernel calls atomicMin with
an argument combination that differs from (unsigned long* address, unsigned long val):
  v1 - address-of the pointer, v2 - pointer passed as the value, v3 - literals for both arguments,
  v4 - Dummy*, v5 - char*, v6 - short*, v7 - long*, v8 - long long*.
Dummy is declared inside the string, so the source text does not rely on a declaration from the
including file.
*/
static constexpr auto kAtomicMin_ulong{
R"(
__global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicMin(&address, 1234);
}
__global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicMin(address, address);
}
__global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicMin(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulong_v5(char* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulong_v6(short* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulong_v7(long* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) {
*result = atomicMin(address, 1234);
}
)"};
/*
Source text holding eight kernels (atomicMin_ulonglong_v1 .. v8). Every kernel calls atomicMin
with an argument combination that differs from
(unsigned long long* address, unsigned long long val):
  v1 - address-of the pointer, v2 - pointer passed as the value, v3 - literals for both arguments,
  v4 - Dummy*, v5 - char*, v6 - short*, v7 - long*, v8 - long long*.
Dummy is declared inside the string, so the source text does not rely on a declaration from the
including file.
*/
static constexpr auto kAtomicMin_ulonglong{
R"(
__global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicMin(&address, 1234);
}
__global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicMin(address, address);
}
__global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicMin(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
__global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicMin(address, 1234);
}
)"};
/*
Source text holding eight kernels (atomicMin_float_v1 .. v8). Every kernel calls atomicMin with
an argument combination that differs from (float* address, float val):
  v1 - address-of the pointer, v2 - pointer passed as the value, v3 - literals for both arguments,
  v4 - Dummy*, v5 - char*, v6 - short*, v7 - long*, v8 - long long*.
Dummy is declared inside the string, so the source text does not rely on a declaration from the
including file.
NOTE(review): atomicMin_float_v8 passes the int literal 1234 while v1-v7 use 1234.f - confirm
that this is intentional.
*/
static constexpr auto kAtomicMin_float{
R"(
__global__ void atomicMin_float_v1(float* address, float* result) {
*result = atomicMin(&address, 1234.f);
}
__global__ void atomicMin_float_v2(float* address, float* result) {
*result = atomicMin(address, address);
}
__global__ void atomicMin_float_v3(float* address, float* result) {
*result = atomicMin(1234.f, 1234.f);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMin_float_v4(Dummy* address, float* result) {
*result = atomicMin(address, 1234.f);
}
__global__ void atomicMin_float_v5(char* address, float* result) {
*result = atomicMin(address, 1234.f);
}
__global__ void atomicMin_float_v6(short* address, float* result) {
*result = atomicMin(address, 1234.f);
}
__global__ void atomicMin_float_v7(long* address, float* result) {
*result = atomicMin(address, 1234.f);
}
__global__ void atomicMin_float_v8(long long* address, float* result) {
*result = atomicMin(address, 1234);
}
)"};
/*
Source text holding eight kernels (atomicMin_double_v1 .. v8). Every kernel calls atomicMin with
an argument combination that differs from (double* address, double val):
  v1 - address-of the pointer, v2 - pointer passed as the value, v3 - literals for both arguments,
  v4 - Dummy*, v5 - char*, v6 - short*, v7 - long*, v8 - long long*.
Dummy is declared inside the string, so the source text does not rely on a declaration from the
including file.
*/
static constexpr auto kAtomicMin_double{
R"(
__global__ void atomicMin_double_v1(double* address, double* result) {
*result = atomicMin(&address, 1234.0);
}
__global__ void atomicMin_double_v2(double* address, double* result) {
*result = atomicMin(address, address);
}
__global__ void atomicMin_double_v3(double* address, double* result) {
*result = atomicMin(1234.0, 1234.0);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicMin_double_v4(Dummy* address, double* result) {
*result = atomicMin(address, 1234.0);
}
__global__ void atomicMin_double_v5(char* address, double* result) {
*result = atomicMin(address, 1234.0);
}
__global__ void atomicMin_double_v6(short* address, double* result) {
*result = atomicMin(address, 1234.0);
}
__global__ void atomicMin_double_v7(long* address, double* result) {
*result = atomicMin(address, 1234.0);
}
__global__ void atomicMin_double_v8(long long* address, double* result) {
*result = atomicMin(address, 1234.0);
}
)"};
+124
Просмотреть файл
@@ -0,0 +1,124 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicMin_system atomicMin_system
* @{
* @ingroup AtomicsTest
* `atomicMin_system(TestType* address, TestType val)` -
* performs a system-wide atomic minimum between the value stored at address and val, returns the
* old value.
*/
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin_system from multiple threads that all target the same address.
 *  - Runs on multiple devices and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin_system.cc
 * Test requirements
 * ------------------------
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
// The float and double instantiations are only built when HT_AMD is set.
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
#endif
  constexpr auto kOperation = MinMax::AtomicOperation::kMinSystem;

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      // NOTE(review): the leading 2, 2 presumably select device and kernel counts and the
      // trailing pair the address count and spacing - confirm against min_max_common.hh.
      MinMax::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, 1, sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin_system from multiple threads that target adjacent addresses.
 *  - Runs on multiple devices and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin_system.cc
 * Test requirements
 * ------------------------
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
// The float and double instantiations are only built when HT_AMD is set.
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
#endif
  constexpr auto kOperation = MinMax::AtomicOperation::kMinSystem;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      // NOTE(review): sizeof(TestType) presumably makes consecutive elements contiguous -
      // confirm against min_max_common.hh.
      MinMax::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, warp_size,
                                                                     sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *  - Issues atomicMin_system from multiple threads that target scattered addresses.
 *  - Runs on multiple devices and launches multiple kernels.
 * Test source
 * ------------------------
 *  - unit/atomics/atomicMin_system.cc
 * Test requirements
 * ------------------------
 *  - Multi-device
 *  - HIP_VERSION >= 5.2
 */
// The float and double instantiations are only built when HT_AMD is set.
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
#endif
  constexpr auto kOperation = MinMax::AtomicOperation::kMinSystem;
  // Fixed value passed where the adjacent-address variant passes sizeof(TestType).
  constexpr unsigned int kCacheLineSize = 128u;

  // The warp size is read from device 0 and forwarded to the test helper.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, warp_size,
                                                                     kCacheLineSize);
    }
  }
}
+222
Просмотреть файл
@@ -0,0 +1,222 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "atomicOr_negative_kernels_rtc.hh"
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicOr atomicOr
* @{
* @ingroup AtomicsTest
* `atomicOr(TestType* address, TestType val)` -
* performs atomic bitwise OR between address and val, returns old value.
*/
/**
* Test Description
* ------------------------
* - Performs atomicOr from multiple threads on the same address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicOr.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long) {
  // Second helper argument: the size of one TestType
  // (presumably the byte pitch between targets - confirm against bitwise_common.hh).
  const auto element_pitch = sizeof(TestType);

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          1, element_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicOr from multiple threads on adjacent addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicOr.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size of device 0 is forwarded to the helper as its first argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Second helper argument: the size of one TestType, i.e. back-to-back elements
  // (presumably the byte pitch between targets - confirm against bitwise_common.hh).
  const auto element_pitch = sizeof(TestType);

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          warp_size, element_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicOr from multiple threads on the scattered addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/atomicOr.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size of device 0 is forwarded to the helper as its first argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Fixed 128-byte spacing passed as the second helper argument
  // (presumably one cache line between targets - confirm against bitwise_common.hh).
  const unsigned int scattered_pitch = 128u;

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          warp_size, scattered_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicOr from multiple threads on the same address.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicOr.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Last helper argument: the size of one TestType
  // (presumably the byte pitch between targets - confirm against bitwise_common.hh).
  const auto element_pitch = sizeof(TestType);

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          2, 1, element_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicOr from multiple threads on adjacent addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicOr.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size of device 0 is forwarded to the helper as its second argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Last helper argument: the size of one TestType, i.e. back-to-back elements
  // (presumably the byte pitch between targets - confirm against bitwise_common.hh).
  const auto element_pitch = sizeof(TestType);

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          2, warp_size, element_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicOr from multiple threads on the scattered addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicOr.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size of device 0 is forwarded to the helper as its second argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Fixed 128-byte spacing passed as the last helper argument
  // (presumably one cache line between targets - confirm against bitwise_common.hh).
  const unsigned int scattered_pitch = 128u;

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          2, warp_size, scattered_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Compiles atomicOr kernels with invalid parameters.
* - Compiles the source with RTC.
* Test source
* ------------------------
* - unit/atomics/atomicOr.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicOr_Negative_Parameters_RTC") {
  hiprtcProgram program{};

  // The test body is run once for each of the four negative-kernel source strings.
  const auto program_source =
      GENERATE(kAtomicOr_int, kAtomicOr_uint, kAtomicOr_ulong, kAtomicOr_ulonglong);
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicOr_negative.cc", 0, nullptr, nullptr));

  // Keep the compile status for later; it is only checked after the program is destroyed.
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a buffer of the size reported by hipRTC.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Please check the content of negative_kernels_rtc.hh
  int expected_error_count{9};

  // Count every occurrence of the "error:" marker in the log.
  int error_count{0};
  const std::string error_message{"error:"};
  for (size_t hit = log.find(error_message); hit != std::string::npos;
       hit = log.find(error_message, hit + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+177
Просмотреть файл
@@ -0,0 +1,177 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
// Empty user-defined type with device-side constructor and destructor; the *_v4 kernels in
// this file take a Dummy* as their address parameter.
struct Dummy {
  __device__ Dummy() {}
  __device__ ~Dummy() {}
};
/* int atomicOr(int* address, int val) */
// Negative cases for the overload above: each kernel makes one atomicOr call with an argument
// whose type differs from (int*, int). The calls are presumably meant to be rejected by the
// compiler - confirm against the build rules that consume this file.
// v1: passes &address (an int**) as the address.
__global__ void atomicOr_int_v1(int* address, int* result) { *result = atomicOr(&address, 1234); }
// v2: passes the pointer `address` as the value operand.
__global__ void atomicOr_int_v2(int* address, int* result) { *result = atomicOr(address, address); }
// v3: passes an integer literal where the address is expected.
__global__ void atomicOr_int_v3(int* address, int* result) { *result = atomicOr(1234, 1234); }
// v4: address is a pointer to the user-defined type Dummy.
__global__ void atomicOr_int_v4(Dummy* address, int* result) { *result = atomicOr(address, 1234); }
// v5: address is a char*.
__global__ void atomicOr_int_v5(char* address, int* result) { *result = atomicOr(address, 1234); }
// v6: address is a short*.
__global__ void atomicOr_int_v6(short* address, int* result) { *result = atomicOr(address, 1234); }
// v7: address is a long*.
__global__ void atomicOr_int_v7(long* address, int* result) { *result = atomicOr(address, 1234); }
// v8: address is a long long*.
__global__ void atomicOr_int_v8(long long* address, int* result) {
*result = atomicOr(address, 1234);
}
// v9: address is a float*.
__global__ void atomicOr_int_v9(float* address, int* result) { *result = atomicOr(address, 1234); }
// v10: address is a double*.
__global__ void atomicOr_int_v10(double* address, int* result) {
*result = atomicOr(address, 1234);
}
/* unsigned int atomicOr(unsigned int* address, unsigned int val) */
// Negative cases for the overload above: each kernel makes one atomicOr call with an argument
// whose type differs from (unsigned int*, unsigned int). The calls are presumably meant to be
// rejected by the compiler - confirm against the build rules that consume this file.
// v1: passes &address (an unsigned int**) as the address.
__global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicOr(&address, 1234);
}
// v2: passes the pointer `address` as the value operand.
__global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicOr(address, address);
}
// v3: passes an integer literal where the address is expected.
__global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicOr(1234, 1234);
}
// v4: address is a pointer to the user-defined type Dummy.
__global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
// v5: address is a char*.
__global__ void atomicOr_uint_v5(char* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
// v6: address is a short*.
__global__ void atomicOr_uint_v6(short* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
// v7: address is a long*.
__global__ void atomicOr_uint_v7(long* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
// v8: address is a long long*.
__global__ void atomicOr_uint_v8(long long* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
// v9: address is a float*.
__global__ void atomicOr_uint_v9(float* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
// v10: address is a double*.
__global__ void atomicOr_uint_v10(double* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
/* unsigned long atomicOr(unsigned long* address, unsigned long val) */
// Negative cases for the overload above: each kernel makes one atomicOr call with an argument
// whose type differs from (unsigned long*, unsigned long). The calls are presumably meant to be
// rejected by the compiler - confirm against the build rules that consume this file.
// v1: passes &address (an unsigned long**) as the address.
__global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicOr(&address, 1234);
}
// v2: passes the pointer `address` as the value operand.
__global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicOr(address, address);
}
// v3: passes an integer literal where the address is expected.
__global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicOr(1234, 1234);
}
// v4: address is a pointer to the user-defined type Dummy.
__global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
// v5: address is a char*.
__global__ void atomicOr_ulong_v5(char* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
// v6: address is a short*.
__global__ void atomicOr_ulong_v6(short* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
// v7: address is a (signed) long*.
__global__ void atomicOr_ulong_v7(long* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
// v8: address is a long long*.
__global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
// v9: address is a float*.
__global__ void atomicOr_ulong_v9(float* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
// v10: address is a double*.
__global__ void atomicOr_ulong_v10(double* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
/* unsigned long long atomicOr(unsigned long long* address, unsigned long long val) */
// Negative cases for the overload above: each kernel makes one atomicOr call with an argument
// whose type differs from (unsigned long long*, unsigned long long). The calls are presumably
// meant to be rejected by the compiler - confirm against the build rules that consume this file.
// v1: passes &address (an unsigned long long**) as the address.
__global__ void atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicOr(&address, 1234);
}
// v2: passes the pointer `address` as the value operand.
__global__ void atomicOr_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicOr(address, address);
}
// v3: passes an integer literal where the address is expected.
__global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicOr(1234, 1234);
}
// v4: address is a pointer to the user-defined type Dummy.
__global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
// v5: address is a char*.
__global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
// v6: address is a short*.
__global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
// v7: address is a long*.
__global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
// v8: address is a (signed) long long*.
__global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
// v9: address is a float*.
__global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
// v10: address is a double*.
__global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
+223
Просмотреть файл
@@ -0,0 +1,223 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
// Source text holding ten kernels (atomicOr_int_v1 .. atomicOr_int_v10) that write the result of
// an atomicOr call through an int* `result`. Each call uses an address or value argument that
// is not the (int*, int) pair: pointer-to-pointer, pointer as value, integer literal as address,
// and Dummy/char/short/long/long long/float/double pointers.
// The class Dummy is declared inside the string, between v3 and v4.
// The literal is a runtime string; any edit inside it changes the program text (and possibly
// the diagnostics produced for it), so leave it untouched when only reformatting.
static constexpr auto kAtomicOr_int{
R"(
__global__ void atomicOr_int_v1(int* address, int* result) {
*result = atomicOr(&address, 1234);
}
__global__ void atomicOr_int_v2(int* address, int* result) {
*result = atomicOr(address, address);
}
__global__ void atomicOr_int_v3(int* address, int* result) {
*result = atomicOr(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicOr_int_v4(Dummy* address, int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_int_v5(char* address, int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_int_v6(short* address, int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_int_v7(long* address, int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_int_v8(long long* address, int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_int_v9(float* address, int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_int_v10(double* address, int* result) {
*result = atomicOr(address, 1234);
}
)"};
// Source text holding ten kernels (atomicOr_uint_v1 .. atomicOr_uint_v10) that write the result
// of an atomicOr call through an unsigned int* `result`. Each call uses an address or value
// argument that is not the (unsigned int*, unsigned int) pair: pointer-to-pointer, pointer as
// value, integer literal as address, and Dummy/char/short/long/long long/float/double pointers.
// The class Dummy is declared inside the string, between v3 and v4.
// The literal is a runtime string; any edit inside it changes the program text (and possibly
// the diagnostics produced for it), so leave it untouched when only reformatting.
static constexpr auto kAtomicOr_uint{
R"(
__global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicOr(&address, 1234);
}
__global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicOr(address, address);
}
__global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicOr(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_uint_v5(char* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_uint_v6(short* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_uint_v7(long* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_uint_v8(long long* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_uint_v9(float* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_uint_v10(double* address, unsigned int* result) {
*result = atomicOr(address, 1234);
}
)"};
// Source text holding ten kernels (atomicOr_ulong_v1 .. atomicOr_ulong_v10) that write the
// result of an atomicOr call through an unsigned long* `result`. Each call uses an address or
// value argument that is not the (unsigned long*, unsigned long) pair: pointer-to-pointer,
// pointer as value, integer literal as address, and Dummy/char/short/long/long long/float/double
// pointers.
// The class Dummy is declared inside the string, between v3 and v4.
// The literal is a runtime string; any edit inside it changes the program text (and possibly
// the diagnostics produced for it), so leave it untouched when only reformatting.
static constexpr auto kAtomicOr_ulong{
R"(
__global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicOr(&address, 1234);
}
__global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicOr(address, address);
}
__global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicOr(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulong_v5(char* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulong_v6(short* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulong_v7(long* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulong_v9(float* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulong_v10(double* address, unsigned long* result) {
*result = atomicOr(address, 1234);
}
)"};
// Source text holding ten kernels (atomicOr_ulonglong_v1 .. atomicOr_ulonglong_v10) that write
// the result of an atomicOr call through an unsigned long long* `result`. Each call uses an
// address or value argument that is not the (unsigned long long*, unsigned long long) pair:
// pointer-to-pointer, pointer as value, integer literal as address, and
// Dummy/char/short/long/long long/float/double pointers.
// The class Dummy is declared inside the string, between v3 and v4.
// The literal is a runtime string; any edit inside it changes the program text (and possibly
// the diagnostics produced for it), so leave it untouched when only reformatting.
static constexpr auto kAtomicOr_ulonglong{
R"(
__global__ void atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicOr(&address, 1234);
}
__global__ void atomicOr_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicOr(address, address);
}
__global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicOr(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
__global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) {
*result = atomicOr(address, 1234);
}
)"};
+109
Просмотреть файл
@@ -0,0 +1,109 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicOr_system atomicOr_system
* @{
* @ingroup AtomicsTest
* `atomicOr_system(TestType* address, TestType val)` -
* performs system-wide atomic bitwise OR between address and val, returns old value.
*/
/**
* Test Description
* ------------------------
* - Performs atomicOr_system from multiple threads on the same address.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicOr_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Last helper argument: the size of one TestType
  // (presumably the byte pitch between targets - confirm against bitwise_common.hh).
  const auto element_pitch = sizeof(TestType);

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
          2, 2, 1, element_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicOr_system from multiple threads on adjacent addresses.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicOr_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // The warp size of device 0 is forwarded to the helper as its third argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Last helper argument: the size of one TestType, i.e. back-to-back elements
  // (presumably the byte pitch between targets - confirm against bitwise_common.hh).
  const auto element_pitch = sizeof(TestType);

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
          2, 2, warp_size, element_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicOr_system from multiple threads on scattered addresses.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicOr_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // The warp size of device 0 is forwarded to the helper as its third argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Fixed 128-byte spacing passed as the last helper argument
  // (presumably one cache line between targets - confirm against bitwise_common.hh).
  const unsigned int scattered_pitch = 128u;

  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
          2, 2, warp_size, scattered_pitch);
    }
  }
}
+167
Просмотреть файл
@@ -0,0 +1,167 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include "atomicSub_negative_kernels_rtc.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicSub atomicSub
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* subtraction on a target memory location. Each thread will subtract the same value from the memory
* location, storing the return value into a separate output array slot corresponding to it. Once
* complete, the output array and target memory is validated to contain all the expected values.
* Several memory access patterns are tested:
* -# All threads subtract from a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicSub
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - Several grid and block dimension combinations (only one block is used for shared memory).
* Test source
* ------------------------
* - unit/atomics/atomicSub.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicSub_Positive", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // The warp size of device 0 is the element count used by the adjacent and scattered patterns.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Spacing values handed to the helper: one TestType for the packed patterns, 128 bytes for
  // the scattered one.
  const auto packed_pitch = sizeof(TestType);
  const unsigned int scattered_pitch = 128u;

  // Each iteration contributes three dynamically named sections, one per access pattern.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kSub>(1, packed_pitch);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kSub>(warp_size, packed_pitch);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kSub>(warp_size, scattered_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
* an atomic subtraction on a target memory location. Each thread will subtract the same value from
* the memory location, storing the return value into a separate output array slot corresponding to
* it. Once complete, the output array and target memory is validated to contain all the expected
* values. Several memory access patterns are tested:
* -# All threads subtract from a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of atomicSub
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/atomicSub.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicSub_Positive_Multi_Kernel", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // The warp size of device 0 is the element count used by the adjacent and scattered patterns.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Spacing values handed to the helper: one TestType for the packed patterns, 128 bytes for
  // the scattered one.
  const auto packed_pitch = sizeof(TestType);
  const unsigned int scattered_pitch = 128u;

  // Each iteration contributes three dynamically named sections, one per access pattern.
  // The leading 2 is presumably the number of concurrent kernel launches - confirm against
  // arithmetic_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kSub>(2, 1, packed_pitch);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kSub>(2, warp_size, packed_pitch);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kSub>(2, warp_size,
                                                                      scattered_pitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
* atomicSub.
* Test source
* ------------------------
* - unit/atomics/atomicSub.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_atomicSub_Negative_Parameters_RTC") {
  hiprtcProgram program{};

  // The test body is run once for each of the six negative-kernel source strings.
  const auto program_source = GENERATE(kAtomicSub_int, kAtomicSub_uint, kAtomicSub_ulong,
                                       kAtomicSub_ulonglong, kAtomicSub_float, kAtomicSub_double);
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicSub_negative.cc", 0, nullptr, nullptr));

  // Keep the compile status for later; it is only checked after the program is destroyed.
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a buffer of the size reported by hipRTC.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  int expected_error_count{8};

  // Count every occurrence of the "error:" marker in the log.
  int error_count{0};
  const std::string error_message{"error:"};
  for (size_t hit = log.find(error_message); hit != std::string::npos;
       hit = log.find(error_message, hit + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
+219
Просмотреть файл
@@ -0,0 +1,219 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
/**
 * Empty user-defined class whose constructor and destructor are both marked __device__.
 * The *_v4 kernels in this file take a Dummy* and pass it as the address argument of atomicSub.
 */
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
/* int atomicSub(int* address, int val) */
// Each kernel below hands atomicSub a different argument combination:
//   v1    - &address (an int**) as the address argument
//   v2    - the pointer `address` as the value argument
//   v3    - an integer literal as the address argument
//   v4-v8 - an address of type Dummy*, char*, short*, long* and long long* respectively
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicSub_int_v1(int* address, int* result) { *result = atomicSub(&address, 1234); }
__global__ void atomicSub_int_v2(int* address, int* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_int_v3(int* address, int* result) { *result = atomicSub(1234, 1234); }
__global__ void atomicSub_int_v4(Dummy* address, int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_int_v5(char* address, int* result) { *result = atomicSub(address, 1234); }
__global__ void atomicSub_int_v6(short* address, int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_int_v7(long* address, int* result) { *result = atomicSub(address, 1234); }
__global__ void atomicSub_int_v8(long long* address, int* result) {
*result = atomicSub(address, 1234);
}
/* unsigned int atomicSub(unsigned int* address, unsigned int val) */
// Each kernel below hands atomicSub a different argument combination:
//   v1    - &address (an unsigned int**) as the address argument
//   v2    - the pointer `address` as the value argument
//   v3    - an integer literal as the address argument
//   v4-v8 - an address of type Dummy*, char*, short*, long* and long long* respectively
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicSub(&address, 1234);
}
__global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicSub(1234, 1234);
}
__global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v5(char* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v6(short* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v7(long* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v8(long long* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
/* atomicSub(unsigned long* address, unsigned long val) */
// Each kernel below hands atomicSub a different argument combination:
//   v1    - &address (an unsigned long**) as the address argument
//   v2    - the pointer `address` as the value argument
//   v3    - an integer literal as the address argument
//   v4-v8 - an address of type Dummy*, char*, short*, long* and long long* respectively
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicSub(&address, 1234);
}
__global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicSub(1234, 1234);
}
__global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v5(char* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v6(short* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v7(long* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
/* atomicSub(unsigned long long* address, unsigned long long val) */
// Each kernel below hands atomicSub a different argument combination:
//   v1    - &address (an unsigned long long**) as the address argument
//   v2    - the pointer `address` as the value argument
//   v3    - an integer literal as the address argument
//   v4-v8 - an address of type Dummy*, char*, short*, long* and long long* respectively
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicSub(&address, 1234);
}
__global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicSub(1234, 1234);
}
__global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
/* atomicSub(float* address, float val) */
// Each kernel below hands atomicSub a different argument combination while always using a
// float value (1234.f) wherever a value is passed:
//   v1    - &address (a float**) as the address argument
//   v2    - the pointer `address` as the value argument
//   v3    - a float literal as the address argument
//   v4-v8 - an address of type Dummy*, char*, short*, long* and long long* respectively
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicSub_float_v1(float* address, float* result) {
  *result = atomicSub(&address, 1234.f);
}
__global__ void atomicSub_float_v2(float* address, float* result) {
  *result = atomicSub(address, address);
}
__global__ void atomicSub_float_v3(float* address, float* result) {
  *result = atomicSub(1234.f, 1234.f);
}
__global__ void atomicSub_float_v4(Dummy* address, float* result) {
  *result = atomicSub(address, 1234.f);
}
__global__ void atomicSub_float_v5(char* address, float* result) {
  *result = atomicSub(address, 1234.f);
}
__global__ void atomicSub_float_v6(short* address, float* result) {
  *result = atomicSub(address, 1234.f);
}
__global__ void atomicSub_float_v7(long* address, float* result) {
  *result = atomicSub(address, 1234.f);
}
// Only the address type deviates here; the value stays a float like in v4-v7.
__global__ void atomicSub_float_v8(long long* address, float* result) {
  *result = atomicSub(address, 1234.f);
}
/* atomicSub(double* address, double val) */
// Each kernel below hands atomicSub a different argument combination:
//   v1    - &address (a double**) as the address argument
//   v2    - the pointer `address` as the value argument
//   v3    - a double literal as the address argument
//   v4-v8 - an address of type Dummy*, char*, short*, long* and long long* respectively
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicSub_double_v1(double* address, double* result) {
*result = atomicSub(&address, 1234.0);
}
__global__ void atomicSub_double_v2(double* address, double* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_double_v3(double* address, double* result) {
*result = atomicSub(1234.0, 1234.0);
}
__global__ void atomicSub_double_v4(Dummy* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v5(char* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v6(short* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v7(long* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v8(long long* address, double* result) {
*result = atomicSub(address, 1234.0);
}
+273
Просмотреть файл
@@ -0,0 +1,273 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
// HIP source text containing eight kernels (atomicSub_int_v1..v8), each calling atomicSub with a
// different argument combination. The atomicSub negative RTC test passes this string to
// hiprtcCreateProgram, expects compilation to fail, and expects eight "error:" entries in the
// compile log - one kernel per expected error, so keep both in sync when editing.
static constexpr auto kAtomicSub_int{
R"(
__global__ void atomicSub_int_v1(int* address, int* result) {
*result = atomicSub(&address, 1234);
}
__global__ void atomicSub_int_v2(int* address, int* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_int_v3(int* address, int* result) {
*result = atomicSub(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicSub_int_v4(Dummy* address, int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_int_v5(char* address, int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_int_v6(short* address, int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_int_v7(long* address, int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_int_v8(long long* address, int* result) {
*result = atomicSub(address, 1234);
}
)"};
// HIP source text containing eight kernels (atomicSub_uint_v1..v8), each calling atomicSub with
// a different argument combination. The atomicSub negative RTC test passes this string to
// hiprtcCreateProgram, expects compilation to fail, and expects eight "error:" entries in the
// compile log - one kernel per expected error, so keep both in sync when editing.
static constexpr auto kAtomicSub_uint{
R"(
__global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicSub(&address, 1234);
}
__global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicSub(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v5(char* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v6(short* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v7(long* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_uint_v8(long long* address, unsigned int* result) {
*result = atomicSub(address, 1234);
}
)"};
// HIP source text containing eight kernels (atomicSub_ulong_v1..v8), each calling atomicSub with
// a different argument combination. The atomicSub negative RTC test passes this string to
// hiprtcCreateProgram, expects compilation to fail, and expects eight "error:" entries in the
// compile log - one kernel per expected error, so keep both in sync when editing.
static constexpr auto kAtomicSub_ulong{
R"(
__global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicSub(&address, 1234);
}
__global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicSub(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v5(char* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v6(short* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v7(long* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) {
*result = atomicSub(address, 1234);
}
)"};
// HIP source text containing eight kernels (atomicSub_ulonglong_v1..v8), each calling atomicSub
// with a different argument combination. The atomicSub negative RTC test passes this string to
// hiprtcCreateProgram, expects compilation to fail, and expects eight "error:" entries in the
// compile log - one kernel per expected error, so keep both in sync when editing.
static constexpr auto kAtomicSub_ulonglong{
R"(
__global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicSub(&address, 1234);
}
__global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicSub(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
__global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicSub(address, 1234);
}
)"};
// HIP source text containing eight kernels (atomicSub_float_v1..v8), each calling atomicSub with
// a different argument combination; every value argument is a float literal so that only the
// argument under test deviates. The atomicSub negative RTC test passes this string to
// hiprtcCreateProgram, expects compilation to fail, and expects eight "error:" entries in the
// compile log - one kernel per expected error, so keep both in sync when editing.
static constexpr auto kAtomicSub_float{
R"(
__global__ void atomicSub_float_v1(float* address, float* result) {
*result = atomicSub(&address, 1234.f);
}
__global__ void atomicSub_float_v2(float* address, float* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_float_v3(float* address, float* result) {
*result = atomicSub(1234.f, 1234.f);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicSub_float_v4(Dummy* address, float* result) {
*result = atomicSub(address, 1234.f);
}
__global__ void atomicSub_float_v5(char* address, float* result) {
*result = atomicSub(address, 1234.f);
}
__global__ void atomicSub_float_v6(short* address, float* result) {
*result = atomicSub(address, 1234.f);
}
__global__ void atomicSub_float_v7(long* address, float* result) {
*result = atomicSub(address, 1234.f);
}
__global__ void atomicSub_float_v8(long long* address, float* result) {
*result = atomicSub(address, 1234.f);
}
)"};
// HIP source text containing eight kernels (atomicSub_double_v1..v8), each calling atomicSub
// with a different argument combination. The atomicSub negative RTC test passes this string to
// hiprtcCreateProgram, expects compilation to fail, and expects eight "error:" entries in the
// compile log - one kernel per expected error, so keep both in sync when editing.
static constexpr auto kAtomicSub_double{
R"(
__global__ void atomicSub_double_v1(double* address, double* result) {
*result = atomicSub(&address, 1234.0);
}
__global__ void atomicSub_double_v2(double* address, double* result) {
*result = atomicSub(address, address);
}
__global__ void atomicSub_double_v3(double* address, double* result) {
*result = atomicSub(1234.0, 1234.0);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicSub_double_v4(Dummy* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v5(char* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v6(short* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v7(long* address, double* result) {
*result = atomicSub(address, 1234.0);
}
__global__ void atomicSub_double_v8(long long* address, double* result) {
*result = atomicSub(address, 1234.0);
}
)"};
+177
Просмотреть файл
@@ -0,0 +1,177 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicSub_system atomicSub_system
* @{
* @ingroup AtomicsTest
*/
/**
 * Test Description
 * ------------------------
 *    - Executes a kernel two times concurrently on two devices wherein all threads perform an
 * atomic subtraction on a target memory location. Each thread subtracts the same value from the
 * memory location, storing the return value into a separate output array slot corresponding to
 * it. Once complete, the output array and target memory are validated to contain all the
 * expected values. Several memory access patterns are tested:
 *      -# All threads subtract from a single, compile time deducible, memory location
 *      -# Each thread targets an array containing warp_size elements, using tid % warp_size
 *         for indexing
 *      -# Same as the above, but the elements are spread out by L1 cache line size bytes.
 *
 *    - The test is run for:
 *      - All overloads of atomicSub_system
 *      - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
 *      - Several grid and block dimension combinations.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicSub_system.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kSubSystem;
  // Byte distance between elements in the "scattered" layout.
  const auto scattered_pitch = 128u;

  // The warp size of device 0 determines how many distinct elements are targeted.
  int threads_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    scattered_pitch);
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Executes a kernel on a single device wherein all threads perform an atomic subtraction
 * on a target memory location. Each thread subtracts the same value from the memory location,
 * storing the return value into a separate output array slot corresponding to it. While the
 * kernel is running, the host performs the atomic operation as well, in 4 threads, on the same
 * memory location(s). Once complete, the output array and target memory are validated to contain
 * all the expected values. Several memory access patterns are tested:
 *      -# All threads subtract from a single, compile time deducible, memory location
 *      -# Each thread targets an array containing warp_size elements, using tid % warp_size
 *         for indexing
 *      -# Same as the above, but the elements are spread out by L1 cache line size bytes.
 *
 *    - The test is run for:
 *      - All overloads of atomicSub_system
 *      - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
 *      - Several grid and block dimension combinations.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicSub_system.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kSubSystem;
  // Byte distance between elements in the "scattered" layout.
  const auto scattered_pitch = 128u;

  // The warp size of device 0 determines how many distinct elements are targeted.
  int threads_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, 1, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, threads_per_warp,
                                                                    sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, threads_per_warp,
                                                                    scattered_pitch, 4);
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Executes a kernel two times on two devices wherein all threads perform an atomic
 * subtraction on a target memory location. Each thread subtracts the same value from the memory
 * location, storing the return value into a separate output array slot corresponding to it.
 * While the kernel is running, the host performs the atomic operation as well, in 4 threads, on
 * the same memory location(s). Once complete, the output array and target memory are validated
 * to contain all the expected values. Several memory access patterns are tested:
 *      -# All threads subtract from a single, compile time deducible, memory location
 *      -# Each thread targets an array containing warp_size elements, using tid % warp_size
 *         for indexing
 *      -# Same as the above, but the elements are spread out by L1 cache line size bytes.
 *
 *    - The test is run for:
 *      - All overloads of atomicSub_system
 *      - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
 *      - Several grid and block dimension combinations.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicSub_system.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kSubSystem;
  // Byte distance between elements in the "scattered" layout.
  const auto scattered_pitch = 128u;

  // The warp size of device 0 determines how many distinct elements are targeted.
  int threads_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, 1, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    scattered_pitch, 4);
    }
  }
}
+222
Просмотреть файл
@@ -0,0 +1,222 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "atomicXor_negative_kernels_rtc.hh"
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicXor atomicXor
* @{
* @ingroup AtomicsTest
* `atomicXor(TestType* address, TestType val)` -
* performs an atomic bitwise XOR between the value stored at address and val, returns the old value.
*/
/**
 * Test Description
 * ------------------------
 *    - Issues atomicXor from multiple threads that all target the same address.
 *    - Uses a single device and launches a single kernel.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicXor.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Issues atomicXor from multiple threads that target adjacent addresses.
 *    - Uses a single device and launches a single kernel.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicXor.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  // The warp size of device 0 is used as the number of targeted elements.
  int threads_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation>(threads_per_warp,
                                                                  sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Issues atomicXor from multiple threads that target scattered addresses.
 *    - Uses a single device and launches a single kernel.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicXor.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;
  // Byte distance between the targeted elements.
  const auto scattered_pitch = 128u;

  // The warp size of device 0 is used as the number of targeted elements.
  int threads_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation>(threads_per_warp,
                                                                  scattered_pitch);
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Issues atomicXor from multiple threads that all target the same address.
 *    - Uses a single device and launches multiple kernels.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicXor.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Issues atomicXor from multiple threads that target adjacent addresses.
 *    - Uses a single device and launches multiple kernels.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicXor.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  int threads_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      // One element fewer than the warp size is requested here.
      Bitwise::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, threads_per_warp - 1,
                                                                    sizeof(TestType));
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Issues atomicXor from multiple threads that target scattered addresses.
 *    - Uses a single device and launches multiple kernels.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicXor.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;
  // Byte distance between the targeted elements.
  const auto scattered_pitch = 128u;

  int threads_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      // One element fewer than the warp size is requested here.
      Bitwise::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, threads_per_warp - 1,
                                                                    scattered_pitch);
    }
  }
}
/**
 * Test Description
 * ------------------------
 *    - Compiles atomicXor calls that use invalid parameters.
 *    - The sources are compiled at run time with RTC; compilation must fail and the compile log
 *      must contain the expected number of error messages.
 * Test source
 * ------------------------
 *    - unit/atomics/atomicXor.cc
 * Test requirements
 * ------------------------
 *    - HIP_VERSION >= 5.2
 */
TEST_CASE("Unit_atomicXor_Negative_Parameters_RTC") {
  // One run per source string; the strings come from atomicXor_negative_kernels_rtc.hh.
  const auto kernel_source =
      GENERATE(kAtomicXor_int, kAtomicXor_uint, kAtomicXor_ulong, kAtomicXor_ulonglong);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, kernel_source, "atomicXor_negative.cc", 0, nullptr, nullptr));

  // The compile result is only inspected after the log has been fetched and the program destroyed.
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of the compiler's error marker in the log.
  const std::string error_marker{"error:"};
  int error_count{0};
  for (size_t pos = log.find(error_marker, 0); pos != std::string::npos;
       pos = log.find(error_marker, pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);

  // Number of "error:" entries expected for each of the source strings above.
  const int expected_error_count{9};
  REQUIRE(error_count == expected_error_count);
}
+185
Просмотреть файл
@@ -0,0 +1,185 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
/**
 * Empty user-defined class whose constructor and destructor are both marked __device__.
 * The *_v4 kernels in this file take a Dummy* and pass it as the address argument of atomicXor.
 */
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
/* int atomicXor(int* address, int val) */
// Each kernel below hands atomicXor a different argument combination:
//   v1     - &address (an int**) as the address argument
//   v2     - the pointer `address` as the value argument
//   v3     - an integer literal as the address argument
//   v4-v10 - an address of type Dummy*, char*, short*, long*, long long*, float* and double*
//            respectively
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicXor_int_v1(int* address, int* result) { *result = atomicXor(&address, 1234); }
__global__ void atomicXor_int_v2(int* address, int* result) {
*result = atomicXor(address, address);
}
__global__ void atomicXor_int_v3(int* address, int* result) { *result = atomicXor(1234, 1234); }
__global__ void atomicXor_int_v4(Dummy* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v5(char* address, int* result) { *result = atomicXor(address, 1234); }
__global__ void atomicXor_int_v6(short* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v7(long* address, int* result) { *result = atomicXor(address, 1234); }
__global__ void atomicXor_int_v8(long long* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v9(float* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v10(double* address, int* result) {
*result = atomicXor(address, 1234);
}
/* unsigned int atomicXor(unsigned int* address, unsigned int val) */
// Each kernel below hands atomicXor a different argument combination:
//   v1     - &address (an unsigned int**) as the address argument
//   v2     - the pointer `address` as the value argument
//   v3     - an integer literal as the address argument
//   v4-v10 - an address of type Dummy*, char*, short*, long*, long long*, float* and double*
//            respectively
// All kernels of this group carry the atomicXor_uint_ prefix.
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) {
  *result = atomicXor(&address, 1234);
}
__global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) {
  *result = atomicXor(address, address);
}
__global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) {
  *result = atomicXor(1234, 1234);
}
__global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v5(char* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v6(short* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v7(long* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v8(long long* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v9(float* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v10(double* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
/* atomicXor(unsigned long* address, unsigned long val) */
// Each kernel below hands atomicXor a different argument combination:
//   v1     - &address (an unsigned long**) as the address argument
//   v2     - the pointer `address` as the value argument
//   v3     - an integer literal as the address argument
//   v4-v10 - an address of type Dummy*, char*, short*, long*, long long*, float* and double*
//            respectively
// Every kernel in this file exercises atomicXor, including the float*/double* variants.
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) {
  *result = atomicXor(&address, 1234);
}
__global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) {
  *result = atomicXor(address, address);
}
__global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) {
  *result = atomicXor(1234, 1234);
}
__global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v5(char* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v6(short* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v7(long* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v9(float* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v10(double* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
/* atomicXor(unsigned long long* address, unsigned long long val) */
// Each kernel below hands atomicXor a different argument combination:
//   v1     - &address (an unsigned long long**) as the address argument
//   v2     - the pointer `address` as the value argument
//   v3     - an integer literal as the address argument
//   v4-v10 - an address of type Dummy*, char*, short*, long*, long long*, float* and double*
//            respectively
// All kernels of this group carry the atomicXor_ulonglong_ prefix and call atomicXor.
// NOTE(review): presumably every call is meant to be rejected by the compiler (negative
// compilation test) - confirm against the build rules that consume this file.
__global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(&address, 1234);
}
__global__ void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(address, address);
}
__global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(1234, 1234);
}
__global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v9(float* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v10(double* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
+223
Просмотреть файл
@@ -0,0 +1,223 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Negative kernels used for the atomics negative Test Cases that are using RTC.
*/
// RTC source text for the atomicXor negative cases whose result type is int. It declares ten
// kernels (atomicXor_int_v1 .. v10) plus a local Dummy class; each kernel calls atomicXor with a
// different argument shape (address-of-pointer, pointer as value, literals only, Dummy*, char*,
// short*, long*, long long*, float*, double*).
static constexpr const char* kAtomicXor_int = R"(
__global__ void atomicXor_int_v1(int* address, int* result) {
*result = atomicXor(&address, 1234);
}
__global__ void atomicXor_int_v2(int* address, int* result) {
*result = atomicXor(address, address);
}
__global__ void atomicXor_int_v3(int* address, int* result) {
*result = atomicXor(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicXor_int_v4(Dummy* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v5(char* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v6(short* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v7(long* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v8(long long* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v9(float* address, int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_int_v10(double* address, int* result) {
*result = atomicXor(address, 1234);
}
)";
// RTC source text for the atomicXor negative cases whose result type is unsigned int. It declares
// ten kernels (atomicXor_uint_v1 .. v10) plus a local Dummy class; each kernel calls atomicXor
// with a different argument shape (address-of-pointer, pointer as value, literals only, Dummy*,
// char*, short*, long*, long long*, float*, double*).
static constexpr const char* kAtomicXor_uint = R"(
__global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicXor(&address, 1234);
}
__global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicXor(address, address);
}
__global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicXor(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v5(char* address, unsigned int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v6(short* address, unsigned int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v7(long* address, unsigned int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v8(long long* address, unsigned int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v9(float* address, unsigned int* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_uint_v10(double* address, unsigned int* result) {
*result = atomicXor(address, 1234);
}
)";
// RTC source text for the atomicXor negative cases whose result type is unsigned long. It declares
// ten kernels (atomicXor_ulong_v1 .. v10) plus a local Dummy class; each kernel calls atomicXor
// with a different argument shape (address-of-pointer, pointer as value, literals only, Dummy*,
// char*, short*, long*, long long*, float*, double*).
static constexpr const char* kAtomicXor_ulong = R"(
__global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicXor(&address, 1234);
}
__global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicXor(address, address);
}
__global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicXor(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v5(char* address, unsigned long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v6(short* address, unsigned long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v7(long* address, unsigned long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v9(float* address, unsigned long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulong_v10(double* address, unsigned long* result) {
*result = atomicXor(address, 1234);
}
)";
// RTC source text for the atomicXor negative cases whose result type is unsigned long long. It
// declares ten kernels (atomicXor_ulonglong_v1 .. v10) plus a local Dummy class; each kernel calls
// atomicXor with a different argument shape (address-of-pointer, pointer as value, literals only,
// Dummy*, char*, short*, long*, long long*, float*, double*).
static constexpr const char* kAtomicXor_ulonglong = R"(
__global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicXor(&address, 1234);
}
__global__ void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicXor(address, address);
}
__global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicXor(1234, 1234);
}
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v9(float* address, unsigned long long* result) {
*result = atomicXor(address, 1234);
}
__global__ void atomicXor_ulonglong_v10(double* address, unsigned long long* result) {
*result = atomicXor(address, 1234);
}
)";
+109
Просмотреть файл
@@ -0,0 +1,109 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "bitwise_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup atomicXor_system atomicXor_system
* @{
* @ingroup AtomicsTest
* `atomicXor_system(TestType* address, TestType val)` -
* performs a system-wide atomic bitwise XOR between the value stored at `address` and `val`,
* and returns the old value.
*/
/**
* Test Description
* ------------------------
* - Performs atomicXor_system from multiple threads on the same address.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicXor_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Every requested iteration runs inside its own dynamically named section.
  auto pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << pass) {
      constexpr auto kOperation = Bitwise::AtomicOperation::kXorSystem;
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, 1, sizeof(TestType));
    }
    ++pass;
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicXor_system from multiple threads on adjacent addresses.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicXor_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // The warp size reported for device 0 is forwarded as the third argument of the helper.
  int warp_width{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // Every requested iteration runs inside its own dynamically named section.
  auto pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Adjacent address " << pass) {
      constexpr auto kOperation = Bitwise::AtomicOperation::kXorSystem;
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, warp_width,
                                                                      sizeof(TestType));
    }
    ++pass;
  }
}
/**
* Test Description
* ------------------------
* - Performs atomicXor_system from multiple threads on scattered addresses.
* - Uses multiple devices and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/atomicXor_system.cc
* Test requirements
* ------------------------
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // The warp size reported for device 0 is forwarded as the third argument of the helper.
  int warp_width{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // Passed as the last argument in place of sizeof(TestType) used by the adjacent-address test.
  const unsigned int cache_line = 128u;

  // Every requested iteration runs inside its own dynamically named section.
  auto pass = 0;
  while (pass < cmd_options.iterations) {
    DYNAMIC_SECTION("Scattered address " << pass) {
      constexpr auto kOperation = Bitwise::AtomicOperation::kXorSystem;
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, kOperation>(2, 2, warp_width,
                                                                      cache_line);
    }
    ++pass;
  }
}
+458
Просмотреть файл
@@ -0,0 +1,458 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <resource_guards.hh>
// Memory order and memory scope passed to the builtin calls below whenever the corresponding
// argument is not the one being varied.
constexpr int kMemOrder{__ATOMIC_RELAXED};
constexpr int kMemScope{__HIP_MEMORY_SCOPE_SYSTEM};
// Trivially-copyable class.
// Empty helper class used by the kernels below as the trivially-copyable operand. The copy/move
// operations and the destructor are explicitly defaulted; only the default constructor is
// user-provided.
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
// Empty helper class used by the kernels below as the operand that is NOT trivially-copyable:
// its destructor is user-provided (`{}` rather than `= default`).
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
// Compile-only fixture for __hip_atomic_store. It lists calls with accepted argument combinations
// followed by calls that vary one argument at a time (pointer constness, pointer vs. value, memory
// order, memory scope, operand class type). The statements are kept one per case; do not reorder
// or merge them, since the cases are identified by the comments that precede them.
__global__ void StoreCompileKernel(int* x) {
// Valid combinations
__hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope);
__hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope);
__hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
__hip_atomic_store(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
__hip_atomic_store(*x, 1, kMemOrder, kMemScope);
// Consume not allowed by C++11 for store
__hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope);
// Acquire not allowed by C++11 for store
__hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope);
// Acquire-Release not allowed by C++11 for store
__hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope);
// Memory order is out of bounds
__hip_atomic_store(x, 1, -1, kMemScope);
__hip_atomic_store(x, 1, 10, kMemScope);
// Memory scope is out of bounds
__hip_atomic_store(x, 1, kMemOrder, -1);
__hip_atomic_store(x, 1, kMemOrder, 10);
// Storing an object that is not trivially-copyable
Dummy dummy_a{};
Dummy dummy_b{};
__hip_atomic_store(&dummy_a, dummy_b, kMemOrder, kMemScope);
// Storing an object that is trivially-copyable
DummyTC dummytc_a{};
DummyTC dummytc_b{};
__hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_load. It lists calls with accepted argument combinations
// followed by calls that vary one argument at a time (pointer vs. value, memory order, memory
// scope, operand class type). Results of the int loads are written to *y.
__global__ void LoadCompileKernel(int* x, int* y) {
// Valid combinations
*y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope);
// Value instead of pointer to the atomic builtin for 1st parameter
*y = __hip_atomic_load(*x, kMemOrder, kMemScope);
// Release not allowed by C++11 for load
*y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope);
// Acquire-Release not allowed by C++11 for load
*y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope);
// Memory order is out of bounds
*y = __hip_atomic_load(x, -1, kMemScope);
*y = __hip_atomic_load(x, 10, kMemScope);
// Memory scope is out of bounds
*y = __hip_atomic_load(x, kMemOrder, -1);
*y = __hip_atomic_load(x, kMemOrder, 10);
// Loading an object that is not trivially-copyable
Dummy dummy_a{};
Dummy dummy_b{};
dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope);
// Loading an object that is trivially-copyable
DummyTC dummytc_a{};
DummyTC dummytc_b{};
dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_compare_exchange_weak. The first group enumerates
// (success order, failure order) pairs; the remaining calls vary one argument at a time (failure
// order, pointer constness, pointer vs. value, out-of-range order/scope, operand class type).
// Every result is assigned to `res`, which is never read.
__global__ void CompareWeakCompileKernel(int* x, int* expected) {
bool res{false};
// Valid combinations
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
kMemScope);
// NOTE(review): the failure order below is __ATOMIC_ACQ_REL, which the "Acquire-Release not
// allowed on fail" case further down treats as invalid - confirm whether this pair belongs in
// the "Valid combinations" group.
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
kMemScope);
// Release not allowed on fail by C++11
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
// Acquire-Release not allowed on fail by C++11
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
// Fail stronger than success
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
res = __hip_atomic_compare_exchange_weak(reinterpret_cast<const int*>(x), expected, 1, kMemOrder,
kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
res = __hip_atomic_compare_exchange_weak(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
// Memory order on success is out of bounds
res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, kMemScope);
// Memory order on failure is out of bounds
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope);
// Memory scope is out of bounds
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10);
// User-defined class is not trivially-copyable and therefore cannot be atomically copied
Dummy dummy_a{};
Dummy dummy_b{};
Dummy dummy_c{};
res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
kMemScope);
// User-defined class is trivially-copyable and can be atomically copied
DummyTC dummytc_a{};
DummyTC dummytc_b{};
DummyTC dummytc_c{};
res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder,
kMemScope);
}
// Compile-only fixture for __hip_atomic_compare_exchange_strong. The first group enumerates
// (success order, failure order) pairs; the remaining calls vary one argument at a time (failure
// order, pointer constness, pointer vs. value, out-of-range order/scope, operand class type).
// Every result is assigned to `res`, which is never read.
__global__ void CompareStrongCompileKernel(int* x, int* expected) {
bool res{false};
// Valid combinations
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
kMemScope);
// NOTE(review): the failure order below is __ATOMIC_ACQ_REL, which the "Acquire-Release not
// allowed on fail" case further down treats as invalid - confirm whether this pair belongs in
// the "Valid combinations" group.
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
kMemScope);
// Release not allowed on fail by C++11
res =
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
// Acquire-Release not allowed on fail by C++11
res =
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
// Fail stronger than success
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
res = __hip_atomic_compare_exchange_strong(reinterpret_cast<const int*>(x), expected, 1,
kMemOrder, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin for 1st parameter
res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
// Memory order on success is out of bounds
res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope);
// Memory order on failure is out of bounds
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, -1, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope);
// Memory scope is out of bounds
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10);
// User-defined class is not trivially-copyable and therefore cannot be atomically copied
Dummy dummy_a{};
Dummy dummy_b{};
Dummy dummy_c{};
res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
kMemScope);
// User-defined class is trivially-copyable and can be atomically copied
DummyTC dummytc_a{};
DummyTC dummytc_b{};
DummyTC dummytc_c{};
res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder,
kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_exchange. It lists calls with each memory order, then
// calls that vary one argument at a time (pointer constness, pointer vs. value, out-of-range
// order/scope, operand class type). The int results are assigned to `old`, which is never read.
__global__ void ExchangeCompileKernel(int* x) {
int old{};
// Valid combinations
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
old = __hip_atomic_exchange(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope);
// Memory order out of bounds
old = __hip_atomic_exchange(x, 1, -1, kMemScope);
old = __hip_atomic_exchange(x, 1, 10, kMemScope);
// Memory scope out of bounds
old = __hip_atomic_exchange(x, 1, kMemOrder, -1);
old = __hip_atomic_exchange(x, 1, kMemOrder, 10);
// User-defined class is not trivially-copyable and therefore cannot be atomically copied
Dummy dummy_a{};
Dummy dummy_b{};
dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope);
// User-defined class is trivially-copyable and can be atomically copied
DummyTC dummytc_a{};
DummyTC dummytc_b{};
dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_fetch_add. It lists calls with each memory order, then
// calls that vary one argument at a time (pointer constness, pointer vs. value, out-of-range
// order/scope, class-type target). Results are assigned to `old`, which is never read.
__global__ void FetchAddCompileKernel(int* x) {
int old{};
// Valid combinations
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
old = __hip_atomic_fetch_add(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope);
// Memory order out of bounds
old = __hip_atomic_fetch_add(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_add(x, 1, 10, kMemScope);
// Memory scope out of bounds
old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10);
// Target is a user-defined class object instead of an arithmetic type
Dummy dummy{};
old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_fetch_and. It lists calls with each memory order, then
// calls that vary one argument at a time (pointer constness, pointer vs. value, out-of-range
// order/scope, non-integer targets). Results are assigned to `old`, which is never read.
__global__ void FetchAndCompileKernel(int* x) {
int old{};
// Valid combinations
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
old = __hip_atomic_fetch_and(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope);
// Memory order out of bounds
old = __hip_atomic_fetch_and(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_and(x, 1, 10, kMemScope);
// Memory scope out of bounds
old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10);
// Target must be an integer: Dummy, float and double objects are passed as the address
Dummy dummy{};
old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope);
float float_var{1.5f};
old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope);
double double_var{1.5};
old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_fetch_or. It lists calls with each memory order, then
// calls that vary one argument at a time (pointer constness, pointer vs. value, out-of-range
// order/scope, non-integer targets). Results are assigned to `old`, which is never read.
__global__ void FetchOrCompileKernel(int* x) {
int old{};
// Valid combinations
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
old = __hip_atomic_fetch_or(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope);
// Memory order out of bounds
old = __hip_atomic_fetch_or(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_or(x, 1, 10, kMemScope);
// Memory scope out of bounds
old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10);
// Target must be an integer: Dummy, float and double objects are passed as the address
Dummy dummy{};
old = __hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope);
float float_var{1.5f};
old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope);
double double_var{1.5};
old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_fetch_xor. It lists calls with each memory order, then
// calls that vary one argument at a time (pointer constness, pointer vs. value, out-of-range
// order/scope, non-integer targets). Results are assigned to `old`, which is never read.
__global__ void FetchXorCompileKernel(int* x) {
int old{};
// Valid combinations
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
old = __hip_atomic_fetch_xor(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope);
// Memory order out of bounds
old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope);
// Memory scope out of bounds
old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10);
// Target must be an integer: Dummy, float and double objects are passed as the address
Dummy dummy{};
old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope);
float float_var{1.5f};
old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope);
double double_var{1.5};
old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_fetch_max. It lists calls with each memory order, then
// calls that vary one argument at a time (pointer constness, pointer vs. value, out-of-range
// order/scope, class-type target). Results are assigned to `old`, which is never read.
__global__ void FetchMaxCompileKernel(int* x) {
int old{};
// Valid combinations
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
old = __hip_atomic_fetch_max(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope);
// Memory order out of bounds
old = __hip_atomic_fetch_max(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_max(x, 1, 10, kMemScope);
// Memory scope out of bounds
old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10);
// Target must be integer or floating point type: a Dummy object is passed as the address
Dummy dummy{};
old = __hip_atomic_fetch_max(&dummy, 1, kMemOrder, kMemScope);
}
// Compile-only fixture for __hip_atomic_fetch_min. It lists calls with each memory order, then
// calls that vary one argument at a time (pointer constness, pointer vs. value, out-of-range
// order/scope, class-type target). Results are assigned to `old`, which is never read.
__global__ void FetchMinCompileKernel(int* x) {
int old{};
// Valid combinations
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope);
// Pointer to a const-qualified type (x is cast to const int*)
old = __hip_atomic_fetch_min(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
// Value instead of pointer to the atomic builtin
old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope);
// Memory order out of bounds
old = __hip_atomic_fetch_min(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_min(x, 1, 10, kMemScope);
// Memory scope out of bounds
old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10);
// Target must be integer or floating point type: a Dummy object is passed as the address
Dummy dummy{};
old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope);
}
+97
Просмотреть файл
@@ -0,0 +1,97 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <resource_guards.hh>
#include "atomic_builtins_kernels_rtc.hh"
/**
* @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add
* @{
* @ingroup AtomicsTest
*/
/**
 * Compiles `program_source` with hiprtc and checks the diagnostics found in the compile log.
 *
 * The compile result must be HIPRTC_ERROR_COMPILATION, and the number of "error:" and "warning:"
 * substrings in the program log must equal the expected counts.
 *
 * @param program_source        Source text passed to hiprtcCreateProgram.
 * @param expected_errors_num   Required number of "error:" occurrences in the log.
 * @param expected_warnings_num Required number of "warning:" occurrences in the log.
 */
void AtomicBuiltinsRTCWrapper(const char* program_source, int expected_errors_num,
                              int expected_warnings_num) {
  hiprtcProgram program{};
  HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "atomics_builtins_kernels.cc", 0,
                                   nullptr, nullptr));

  // The status is only inspected after the program has been destroyed.
  hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  size_t log_size = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Counts matches of `needle` in the log, resuming one character after each match.
  const auto count_in_log = [&log](const std::string& needle) {
    int hits = 0;
    for (size_t at = log.find(needle, 0); at != std::string::npos;
         at = log.find(needle, at + 1)) {
      ++hits;
    }
    return hits;
  };
  int error_count = count_in_log("error:");
  int warning_count = count_in_log("warning:");

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_errors_num);
  REQUIRE(warning_count == expected_warnings_num);
}
/**
* Test Description
* ------------------------
* - Compiles atomic builtins while passing parameters that shall cause:
* -# Compiler warnings
* -# Compiler errors
* Test source
* ------------------------
* - unit/atomics/atomic_builtins.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_AtomicBuiltins_Negative_Parameters_RTC") {
  // One row per RTC source: the program text and the number of "error:" / "warning:"
  // occurrences AtomicBuiltinsRTCWrapper must find in its compile log.
  struct RtcExpectation {
    const char* source;
    int errors;
    int warnings;
  };

  const RtcExpectation expectations[] = {
      {kBuiltinStore, 5, 5},
      {kBuiltinLoad, 4, 4},
      /* Begin: Should be 5 errors, 6 warnings for both. See EXSWHTEC-309*/
      {kBuiltinCompExWeak, 5, 2},
      {kBuiltinCompExStrong, 5, 2},
      /* End. */
      {kBuiltinExchange, 5, 2},
      {kBuiltinFetchAdd, 5, 2},
      {kBuiltinFetchAnd, 7, 2},
      {kBuiltinFetchOr, 7, 2},
      {kBuiltinFetchXor, 7, 2},
      {kBuiltinFetchMax, 5, 2},
      {kBuiltinFetchMin, 5, 2},
  };

  // Rows are processed in declaration order.
  for (const auto& expectation : expectations) {
    AtomicBuiltinsRTCWrapper(expectation.source, expectation.errors, expectation.warnings);
  }
}
+590
Просмотреть файл
@@ -0,0 +1,590 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Positive and negative kernels used for the builtin atomic Test Cases that are using RTC.
*/
/**
 * RTC program text defining StoreCompileKernel.
 *
 * The kernel calls __hip_atomic_store with: the RELAXED, RELEASE and SEQ_CST orders;
 * a const-qualified pointer; a dereferenced (non-pointer) first argument; the CONSUME,
 * ACQUIRE and ACQ_REL orders; order and scope values of -1 and 10; and operands of the
 * class types Dummy and DummyTC.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinStore{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void StoreCompileKernel(int* x) {
__hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope);
__hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope);
__hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope);
__hip_atomic_store(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
__hip_atomic_store(*x, 1, kMemOrder, kMemScope);
__hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope);
__hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope);
__hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope);
__hip_atomic_store(x, 1, -1, kMemScope);
__hip_atomic_store(x, 1, 10, kMemScope);
__hip_atomic_store(x, 1, kMemOrder, -1);
__hip_atomic_store(x, 1, kMemOrder, 10);
Dummy dummy_a{};
Dummy dummy_b{};
__hip_atomic_store(&dummy_a, dummy_b, kMemOrder, kMemScope);
DummyTC dummytc_a{};
DummyTC dummytc_b{};
__hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining LoadCompileKernel.
 *
 * The kernel calls __hip_atomic_load with: the RELAXED, CONSUME, ACQUIRE and SEQ_CST
 * orders; a dereferenced (non-pointer) first argument; the RELEASE and ACQ_REL orders;
 * order and scope values of -1 and 10; and pointers to the class types Dummy and DummyTC.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinLoad{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void LoadCompileKernel(int* x, int* y) {
*y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope);
*y = __hip_atomic_load(*x, kMemOrder, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope);
*y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope);
*y = __hip_atomic_load(x, -1, kMemScope);
*y = __hip_atomic_load(x, 10, kMemScope);
*y = __hip_atomic_load(x, kMemOrder, -1);
*y = __hip_atomic_load(x, kMemOrder, 10);
Dummy dummy_a{};
Dummy dummy_b{};
dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope);
DummyTC dummytc_a{};
DummyTC dummytc_b{};
dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining CompareWeakCompileKernel.
 *
 * The kernel calls __hip_atomic_compare_exchange_weak with many success/failure
 * memory-order pairs built from the __ATOMIC_* constants, then with a const-qualified
 * pointer, a dereferenced (non-pointer) first argument, order and scope values of -1
 * and 10, and operands of the class types Dummy and DummyTC.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinCompExWeak{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void CompareWeakCompileKernel(int* x, int* expected) {
bool res{false};
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
kMemScope);
res = __hip_atomic_compare_exchange_weak(reinterpret_cast<const int*>(x), expected, 1, kMemOrder,
kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_weak(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1);
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10);
Dummy dummy_a{};
Dummy dummy_b{};
Dummy dummy_c{};
res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
kMemScope);
DummyTC dummytc_a{};
DummyTC dummytc_b{};
DummyTC dummytc_c{};
res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder,
kMemScope);
}
)"};
/**
 * RTC program text defining CompareStrongCompileKernel.
 *
 * The kernel calls __hip_atomic_compare_exchange_strong with many success/failure
 * memory-order pairs built from the __ATOMIC_* constants, then with a const-qualified
 * pointer, a dereferenced (non-pointer) first argument, order and scope values of -1
 * and 10, and operands of the class types Dummy and DummyTC.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinCompExStrong{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void CompareStrongCompileKernel(int* x, int* expected) {
bool res{false};
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
kMemScope);
res =
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
res =
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
kMemScope);
res = __hip_atomic_compare_exchange_strong(reinterpret_cast<const int*>(x), expected, 1,
kMemOrder, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, -1, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1);
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10);
Dummy dummy_a{};
Dummy dummy_b{};
Dummy dummy_c{};
res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
kMemScope);
DummyTC dummytc_a{};
DummyTC dummytc_b{};
DummyTC dummytc_c{};
res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder,
kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining ExchangeCompileKernel.
 *
 * The kernel calls __hip_atomic_exchange with each of the six __ATOMIC_* orders, a
 * const-qualified pointer, a dereferenced (non-pointer) first argument, order and
 * scope values of -1 and 10, and operands of the class types Dummy and DummyTC.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinExchange{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void ExchangeCompileKernel(int* x) {
int old{};
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_exchange(x, 1, __ATOMIC_SEQ_CST, kMemScope);
old = __hip_atomic_exchange(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope);
old = __hip_atomic_exchange(x, 1, -1, kMemScope);
old = __hip_atomic_exchange(x, 1, 10, kMemScope);
old = __hip_atomic_exchange(x, 1, kMemOrder, -1);
old = __hip_atomic_exchange(x, 1, kMemOrder, 10);
Dummy dummy_a{};
Dummy dummy_b{};
dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope);
DummyTC dummytc_a{};
DummyTC dummytc_b{};
dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining FetchAddCompileKernel.
 *
 * The kernel calls __hip_atomic_fetch_add with each of the six __ATOMIC_* orders, a
 * const-qualified pointer, a dereferenced (non-pointer) first argument, order and
 * scope values of -1 and 10, and a pointer to the class type Dummy. DummyTC is
 * declared in the text but not used by the kernel.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinFetchAdd{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void FetchAddCompileKernel(int* x) {
int old{};
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope);
old = __hip_atomic_fetch_add(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_add(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_add(x, 1, 10, kMemScope);
old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10);
Dummy dummy{};
old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining FetchAndCompileKernel.
 *
 * The kernel calls __hip_atomic_fetch_and with each of the six __ATOMIC_* orders, a
 * const-qualified pointer, a dereferenced (non-pointer) first argument, order and
 * scope values of -1 and 10, a pointer to the class type Dummy, and pointers to a
 * float and a double. DummyTC is declared in the text but not used by the kernel.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinFetchAnd{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void FetchAndCompileKernel(int* x) {
int old{};
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope);
old = __hip_atomic_fetch_and(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_and(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_and(x, 1, 10, kMemScope);
old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10);
Dummy dummy{};
old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope);
float float_var{1.5f};
old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope);
double double_var{1.5};
old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining FetchOrCompileKernel.
 *
 * The kernel calls __hip_atomic_fetch_or with each of the six __ATOMIC_* orders, a
 * const-qualified pointer, a dereferenced (non-pointer) first argument, order and
 * scope values of -1 and 10, a pointer to the class type Dummy, and pointers to a
 * float and a double. DummyTC is declared in the text but not used by the kernel.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinFetchOr{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void FetchOrCompileKernel(int* x) {
int old{};
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope);
old = __hip_atomic_fetch_or(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_or(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_or(x, 1, 10, kMemScope);
old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10);
Dummy dummy{};
old = __hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope);
float float_var{1.5f};
old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope);
double double_var{1.5};
old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining FetchXorCompileKernel.
 *
 * The kernel calls __hip_atomic_fetch_xor with each of the six __ATOMIC_* orders, a
 * const-qualified pointer, a dereferenced (non-pointer) first argument, order and
 * scope values of -1 and 10, a pointer to the class type Dummy, and pointers to a
 * float and a double. DummyTC is declared in the text but not used by the kernel.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinFetchXor{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void FetchXorCompileKernel(int* x) {
int old{};
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope);
old = __hip_atomic_fetch_xor(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope);
old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10);
Dummy dummy{};
old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope);
float float_var{1.5f};
old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope);
double double_var{1.5};
old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining FetchMaxCompileKernel.
 *
 * The kernel calls __hip_atomic_fetch_max with each of the six __ATOMIC_* orders, a
 * const-qualified pointer, a dereferenced (non-pointer) first argument, order and
 * scope values of -1 and 10, and a pointer to the class type Dummy. DummyTC is
 * declared in the text but not used by the kernel.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinFetchMax{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void FetchMaxCompileKernel(int* x) {
int old{};
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope);
old = __hip_atomic_fetch_max(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_max(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_max(x, 1, 10, kMemScope);
old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10);
Dummy dummy{};
old = __hip_atomic_fetch_max(&dummy, 1, kMemOrder, kMemScope);
}
)"};
/**
 * RTC program text defining FetchMinCompileKernel.
 *
 * The kernel calls __hip_atomic_fetch_min with each of the six __ATOMIC_* orders, a
 * const-qualified pointer, a dereferenced (non-pointer) first argument, order and
 * scope values of -1 and 10, and a pointer to the class type Dummy. DummyTC is
 * declared in the text but not used by the kernel.
 *
 * NOTE(review): the RTC negative-parameter test counts the diagnostics produced when
 * this text is compiled, so edits to the string presumably change the expected counts.
 */
static constexpr auto kBuiltinFetchMin{R"(
constexpr int kMemOrder = __ATOMIC_RELAXED;
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
class DummyTC {
public:
__device__ DummyTC() {}
__device__ ~DummyTC() = default;
__device__ DummyTC(const DummyTC&) = default;
__device__ DummyTC& operator=(const DummyTC&) = default;
__device__ DummyTC(DummyTC&&) = default;
__device__ DummyTC& operator=(DummyTC&&) = default;
};
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};
__global__ void FetchMinCompileKernel(int* x) {
int old{};
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope);
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope);
old = __hip_atomic_fetch_min(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope);
old = __hip_atomic_fetch_min(x, 1, -1, kMemScope);
old = __hip_atomic_fetch_min(x, 1, 10, kMemScope);
old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1);
old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10);
Dummy dummy{};
old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope);
}
)"};
+412
Просмотреть файл
@@ -0,0 +1,412 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <cmd_options.hh>
#include <hip_test_common.hh>
#include <hip/hip_cooperative_groups.h>
#include <resource_guards.hh>
namespace cg = cooperative_groups;
namespace Bitwise {
// Selects which atomic call PerformAtomicOperation issues. Enumerators take
// consecutive values starting at zero.
enum class AtomicOperation {
  kAnd,         // atomicAnd
  kAndSystem,   // atomicAnd_system
  kOr,          // atomicOr
  kOrSystem,    // atomicOr_system
  kXor,         // atomicXor
  kXorSystem,   // atomicXor_system
  kBuiltinAnd,  // __hip_atomic_fetch_and
  kBuiltinOr,   // __hip_atomic_fetch_or
  kBuiltinXor   // __hip_atomic_fetch_xor
};
// Second operand handed to every atomic call and to the host reference computation.
constexpr int kMask{0xAAAA};
// Starting value for operations other than kAnd / kAndSystem (see GetTestValue).
constexpr int kTestValue{0x4545};
// Starting value returned by GetTestValue for kAnd / kAndSystem.
constexpr int kAndTestValue{0xFFFF};
/**
 * Returns the starting value for the given operation, converted to TestType:
 * kAndTestValue for kAnd and kAndSystem, kTestValue for every other operation.
 *
 * NOTE(review): kBuiltinAnd is not listed in the condition, so it receives kTestValue;
 * confirm that this is intended.
 */
template <typename TestType, AtomicOperation operation>
__host__ __device__ TestType GetTestValue() {
  constexpr bool uses_and_value =
      operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem;
  return uses_and_value ? kAndTestValue : kTestValue;
}
/**
 * Issues the atomic call selected by operation on mem, with kMask as the operand,
 * and returns whatever that call returns.
 *
 * memory_scope is forwarded only to the __hip_atomic_fetch_* builtins, which are
 * invoked with __ATOMIC_RELAXED ordering; the atomicXxx / atomicXxx_system calls
 * do not receive it.
 */
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__device__ TestType PerformAtomicOperation(TestType* const mem) {
  using Op = AtomicOperation;
  const auto operand = kMask;

  // The branches stay in a single if-constexpr chain so that only the selected
  // call is instantiated for a given TestType.
  if constexpr (operation == Op::kAnd) {
    return atomicAnd(mem, operand);
  } else if constexpr (operation == Op::kAndSystem) {
    return atomicAnd_system(mem, operand);
  } else if constexpr (operation == Op::kOr) {
    return atomicOr(mem, operand);
  } else if constexpr (operation == Op::kOrSystem) {
    return atomicOr_system(mem, operand);
  } else if constexpr (operation == Op::kXor) {
    return atomicXor(mem, operand);
  } else if constexpr (operation == Op::kXorSystem) {
    return atomicXor_system(mem, operand);
  } else if constexpr (operation == Op::kBuiltinAnd) {
    return __hip_atomic_fetch_and(mem, operand, __ATOMIC_RELAXED, memory_scope);
  } else if constexpr (operation == Op::kBuiltinOr) {
    return __hip_atomic_fetch_or(mem, operand, __ATOMIC_RELAXED, memory_scope);
  } else if constexpr (operation == Op::kBuiltinXor) {
    return __hip_atomic_fetch_xor(mem, operand, __ATOMIC_RELAXED, memory_scope);
  }
}
/**
 * Kernel in which every thread applies the selected atomic operation to one target
 * and stores the value returned by the atomic in old_vals at its grid thread rank.
 *
 * When use_shared_mem is true the target is a __shared__ variable: the thread with
 * grid rank 0 seeds it from global_mem[0] before the operation and writes it back
 * afterwards, with __syncthreads() separating the phases.
 *
 * NOTE(review): only grid rank 0 seeds the shared variable, so this presumably
 * assumes a single-block launch in shared-memory mode; confirm against callers.
 */
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) {
  __shared__ TestType shared_mem;

  const auto rank = cg::this_grid().thread_rank();
  const bool is_first_thread = (rank == 0);
  TestType* const target = use_shared_mem ? &shared_mem : global_mem;

  if constexpr (use_shared_mem) {
    if (is_first_thread) {
      *target = *global_mem;
    }
    __syncthreads();
  }

  old_vals[rank] = PerformAtomicOperation<TestType, operation, memory_scope>(target);

  if constexpr (use_shared_mem) {
    __syncthreads();
    if (is_first_thread) {
      *global_mem = *target;
    }
  }
}
/**
 * Returns ptr advanced by idx * pitch bytes, reinterpreted as a TestType pointer.
 *
 * @param ptr   Base address.
 * @param pitch Distance between consecutive elements, in bytes.
 * @param idx   Index of the element to address.
 */
template <typename TestType>
__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch,
                                            const unsigned int idx) {
  uint8_t* const element_bytes = reinterpret_cast<uint8_t*>(ptr) + idx * pitch;
  return reinterpret_cast<TestType*>(element_bytes);
}
/**
 * Reads every byte in [begin_addr, end_addr), XORs it with 0xAB and writes it back.
 *
 * The accesses go through a volatile pointer. Declared inline because the function
 * is defined in a header; the other non-template helpers in this header are inline too.
 *
 * @param begin_addr First byte to touch.
 * @param end_addr   One past the last byte to touch; the loop stops when the cursor
 *                   equals this address.
 */
inline __device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) {
    uint8_t val = *addr;
    val ^= 0xAB;
    *addr = val;
  }
}
/**
 * Pitched variant of the test kernel: the targets are width elements laid out pitch
 * bytes apart, and a thread with grid rank tid works on element tid % width.
 *
 * Threads with rank below (grid size - width) apply the atomic operation and record
 * its return value in old_vals[tid]. The remaining threads call
 * GenerateMemoryTraffic on the bytes between the end of their element and the start
 * of the next pitch slot.
 *
 * When use_shared_mem is true the elements live in dynamically sized shared memory:
 * threads with rank below width copy them in from global_mem before the operation
 * and back out afterwards, with __syncthreads() separating the phases.
 */
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
                           const unsigned int width, const unsigned pitch) {
  extern __shared__ uint8_t shared_mem[];

  const auto tid = cg::this_grid().thread_rank();
  TestType* const mem = use_shared_mem ? reinterpret_cast<TestType*>(shared_mem) : global_mem;

  if constexpr (use_shared_mem) {
    if (tid < width) {
      TestType* const staged = PitchedOffset(mem, pitch, tid);
      *staged = *PitchedOffset(global_mem, pitch, tid);
    }
    __syncthreads();
  }

  const auto atomic_thread_count = cg::this_grid().size() - width;
  TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width);

  if (tid < atomic_thread_count) {
    old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(atomic_addr);
  } else {
    // Touch only the bytes after this element, up to the end of its pitch slot.
    uint8_t* const padding_begin = reinterpret_cast<uint8_t*>(atomic_addr + 1);
    uint8_t* const padding_end = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
    GenerateMemoryTraffic(padding_begin, padding_end);
  }

  if constexpr (use_shared_mem) {
    __syncthreads();
    if (tid < width) {
      TestType* const written_back = PitchedOffset(global_mem, pitch, tid);
      *written_back = *PitchedOffset(mem, pitch, tid);
    }
  }
}
// Parameters describing one bitwise atomic test run.
struct TestParams {
  // Number of threads in one kernel launch: grid volume times block volume.
  auto ThreadCount() const {
    const auto blocks_per_grid = blocks.x * blocks.y * blocks.z;
    const auto threads_per_block = threads.x * threads.y * threads.z;
    return blocks_per_grid * threads_per_block;
  }

  dim3 blocks;                          // grid dimensions used for the kernel launch
  dim3 threads;                         // block dimensions used for the kernel launch
  unsigned int num_devices = 1u;        // devices the test core iterates over
  unsigned int kernel_count = 1u;       // kernels (and streams) per device
  unsigned int width = 1u;              // number of target elements
  unsigned int pitch = 0u;              // byte distance between target elements
  unsigned int host_thread_count = 0u;  // not referenced in this part of the file
  LinearAllocs alloc_type;              // allocation kind for the target memory
};
template <typename TestType, AtomicOperation operation>
std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const TestParams& p) {
const auto thread_count = p.num_devices * p.kernel_count * p.ThreadCount();
TestType test_value = GetTestValue<TestType, operation>();
const auto mask = kMask;
std::vector<TestType> res_vals(p.width, test_value);
std::vector<TestType> old_vals;
old_vals.reserve(thread_count);
for (auto tid = 0u; tid < thread_count; ++tid) {
auto& res = res_vals[tid % p.width];
old_vals.push_back(res);
if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem ||
operation == AtomicOperation::kBuiltinAnd) {
res = res & mask;
} else if constexpr (operation == AtomicOperation::kOr ||
operation == AtomicOperation::kOrSystem ||
operation == AtomicOperation::kBuiltinOr) {
res = res | mask;
} else if constexpr (operation == AtomicOperation::kXor ||
operation == AtomicOperation::kXorSystem ||
operation == AtomicOperation::kBuiltinXor) {
res = res ^ mask;
}
}
return {res_vals, old_vals};
}
/**
 * Compares device results against the host reference from TestKernelHostRef.
 *
 * res_vals is compared element by element. old_vals and the reference old values
 * are both sorted first and then compared element by element, so the order in
 * which threads performed their atomics does not matter. old_vals is sorted in
 * place.
 *
 * NOTE(review): the loops run over the sizes of the vectors passed in and index the
 * reference vectors with the same index; the two sizes are not compared here.
 */
template <typename TestType, AtomicOperation operation>
void Verify(const TestParams& p, std::vector<TestType>& res_vals, std::vector<TestType>& old_vals) {
  auto reference = TestKernelHostRef<TestType, operation>(p);
  auto& expected_res_vals = std::get<0>(reference);
  auto& expected_old_vals = std::get<1>(reference);

  for (auto i = 0u; i < res_vals.size(); ++i) {
    INFO("Results index: " << i);
    REQUIRE(expected_res_vals[i] == res_vals[i]);
  }

  std::sort(old_vals.begin(), old_vals.end());
  std::sort(expected_old_vals.begin(), expected_old_vals.end());

  for (auto i = 0u; i < old_vals.size(); ++i) {
    INFO("Old values index: " << i);
    REQUIRE(expected_old_vals[i] == old_vals[i]);
  }
}
/**
 * Launches the matching TestKernel overload on stream.
 *
 * The two-argument kernel is used when there is exactly one target element and the
 * pitch equals sizeof(TestType); otherwise the pitched overload is used. The dynamic
 * shared memory size is width * pitch when use_shared_mem is true, and 0 otherwise.
 */
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr,
                  TestType* const old_vals) {
  const auto dynamic_shared_bytes = use_shared_mem ? p.width * p.pitch : 0u;
  const bool single_unpitched_target = (p.width == 1) && (p.pitch == sizeof(TestType));

  if (single_unpitched_target) {
    TestKernel<TestType, operation, use_shared_mem, memory_scope>
        <<<p.blocks, p.threads, dynamic_shared_bytes, stream>>>(mem_ptr, old_vals);
  } else {
    TestKernel<TestType, operation, use_shared_mem, memory_scope>
        <<<p.blocks, p.threads, dynamic_shared_bytes, stream>>>(mem_ptr, old_vals, p.width,
                                                                p.pitch);
  }
}
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void TestCore(const TestParams& p) {
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
std::vector<StreamGuard> streams;
for (auto i = 0; i < p.num_devices; ++i) {
HIP_CHECK(hipSetDevice(i));
old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size);
for (auto j = 0; j < p.kernel_count; ++j) {
streams.emplace_back(Streams::created);
}
}
const auto mem_alloc_size = p.width * p.pitch;
LinearAllocGuard<TestType> mem_dev(p.alloc_type, mem_alloc_size);
std::vector<TestType> old_vals(p.num_devices * p.kernel_count * p.ThreadCount());
std::vector<TestType> res_vals(p.width);
TestType* const mem_ptr =
p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr();
TestType test_value = GetTestValue<TestType, operation>();
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) {
HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice));
}
for (auto i = 0u; i < p.num_devices; ++i) {
for (auto j = 0u; j < p.kernel_count; ++j) {
const auto& stream = streams[i * p.kernel_count + j].stream();
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
old_vals);
}
}
for (auto i = 0u; i < p.num_devices; ++i) {
const auto device_offset = i * p.kernel_count * p.ThreadCount();
HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(),
old_vals_alloc_size, hipMemcpyDeviceToHost));
}
HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType),
p.width, hipMemcpyDeviceToHost));
Verify<TestType, operation>(p, res_vals, old_vals);
}
// Catch2 generator for the block size: the test is repeated with 16 and with 1024 threads.
inline dim3 GenerateThreadDimensions() {
  const dim3 threads_per_block = GENERATE(dim3(16), dim3(1024));
  return threads_per_block;
}
// Catch2 generator for the grid size: once with one block per multiprocessor of device 0 and
// once with one and a half times that number.
inline dim3 GenerateBlockDimensions() {
  int multiprocessors = 0;
  HIP_CHECK(
      hipDeviceGetAttribute(&multiprocessors, hipDeviceAttributeMultiprocessorCount, 0));
  const int oversubscribed = multiprocessors + multiprocessors / 2;
  return GENERATE_COPY(dim3(multiprocessors), dim3(oversubscribed));
}
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
TestParams params;
params.num_devices = 1;
params.kernel_count = 1;
if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
operation == AtomicOperation::kBuiltinOr ||
operation == AtomicOperation::kBuiltinXor) &&
memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
params.threads = 1;
} else if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
operation == AtomicOperation::kBuiltinOr ||
operation == AtomicOperation::kBuiltinXor) &&
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
params.threads = dim3(warp_size);
} else {
params.threads = GenerateThreadDimensions();
}
params.width = width;
params.pitch = pitch;
SECTION("Global memory") {
if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
operation == AtomicOperation::kBuiltinOr ||
operation == AtomicOperation::kBuiltinXor) &&
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
params.blocks = dim3(1);
} else {
params.blocks = GenerateBlockDimensions();
}
using LA = LinearAllocs;
for (const auto alloc_type :
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false>(params);
}
}
}
SECTION("Shared memory") {
params.blocks = dim3(1);
params.alloc_type = LinearAllocs::hipMalloc;
TestCore<TestType, operation, true>(params);
}
}
template <typename TestType, AtomicOperation operation>
void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width,
const unsigned int pitch) {
int concurrent_kernels = 0;
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0));
if (!concurrent_kernels) {
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
return;
}
TestParams params;
params.num_devices = 1;
params.kernel_count = kernel_count;
params.blocks = GenerateBlockDimensions();
params.threads = GenerateThreadDimensions();
params.width = width;
params.pitch = pitch;
using LA = LinearAllocs;
for (const auto alloc_type :
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false>(params);
}
}
}
template <typename TestType, AtomicOperation operation>
void MultipleDeviceMultipleKernelTest(const unsigned int num_devices,
const unsigned int kernel_count, const unsigned int width,
const unsigned int pitch) {
if (num_devices > 1) {
if (HipTest::getDeviceCount() < num_devices) {
std::string msg = std::to_string(num_devices) + " devices are required";
HipTest::HIP_SKIP_TEST(msg.c_str());
return;
}
}
if (kernel_count > 1) {
for (auto i = 0u; i < num_devices; ++i) {
int concurrent_kernels = 0;
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i));
if (!concurrent_kernels) {
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
return;
}
}
}
TestParams params;
params.num_devices = num_devices;
params.kernel_count = kernel_count;
params.blocks = GenerateBlockDimensions();
params.threads = GenerateThreadDimensions();
params.width = width;
params.pitch = pitch;
using LA = LinearAllocs;
for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
}
}
}
} // namespace Bitwise
+433
Просмотреть файл
@@ -0,0 +1,433 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
#include <resource_guards.hh>
// Selects which atomic builtin SetFlag/FetchFlag use to write and read the synchronisation flag.
enum class BuiltinAtomicOperation {
  kLoadStore = 0,              // atomic store to set, atomic load to read
  kExchange = 1,               // atomic exchange
  kCompareExchangeStrong = 2,  // strong compare-and-exchange
  kCompareExchangeWeak = 3,    // weak compare-and-exchange
  kAdd = 4,                    // fetch-add
  kAnd = 5,                    // fetch-and
  kOr = 6,                     // fetch-or
  kXor = 7,                    // fetch-xor
  kMin = 8,                    // fetch-min
  kMax = 9                     // fetch-max
};
// Moves *flag into its "set" state using the atomic builtin selected by `operation`.
//
// For every operation except kAnd the flag is expected to start at 0 and "set" is a non-zero
// value. For kAnd the polarity is inverted: "set" means the flag has been cleared to 0.
//
// Device compilation uses the __hip_atomic_* builtins with the given memory_order and
// memory_scope. Host compilation ignores both template arguments and always performs a release
// store.
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
__host__ __device__ void SetFlag(int* const flag) {
#ifdef __HIP_DEVICE_COMPILE__
if constexpr (operation == BuiltinAtomicOperation::kLoadStore) {
// An acquire-release order is rejected at compile time for the plain store.
static_assert(memory_order != __ATOMIC_ACQ_REL);
__hip_atomic_store(flag, 1, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kExchange) {
__hip_atomic_exchange(flag, 1, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) {
// Replaces 0 with 1; the failure order is always relaxed.
int compare = 0;
__hip_atomic_compare_exchange_strong(flag, &compare, 1, memory_order, __ATOMIC_RELAXED,
memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) {
// Retried until the exchange reports success; the expected value is reset to 0 before each
// new attempt.
int compare = 0;
while (!__hip_atomic_compare_exchange_weak(flag, &compare, 1, memory_order, __ATOMIC_RELAXED,
memory_scope))
compare = 0;
} else if constexpr (operation == BuiltinAtomicOperation::kAdd) {
__hip_atomic_fetch_add(flag, 1, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kAnd) {
// AND with 0 clears the flag, which is the "set" state for kAnd.
__hip_atomic_fetch_and(flag, 0x0, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kOr) {
__hip_atomic_fetch_or(flag, 0x1, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kXor) {
// XOR with 1 toggles bit 0, so this sets the flag only when it was 0 before.
__hip_atomic_fetch_xor(flag, 0x1, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kMin) {
// The "set" value for kMin is negative: the flag becomes min(flag, -1).
__hip_atomic_fetch_min(flag, -1, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kMax) {
__hip_atomic_fetch_max(flag, 1, memory_order, memory_scope);
}
#else
// Host path: a release store of the "set" value (0 for kAnd, 1 otherwise).
if constexpr (operation == BuiltinAtomicOperation::kAnd) {
__atomic_store_n(flag, 0, __ATOMIC_RELEASE);
} else {
__atomic_store_n(flag, 1, __ATOMIC_RELEASE);
}
#endif
}
// Reads *flag with the atomic builtin selected by `operation` and returns a value that is
// non-zero once the flag is in its "set" state (see SetFlag; for kAnd the raw value is negated
// because "set" means 0 there).
//
// Device compilation uses the __hip_atomic_* builtins with the given memory_order and
// memory_scope. Host compilation ignores both template arguments and always performs an acquire
// load.
//
// Note that some of the device read-modify-write variants also write to the flag: kExchange
// stores 0, kMin stores min(flag, 0) and kMax stores max(flag, 0).
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
__host__ __device__ int FetchFlag(int* const flag) {
#ifdef __HIP_DEVICE_COMPILE__
if constexpr (operation == BuiltinAtomicOperation::kLoadStore) {
// An acquire-release order is rejected at compile time for the plain load.
static_assert(memory_order != __ATOMIC_ACQ_REL);
return __hip_atomic_load(flag, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kExchange) {
// Returns the previous value and leaves 0 in the flag.
return __hip_atomic_exchange(flag, 0, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) {
// Expected and desired are both 1, so a successful exchange leaves the flag unchanged. The
// result is taken from `compare`, which the builtin overwrites with the observed value when
// the comparison fails. An acquire-release success order is paired with an acquire failure
// order.
int compare = 1;
__hip_atomic_compare_exchange_strong(
flag, &compare, 1, memory_order,
memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope);
return compare;
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) {
// Same scheme as the strong variant, issued once (no retry loop).
int compare = 1;
__hip_atomic_compare_exchange_weak(
flag, &compare, 1, memory_order,
memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope);
return compare;
} else if constexpr (operation == BuiltinAtomicOperation::kAdd) {
// Adding 0 returns the current value without changing it.
return __hip_atomic_fetch_add(flag, 0, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kAnd) {
// Inverted polarity: the result is non-zero when the previous value was 0.
return !__hip_atomic_fetch_and(flag, 0x1, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kOr) {
return __hip_atomic_fetch_or(flag, 0x0, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kXor) {
return __hip_atomic_fetch_xor(flag, 0x0, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kMin) {
return __hip_atomic_fetch_min(flag, 0, memory_order, memory_scope);
} else if constexpr (operation == BuiltinAtomicOperation::kMax) {
return __hip_atomic_fetch_max(flag, 0, memory_order, memory_scope);
}
#else
// Host path: an acquire load, negated for kAnd.
if constexpr (operation == BuiltinAtomicOperation::kAnd) {
return !__atomic_load_n(flag, __ATOMIC_ACQUIRE);
} else {
return __atomic_load_n(flag, __ATOMIC_ACQUIRE);
}
#endif
}
namespace AcquireRelease {
// Payload the producer writes to `data` before setting the flag; the tests in this namespace
// expect the consumer to have copied exactly this value into `ret`.
constexpr auto kTestValue = 42;
/**
 * Publishing side of the acquire/release test: writes the payload to data[0] and then sets the
 * flag with the requested atomic operation.
 *
 * A requested acquire order is replaced by release for the flag write; every other order is
 * used unchanged.
 */
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
__host__ __device__ void Producer(int* const flag, int* const data) {
  constexpr int publish_order =
      (memory_order == __ATOMIC_ACQUIRE) ? __ATOMIC_RELEASE : memory_order;

  *data = kTestValue;
  SetFlag<operation, publish_order, memory_scope>(flag);
}
/**
 * Consuming side of the acquire/release test: spins until FetchFlag reports the flag as set and
 * then copies data[0] into ret[0].
 */
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
__host__ __device__ void Consumer(int* const flag, int* const data, int* const ret) {
  for (;;) {
    if (FetchFlag<operation, memory_order, memory_scope>(flag)) {
      break;
    }
  }
  *ret = *data;
}
/**
 * Device-only acquire/release test: one thread acts as producer and another as consumer; which
 * pair is chosen depends on the memory scope under test.
 *
 *  - wavefront scope: threads 0 and 1 of block 0
 *  - workgroup scope: threads 0 and warpSize of block 0
 *  - agent scope:     thread 0 of block 0 and thread 0 of block 1
 *
 * For any other scope no thread takes part. Passing data == nullptr makes the kernel use a
 * shared-memory variable for the payload instead.
 *
 * NOTE(review): the flag is reset by thread 0 of block 0 and only a block-level barrier follows,
 * so for agent scope the consumer in block 1 is not ordered after that reset.
 */
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
__global__ void TestKernel(int* const flag, int* data, int* const ret) {
  __shared__ int shared_mem;
  if (data == nullptr) {
    data = &shared_mem;
  }

  const bool first_thread_of_grid = (blockIdx.x == 0) && (threadIdx.x == 0);

  // Put the flag into its "not set" state (1 for kAnd, 0 otherwise).
  if (first_thread_of_grid) {
    *flag = (operation == BuiltinAtomicOperation::kAnd) ? 1 : 0;
  }
  __syncthreads();

  bool is_producer = false;
  bool is_consumer = false;
  if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
    is_producer = first_thread_of_grid;
    is_consumer = (blockIdx.x == 0) && (threadIdx.x == 1);
  } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
    is_producer = first_thread_of_grid;
    is_consumer = (blockIdx.x == 0) && (threadIdx.x == warpSize);
  } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
    is_producer = first_thread_of_grid;
    is_consumer = (blockIdx.x == 1) && (threadIdx.x == 0);
  }

  if (is_producer) {
    Producer<operation, memory_order, memory_scope>(flag, data);
  } else if (is_consumer) {
    Consumer<operation, memory_order, memory_scope>(flag, data, ret);
  }
}
// Kernel wrapper around Producer: only thread 0 of block 0 publishes, all other threads exit.
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
__global__ void ProducerKernel(int* const flag, int* const data) {
  if (blockIdx.x == 0 && threadIdx.x == 0) {
    Producer<operation, memory_order, memory_scope>(flag, data);
  }
}
// Kernel wrapper around Consumer: only thread 0 of block 0 consumes, all other threads exit.
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
__global__ void ConsumerKernel(int* const flag, int* const data, int* const ret) {
  if (blockIdx.x == 0 && threadIdx.x == 0) {
    Consumer<operation, memory_order, memory_scope>(flag, data, ret);
  }
}
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope> void Test() {
int blocks = 1, threads = 1;
if (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
blocks = 1;
threads = 2;
} else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
blocks = 1;
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
threads = warp_size * 2;
} else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
blocks = 2;
threads = 1;
}
LinearAllocGuard<int> flag(LinearAllocs::hipMalloc, sizeof(int));
LinearAllocGuard<int> ret(LinearAllocs::hipMallocManaged, sizeof(int));
SECTION("Global memory") {
const auto alloc_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipMallocManaged);
LinearAllocGuard<int> data(alloc_type, sizeof(int));
TestKernel<operation, memory_order, memory_scope>
<<<blocks, threads>>>(flag.ptr(), data.ptr(), ret.ptr());
}
if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) {
SECTION("Shared memory") {
TestKernel<operation, memory_order, memory_scope>
<<<blocks, threads>>>(flag.ptr(), nullptr, ret.ptr());
}
}
HIP_CHECK(hipDeviceSynchronize());
REQUIRE(ret.ptr()[0] == kTestValue);
}
template <BuiltinAtomicOperation operation, int memory_order> void SystemTest() {
std::thread host_thread;
LinearAllocGuard<int> flag(LinearAllocs::hipMallocManaged, sizeof(int));
LinearAllocGuard<int> ret(LinearAllocs::hipMallocManaged, sizeof(int));
SECTION("Global memory") {
const auto alloc_type = GENERATE(LinearAllocs::hipHostMalloc, LinearAllocs::hipMallocManaged);
LinearAllocGuard<int> data(alloc_type, sizeof(int));
SECTION("Host producer - Device consumer") {
ConsumerKernel<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>
<<<1, 1>>>(flag.ptr(), data.ptr(), ret.ptr());
host_thread = std::thread([&] {
Producer<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>(flag.ptr(), data.ptr());
});
}
SECTION("Device producer - Host consumer") {
host_thread = std::thread([&] {
Consumer<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>(flag.ptr(), data.ptr(),
ret.ptr());
});
ProducerKernel<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>
<<<1, 1>>>(flag.ptr(), data.ptr());
}
}
HIP_CHECK(hipDeviceSynchronize());
host_thread.join();
REQUIRE(ret.ptr()[0] == kTestValue);
}
} /* namespace AcquireRelease */
namespace SequentialConsistency {
template <BuiltinAtomicOperation operation, int memory_scope>
__host__ __device__ void Producer(int* const flag) {
__atomic_store_n(flag, 1, __ATOMIC_SEQ_CST);
}
template <BuiltinAtomicOperation operation, int memory_scope>
__host__ __device__ void Consumer(int* const flag1, int* const flag2, int* const counter) {
while (!FetchFlag<operation, __ATOMIC_SEQ_CST, memory_scope>(flag1))
;
if (FetchFlag<operation, __ATOMIC_SEQ_CST, memory_scope>(flag2)) {
#ifdef __HIP_DEVICE_COMPILE__
__hip_atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST, memory_scope);
#else
__atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST);
#endif
}
}
template <BuiltinAtomicOperation operation, int memory_scope>
__global__ void TestKernel(int* flag1, int* flag2, int* const counter) {
__shared__ int shared_mem[2];
if (flag1 == nullptr) flag1 = &shared_mem[0];
if (flag2 == nullptr) flag2 = &shared_mem[1];
if (blockIdx.x == 0 && threadIdx.x == 0) {
if constexpr (operation == BuiltinAtomicOperation::kAnd) {
*flag1 = 1;
*flag2 = 1;
} else {
*flag1 = 0;
*flag2 = 0;
}
}
__syncthreads();
bool producer1 = false, producer2 = false, consumer1 = false, consumer2 = false;
if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
producer1 = blockIdx.x == 0 && threadIdx.x == 0;
consumer1 = blockIdx.x == 0 && threadIdx.x == 1;
producer2 = blockIdx.x == 0 && threadIdx.x == 2;
consumer2 = blockIdx.x == 0 && threadIdx.x == 3;
} else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
producer1 = blockIdx.x == 0 && threadIdx.x == 0;
consumer1 = blockIdx.x == 0 && threadIdx.x == warpSize;
producer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 2;
consumer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 3;
} else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
producer1 = blockIdx.x == 0 && threadIdx.x == 0;
consumer1 = blockIdx.x == 1 && threadIdx.x == 0;
producer2 = blockIdx.x == 2 && threadIdx.x == 0;
consumer2 = blockIdx.x == 3 && threadIdx.x == 0;
}
if (producer1) {
Producer<operation, memory_scope>(flag1);
return;
}
if (consumer1) {
Consumer<operation, memory_scope>(flag1, flag2, counter);
return;
}
if (producer2) {
Producer<operation, memory_scope>(flag2);
return;
}
if (consumer2) {
Consumer<operation, memory_scope>(flag2, flag1, counter);
return;
}
}
template <BuiltinAtomicOperation operation, int memory_scope>
__global__ void ProducerKernel(int* const flag) {
if (!(blockIdx.x == 0 && threadIdx.x == 0)) {
return;
}
Producer<operation, memory_scope>(flag);
}
template <BuiltinAtomicOperation operation, int memory_scope>
__global__ void ConsumerKernel(int* const flag1, int* const flag2, int* const counter) {
if (!(blockIdx.x == 0 && threadIdx.x == 0)) {
return;
}
Consumer<operation, memory_scope>(flag1, flag2, counter);
}
template <BuiltinAtomicOperation operation, int memory_scope> void Test() {
int blocks = 1, threads = 1;
if (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
blocks = 1;
threads = 4;
} else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
blocks = 1;
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
threads = warp_size * 4;
} else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
blocks = 4;
threads = 1;
}
LinearAllocGuard<int> counter(LinearAllocs::hipMallocManaged, sizeof(int));
SECTION("Global memory") {
const auto alloc_type = GENERATE(LinearAllocs::hipMalloc);
LinearAllocGuard<int> flag1(alloc_type, sizeof(int));
LinearAllocGuard<int> flag2(alloc_type, sizeof(int));
TestKernel<operation, memory_scope>
<<<blocks, threads>>>(flag1.ptr(), flag2.ptr(), counter.ptr());
}
if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) {
SECTION("Shared memory") {
TestKernel<operation, memory_scope><<<blocks, threads>>>(nullptr, nullptr, counter.ptr());
}
}
HIP_CHECK(hipDeviceSynchronize());
REQUIRE(counter.ptr()[0] != 0);
}
template <BuiltinAtomicOperation operation> void SystemTest() {
std::thread host_producer, host_consumer;
LinearAllocGuard<int> counter(LinearAllocs::hipMallocManaged, sizeof(int));
SECTION("Global memory") {
const auto alloc_type = GENERATE(LinearAllocs::hipMallocManaged);
LinearAllocGuard<int> flag1(alloc_type, sizeof(int));
LinearAllocGuard<int> flag2(alloc_type, sizeof(int));
ConsumerKernel<operation, __HIP_MEMORY_SCOPE_SYSTEM>
<<<1, 1>>>(flag1.ptr(), flag2.ptr(), counter.ptr());
host_consumer = std::thread([&] {
Consumer<operation, __HIP_MEMORY_SCOPE_SYSTEM>(flag2.ptr(), flag1.ptr(), counter.ptr());
});
ProducerKernel<operation, __HIP_MEMORY_SCOPE_SYSTEM><<<1, 1>>>(flag1.ptr());
host_producer =
std::thread([&] { Producer<operation, __HIP_MEMORY_SCOPE_SYSTEM>(flag2.ptr()); });
}
HIP_CHECK(hipDeviceSynchronize());
host_producer.join();
host_consumer.join();
REQUIRE(counter.ptr()[0] != 0);
}
} // namespace SequentialConsistency
+420
Просмотреть файл
@@ -0,0 +1,420 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
#include <hip/hip_cooperative_groups.h>
#include <resource_guards.hh>
#include <cmd_options.hh>
namespace cg = cooperative_groups;
namespace MinMax {
// Selects which atomic min/max flavour PerformAtomicOperation executes.
enum class AtomicOperation {
  kMin = 0,         // atomicMin
  kMinSystem = 1,   // atomicMin_system
  kMax = 2,         // atomicMax
  kMaxSystem = 3,   // atomicMax_system
  kSafeMin = 4,     // safeAtomicMin
  kUnsafeMin = 5,   // unsafeAtomicMin
  kSafeMax = 6,     // safeAtomicMax
  kUnsafeMax = 7,   // unsafeAtomicMax
  kBuiltinMin = 8,  // __hip_atomic_fetch_min
  kBuiltinMax = 9   // __hip_atomic_fetch_max
};
// Base values of the min/max tests for integral and floating-point TestType respectively. The
// target memory is seeded with the base value, and GetTestValue derives the operand of the
// atomic operation from it by subtracting or adding 2.
constexpr auto kIntegerTestValue = 5;
constexpr auto kFloatingPointTestValue = 5.5;
/**
 * Returns the operand passed to the atomic min/max operation.
 *
 * The operand is chosen relative to the base value the memory is seeded with, so that the
 * operation actually changes the stored value: base - 2 for every min flavour and base + 2 for
 * every max flavour. kBuiltinMin is treated like the other min operations; it used to fall
 * through to base + 2, which turned the builtin min into a no-op on the seeded memory.
 */
template <typename TestType, AtomicOperation operation>
__host__ __device__ TestType GetTestValue() {
  const TestType base_value =
      std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;

  constexpr bool is_min_operation =
      operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem ||
      operation == AtomicOperation::kUnsafeMin || operation == AtomicOperation::kSafeMin ||
      operation == AtomicOperation::kBuiltinMin;

  if constexpr (is_min_operation) {
    return base_value - 2;
  } else {
    return base_value + 2;
  }
}
/**
 * Executes the atomic min/max flavour selected by `operation` on *mem, using the operand from
 * GetTestValue, and returns whatever the atomic call returns.
 *
 * memory_scope is only used by the two builtin flavours, which are issued with relaxed ordering.
 */
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__device__ TestType PerformAtomicOperation(TestType* const mem) {
  const auto operand = GetTestValue<TestType, operation>();

  // Minimum flavours.
  if constexpr (operation == AtomicOperation::kMin) {
    return atomicMin(mem, operand);
  } else if constexpr (operation == AtomicOperation::kMinSystem) {
    return atomicMin_system(mem, operand);
  } else if constexpr (operation == AtomicOperation::kSafeMin) {
    return safeAtomicMin(mem, operand);
  } else if constexpr (operation == AtomicOperation::kUnsafeMin) {
    return unsafeAtomicMin(mem, operand);
  } else if constexpr (operation == AtomicOperation::kBuiltinMin) {
    return __hip_atomic_fetch_min(mem, operand, __ATOMIC_RELAXED, memory_scope);
  }
  // Maximum flavours.
  else if constexpr (operation == AtomicOperation::kMax) {
    return atomicMax(mem, operand);
  } else if constexpr (operation == AtomicOperation::kMaxSystem) {
    return atomicMax_system(mem, operand);
  } else if constexpr (operation == AtomicOperation::kSafeMax) {
    return safeAtomicMax(mem, operand);
  } else if constexpr (operation == AtomicOperation::kUnsafeMax) {
    return unsafeAtomicMax(mem, operand);
  } else if constexpr (operation == AtomicOperation::kBuiltinMax) {
    return __hip_atomic_fetch_max(mem, operand, __ATOMIC_RELAXED, memory_scope);
  }
}
/**
 * Single-element test kernel: every thread of the grid performs the atomic operation on the
 * same location and records the returned value in old_vals at its grid-wide thread rank.
 *
 * With use_shared_mem the operation targets a shared-memory copy of global_mem[0]; the thread
 * with rank 0 loads it beforehand and writes the result back afterwards.
 */
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) {
  __shared__ TestType shared_mem;

  const auto tid = cg::this_grid().thread_rank();

  if constexpr (use_shared_mem) {
    if (tid == 0) {
      shared_mem = global_mem[0];
    }
    __syncthreads();

    old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(&shared_mem);

    __syncthreads();
    if (tid == 0) {
      global_mem[0] = shared_mem;
    }
  } else {
    old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(global_mem);
  }
}
// Returns the address of element `idx` in a pitched layout, i.e. `ptr` advanced by idx * pitch
// bytes.
template <typename TestType>
__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch,
                                            const unsigned int idx) {
  uint8_t* const base = reinterpret_cast<uint8_t*>(ptr);
  uint8_t* const element = base + idx * pitch;
  return reinterpret_cast<TestType*>(element);
}
// Reads every byte in [begin_addr, end_addr), XORs it with 0xAB and writes it back. The accesses
// go through a volatile pointer so that each read and write is actually performed.
__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  volatile uint8_t* cursor = begin_addr;
  while (cursor != end_addr) {
    const uint8_t current = *cursor;
    *cursor = static_cast<uint8_t>(current ^ 0xAB);
    ++cursor;
  }
}
/**
 * Pitched test kernel: the atomics are spread over `width` elements that lie `pitch` bytes
 * apart. A thread with grid rank tid works on element tid % width.
 *
 * Threads with a rank below (grid size - width) perform the atomic operation and record the
 * returned value in old_vals; the remaining threads instead rewrite the bytes between the end of
 * their element and the start of the next row (see GenerateMemoryTraffic).
 *
 * With use_shared_mem the elements are first copied into dynamic shared memory, the operations
 * run on that copy, and the elements are copied back afterwards.
 */
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
                           const unsigned int width, const unsigned pitch) {
  extern __shared__ uint8_t shared_mem[];

  const auto tid = cg::this_grid().thread_rank();

  TestType* const mem = use_shared_mem ? reinterpret_cast<TestType*>(shared_mem) : global_mem;

  if constexpr (use_shared_mem) {
    if (tid < width) {
      TestType* const staged = PitchedOffset(mem, pitch, tid);
      *staged = *PitchedOffset(global_mem, pitch, tid);
    }
    __syncthreads();
  }

  const auto n = cooperative_groups::this_grid().size() - width;

  TestType* const atomic_addr = PitchedOffset(mem, pitch, tid % width);

  if (tid >= n) {
    // Touch the padding that follows the element, up to the start of the next row.
    uint8_t* const padding_begin = reinterpret_cast<uint8_t*>(atomic_addr + 1);
    uint8_t* const padding_end = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
    GenerateMemoryTraffic(padding_begin, padding_end);
  } else {
    old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(atomic_addr);
  }

  if constexpr (use_shared_mem) {
    __syncthreads();
    if (tid < width) {
      TestType* const destination = PitchedOffset(global_mem, pitch, tid);
      *destination = *PitchedOffset(mem, pitch, tid);
    }
  }
}
// Parameters of one test run: launch geometry, number of devices and kernels, layout of the
// target memory and the kind of allocation used for it.
struct TestParams {
  dim3 blocks;                           // grid dimensions of every kernel launch
  dim3 threads;                          // block dimensions of every kernel launch
  unsigned int num_devices = 1u;         // devices taking part
  unsigned int kernel_count = 1u;        // kernels launched per device
  unsigned int width = 1u;               // number of elements the atomics are spread over
  unsigned int pitch = 0u;               // distance in bytes between consecutive elements
  unsigned int host_thread_count = 0u;
  LinearAllocs alloc_type;               // allocation type of the target memory

  // Total number of threads in one kernel launch.
  auto ThreadCount() const {
    const auto blocks_per_grid = blocks.x * blocks.y * blocks.z;
    const auto threads_per_block = threads.x * threads.y * threads.z;
    return blocks_per_grid * threads_per_block;
  }
};
/**
 * Host reference model of the test kernels.
 *
 * Replays the atomic operation once per device thread (over all devices and kernels), thread
 * `tid` working on element tid % width, starting from memory seeded with the base test value.
 *
 * @return A tuple of (final value of every element, value each simulated call returned).
 */
template <typename TestType, AtomicOperation operation>
std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const TestParams& p) {
  constexpr bool is_min_operation =
      operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem ||
      operation == AtomicOperation::kUnsafeMin || operation == AtomicOperation::kSafeMin ||
      operation == AtomicOperation::kBuiltinMin;
  constexpr bool is_max_operation =
      operation == AtomicOperation::kMax || operation == AtomicOperation::kMaxSystem ||
      operation == AtomicOperation::kUnsafeMax || operation == AtomicOperation::kSafeMax ||
      operation == AtomicOperation::kBuiltinMax;

  const auto operand = GetTestValue<TestType, operation>();
  const auto total_threads = p.num_devices * p.kernel_count * p.ThreadCount();
  const TestType initial_value =
      std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;

  std::vector<TestType> res_vals(p.width, initial_value);
  std::vector<TestType> old_vals;
  old_vals.reserve(total_threads);

  for (auto tid = 0u; tid < total_threads; ++tid) {
    TestType& slot = res_vals[tid % p.width];
    // An atomic fetch operation returns the value found before the update.
    old_vals.push_back(slot);

    if constexpr (is_min_operation) {
      slot = std::min(slot, operand);
    } else if constexpr (is_max_operation) {
      slot = std::max(slot, operand);
    }
  }

  return {res_vals, old_vals};
}
template <typename TestType, AtomicOperation operation>
void Verify(const TestParams& p, std::vector<TestType>& res_vals, std::vector<TestType>& old_vals) {
auto [expected_res_vals, expected_old_vals] = TestKernelHostRef<TestType, operation>(p);
for (auto i = 0u; i < res_vals.size(); ++i) {
INFO("Results index: " << i);
REQUIRE(expected_res_vals[i] == res_vals[i]);
}
std::sort(begin(old_vals), end(old_vals));
std::sort(begin(expected_old_vals), end(expected_old_vals));
for (auto i = 0u; i < old_vals.size(); ++i) {
INFO("Old values index: " << i);
REQUIRE(expected_old_vals[i] == old_vals[i]);
}
}
/**
 * Launches the test kernel that matches the memory layout described by `p`.
 *
 * A single element whose pitch equals sizeof(TestType) is handled by the two-argument kernel
 * overload; every other layout goes to the overload that also receives width and pitch.
 * When use_shared_mem is set, width * pitch bytes of dynamic shared memory are requested.
 */
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
          int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr,
                  TestType* const old_vals) {
  const bool single_unpadded_element = (p.width == 1) && (p.pitch == sizeof(TestType));
  const auto dynamic_shared_bytes = use_shared_mem ? p.width * p.pitch : 0u;

  if (!single_unpadded_element) {
    TestKernel<TestType, operation, use_shared_mem, memory_scope>
        <<<p.blocks, p.threads, dynamic_shared_bytes, stream>>>(mem_ptr, old_vals, p.width,
                                                                p.pitch);
    return;
  }

  TestKernel<TestType, operation, use_shared_mem, memory_scope>
      <<<p.blocks, p.threads, dynamic_shared_bytes, stream>>>(mem_ptr, old_vals);
}
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void TestCore(const TestParams& p) {
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
std::vector<StreamGuard> streams;
for (auto i = 0; i < p.num_devices; ++i) {
HIP_CHECK(hipSetDevice(i));
old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size);
for (auto j = 0; j < p.kernel_count; ++j) {
streams.emplace_back(Streams::created);
}
}
const auto mem_alloc_size = p.width * p.pitch;
LinearAllocGuard<TestType> mem_dev(p.alloc_type, mem_alloc_size);
std::vector<TestType> old_vals(p.num_devices * p.kernel_count * p.ThreadCount());
std::vector<TestType> res_vals(p.width);
TestType* const mem_ptr =
p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr();
TestType test_value =
std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) {
HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice));
}
for (auto i = 0u; i < p.num_devices; ++i) {
for (auto j = 0u; j < p.kernel_count; ++j) {
const auto& stream = streams[i * p.kernel_count + j].stream();
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
old_vals);
}
}
for (auto i = 0u; i < p.num_devices; ++i) {
const auto device_offset = i * p.kernel_count * p.ThreadCount();
HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(),
old_vals_alloc_size, hipMemcpyDeviceToHost));
}
HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType),
p.width, hipMemcpyDeviceToHost));
Verify<TestType, operation>(p, res_vals, old_vals);
}
// Catch2 generator for the block size: the test is repeated with 16 and with 1024 threads.
inline dim3 GenerateThreadDimensions() {
  const dim3 threads_per_block = GENERATE(dim3(16), dim3(1024));
  return threads_per_block;
}
// Catch2 generator for the grid size: once with one block per multiprocessor of device 0 and
// once with one and a half times that number.
inline dim3 GenerateBlockDimensions() {
  int multiprocessors = 0;
  HIP_CHECK(
      hipDeviceGetAttribute(&multiprocessors, hipDeviceAttributeMultiprocessorCount, 0));
  const int oversubscribed = multiprocessors + multiprocessors / 2;
  return GENERATE_COPY(dim3(multiprocessors), dim3(oversubscribed));
}
/**
 * Runs TestCore with one kernel on one device: once per linear allocation type in the
 * "Global memory" section and once, with a single block, in the "Shared memory" section.
 *
 * For the builtin min/max operations the launch dimensions are narrowed according to
 * memory_scope: a single thread for SINGLETHREAD, warp-size threads for WAVEFRONT, and a single
 * block for SINGLETHREAD, WAVEFRONT and WORKGROUP. All other combinations take their thread and
 * block dimensions from GenerateThreadDimensions() / GenerateBlockDimensions().
 *
 * memory_scope is forwarded to TestCore so that the kernel is launched with the same scope the
 * launch dimensions were chosen for.
 *
 * @tparam TestType      Type of the values operated on.
 * @tparam operation     Atomic operation under test.
 * @tparam memory_scope  HIP memory scope; defaults to __HIP_MEMORY_SCOPE_AGENT.
 * @param width  Stored in TestParams::width.
 * @param pitch  Stored in TestParams::pitch.
 */
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
  constexpr bool kIsBuiltinMinMax =
      operation == AtomicOperation::kBuiltinMin || operation == AtomicOperation::kBuiltinMax;

  TestParams params;
  params.num_devices = 1;
  params.kernel_count = 1;

  if constexpr (kIsBuiltinMinMax && memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
    params.threads = 1;
  } else if constexpr (kIsBuiltinMinMax && memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
    int warp_size = 0;
    HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
    params.threads = dim3(warp_size);
  } else {
    params.threads = GenerateThreadDimensions();
  }

  params.width = width;
  params.pitch = pitch;

  SECTION("Global memory") {
    if constexpr (kIsBuiltinMinMax &&
                  (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
                   memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
                   memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
      params.blocks = dim3(1);
    } else {
      params.blocks = GenerateBlockDimensions();
    }

    using LA = LinearAllocs;
    for (const auto alloc_type :
         {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
      params.alloc_type = alloc_type;
      DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
        // Forward the requested scope; previously TestCore's default scope was used.
        TestCore<TestType, operation, false, memory_scope>(params);
      }
    }
  }

  SECTION("Shared memory") {
    params.blocks = dim3(1);
    params.alloc_type = LinearAllocs::hipMalloc;
    TestCore<TestType, operation, true, memory_scope>(params);
  }
}
/**
 * Runs TestCore with kernel_count kernels on a single device, once per linear allocation type.
 * The test is skipped when device 0 does not report support for concurrent kernel execution.
 *
 * @tparam TestType   Type of the values operated on.
 * @tparam operation  Atomic operation under test.
 * @param kernel_count  Stored in TestParams::kernel_count.
 * @param width         Stored in TestParams::width.
 * @param pitch         Stored in TestParams::pitch.
 */
template <typename TestType, AtomicOperation operation>
void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width,
                                    const unsigned int pitch) {
  int concurrent_kernels = 0;
  HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0));
  if (!concurrent_kernels) {
    HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
    return;
  }

  TestParams params;
  params.num_devices = 1;
  params.kernel_count = kernel_count;
  // Threads come from the thread generator and blocks from the block generator, matching
  // SingleDeviceSingleKernelTest (the two were previously swapped). The generator call order
  // is kept as it was.
  params.threads = GenerateThreadDimensions();
  params.blocks = GenerateBlockDimensions();
  params.width = width;
  params.pitch = pitch;

  using LA = LinearAllocs;
  for (const auto alloc_type :
       {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
    params.alloc_type = alloc_type;
    DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
      TestCore<TestType, operation, false>(params);
    }
  }
}
/**
 * Runs TestCore with kernel_count kernels on each of num_devices devices, using
 * __HIP_MEMORY_SCOPE_SYSTEM, once per allocation type among hipHostMalloc, hipMallocManaged
 * and mallocAndRegister.
 *
 * The test is skipped when more than one device is requested but fewer are present, or when
 * more than one kernel is requested and any of the devices does not report support for
 * concurrent kernel execution.
 *
 * @tparam TestType   Type of the values operated on.
 * @tparam operation  Atomic operation under test.
 * @param num_devices   Stored in TestParams::num_devices.
 * @param kernel_count  Stored in TestParams::kernel_count.
 * @param width         Stored in TestParams::width.
 * @param pitch         Stored in TestParams::pitch.
 */
template <typename TestType, AtomicOperation operation>
void MultipleDeviceMultipleKernelTest(const unsigned int num_devices,
                                      const unsigned int kernel_count, const unsigned int width,
                                      const unsigned int pitch) {
  if (num_devices > 1) {
    if (HipTest::getDeviceCount() < num_devices) {
      std::string msg = std::to_string(num_devices) + " devices are required";
      HipTest::HIP_SKIP_TEST(msg.c_str());
      return;
    }
  }

  if (kernel_count > 1) {
    for (auto i = 0u; i < num_devices; ++i) {
      int concurrent_kernels = 0;
      HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i));
      if (!concurrent_kernels) {
        HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
        return;
      }
    }
  }

  TestParams params;
  params.num_devices = num_devices;
  params.kernel_count = kernel_count;
  // Threads come from the thread generator and blocks from the block generator, matching
  // SingleDeviceSingleKernelTest (the two were previously swapped). The generator call order
  // is kept as it was.
  params.threads = GenerateThreadDimensions();
  params.blocks = GenerateBlockDimensions();
  params.width = width;
  params.pitch = pitch;

  using LA = LinearAllocs;
  for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
    params.alloc_type = alloc_type;
    DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
      TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
    }
  }
}
} // namespace MinMax
+123
Просмотреть файл
@@ -0,0 +1,123 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup safeAtomicAdd safeAtomicAdd
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition on a target memory location. Each thread will add the same value to the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of safeAtomicAdd
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - Several grid and block dimension combinations (only one block is used for shared memory).
* Test source
* ------------------------
* - unit/atomics/safeAtomicAdd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive", "", float, double) {
  constexpr auto kOperation = AtomicOperation::kSafeAdd;
  // Pitch used by the "Scattered addresses" sections.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << run) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << run) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width, kScatteredPitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on a single device wherein all threads will
* perform an atomic addition on a target memory location. Each thread will add the same value to
* the memory location, storing the return value into a separate output array slot corresponding
* to it. Once complete, the output array and target memory is validated to contain all the
* expected values. Several memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of safeAtomicAdd
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/safeAtomicAdd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive_Multi_Kernel", "", float, double) {
  constexpr auto kOperation = AtomicOperation::kSafeAdd;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;
  // Pitch used by the "Scattered addresses" sections.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << run) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                           sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << run) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                           kScatteredPitch);
    }
  }
}
+175
Просмотреть файл
@@ -0,0 +1,175 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup safeAtomicMax safeAtomicMax
* @{
* @ingroup AtomicsTest
* `safeAtomicMax(TestType* address, TestType val)` -
* atomically stores the maximum of `*address` and `val` at `address`, returns the old value.
*/
/**
* Test Description
* ------------------------
* - Performs safeAtomicMax from multiple threads on the same address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      // Width of one element: all threads target the same address.
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMax from multiple threads on adjacent addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Adjacent_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Adjacent address " << run) {
      // Warp-size elements, packed back to back (pitch equals the element size).
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width,
                                                                 sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMax from multiple threads on the scattered addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Scattered_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;
  // Pitch separating the target elements.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Scattered address " << run) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width,
                                                                 kScatteredPitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMax from multiple threads on the same address.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      // Width of one element: all threads target the same address.
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1,
                                                                   sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMax from multiple threads on adjacent addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Adjacent address " << run) {
      // Warp-size elements, packed back to back (pitch equals the element size).
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                                   sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMax from multiple threads on the scattered addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;
  // Pitch separating the target elements.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Scattered address " << run) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                                   kScatteredPitch);
    }
  }
}
+175
Просмотреть файл
@@ -0,0 +1,175 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup safeAtomicMin safeAtomicMin
* @{
* @ingroup AtomicsTest
* `safeAtomicMin(TestType* address, TestType val)` -
* atomically stores the minimum of `*address` and `val` at `address`, returns the old value.
*/
/**
* Test Description
* ------------------------
* - Performs safeAtomicMin from multiple threads on the same address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      // Width of one element: all threads target the same address.
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMin from multiple threads on adjacent addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Adjacent_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Adjacent address " << run) {
      // Warp-size elements, packed back to back (pitch equals the element size).
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width,
                                                                 sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMin from multiple threads on the scattered addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Scattered_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;
  // Pitch separating the target elements.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Scattered address " << run) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width,
                                                                 kScatteredPitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMin from multiple threads on the same address.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      // Width of one element: all threads target the same address.
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1,
                                                                   sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMin from multiple threads on adjacent addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Adjacent address " << run) {
      // Warp-size elements, packed back to back (pitch equals the element size).
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                                   sizeof(TestType));
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs safeAtomicMin from multiple threads on the scattered addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/safeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;
  // Pitch separating the target elements.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // The whole scenario is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Scattered address " << run) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                                   kScatteredPitch);
    }
  }
}
+165
Просмотреть файл
@@ -0,0 +1,165 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "memory_order_common.hh"
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the load/store operation.
TEST_CASE("Unit___hip_atomic_load_store_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kLoadStore;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the exchange operation.
TEST_CASE("Unit___hip_atomic_exchange_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kExchange;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the strong
// compare-exchange operation.
TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kCompareExchangeStrong;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the weak
// compare-exchange operation.
TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kCompareExchangeWeak;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the add operation.
TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kAdd;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the and operation.
TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kAnd;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the or operation.
TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kOr;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the xor operation.
TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kXor;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the min operation.
TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kMin;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
// Runs SequentialConsistency::Test for the wavefront, workgroup and agent scopes and
// SequentialConsistency::SystemTest for the system section, all with the max operation.
TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency") {
  constexpr auto kOperation = BuiltinAtomicOperation::kMax;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }

  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }

  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }

  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
+124
Просмотреть файл
@@ -0,0 +1,124 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "arithmetic_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup unsafeAtomicAdd unsafeAtomicAdd
* @{
* @ingroup AtomicsTest
*/
/**
* Test Description
* ------------------------
* - Executes a single kernel on a single device wherein all threads will perform an atomic
* addition on a target memory location. Each thread will add the same value to the memory location,
* storing the return value into a separate output array slot corresponding to it. Once complete,
* the output array and target memory is validated to contain all the expected values. Several
* memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of unsafeAtomicAdd
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Shared memory
* - Several grid and block dimension combinations (only one block is used for shared memory).
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicAdd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive", "", float, double) {
  constexpr auto kOperation = AtomicOperation::kUnsafeAdd;
  // Pitch used by the "Scattered addresses" sections.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << run) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << run) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(wavefront_width, kScatteredPitch);
    }
  }
}
/**
* Test Description
* ------------------------
* - Executes a kernel two times concurrently on a single device wherein all threads will
* perform an atomic addition on a target memory location. Each thread will add the same value to
* the memory location, storing the return value into a separate output array slot corresponding
* to it. Once complete, the output array and target memory is validated to contain all the
* expected values. Several memory access patterns are tested:
* -# All threads add to a single, compile time deducible, memory location
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
* for indexing
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
*
* - The test is run for:
* - All overloads of unsafeAtomicAdd
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
* - Several grid and block dimension combinations.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicAdd.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive_Multi_Kernel", "", float, double) {
  constexpr auto kOperation = AtomicOperation::kUnsafeAdd;
  // Number of kernels requested from SingleDeviceMultipleKernelTest.
  constexpr unsigned int kKernelCount = 2;
  // Pitch used by the "Scattered addresses" sections.
  constexpr unsigned int kScatteredPitch = 128u;

  int wavefront_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&wavefront_width, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times.
  for (int run = 0; run < cmd_options.iterations; ++run) {
    DYNAMIC_SECTION("Same address " << run) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << run) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                           sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << run) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, wavefront_width,
                                                           kScatteredPitch);
    }
  }
}
+175
Просмотреть файл
@@ -0,0 +1,175 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup unsafeAtomicMax unsafeAtomicMax
* @{
* @ingroup AtomicsTest
* `unsafeAtomicMax(TestType* address, TestType val)` -
* stores the maximum of the value at address and val back to address, returns the old value.
*/
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMax from multiple threads on the same address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  const auto element_bytes = sizeof(TestType);

  // One dynamic section per iteration requested on the command line.
  // NOTE(review): the driver arguments are presumably (element count, byte stride) - confirm
  // against min_max_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMax from multiple threads on adjacent addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Adjacent_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  const auto element_bytes = sizeof(TestType);

  // Warp width of device 0 is forwarded as the first driver argument.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(lanes_per_warp, element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMax from multiple threads on the scattered addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Scattered_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  constexpr auto kCacheLineBytes = 128u;

  // Warp width of device 0 is forwarded as the first driver argument.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(lanes_per_warp, kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMax from multiple threads on the same address.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  const auto element_bytes = sizeof(TestType);

  // One dynamic section per iteration requested on the command line.
  // NOTE(review): the driver arguments are presumably (kernel count, element count, byte
  // stride) - confirm against min_max_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMax from multiple threads on adjacent addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  const auto element_bytes = sizeof(TestType);

  // Warp width of device 0 is forwarded to the multi-kernel driver.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, lanes_per_warp,
                                                                   element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMax from multiple threads on the scattered addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMax.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  constexpr auto kCacheLineBytes = 128u;

  // Warp width of device 0 is forwarded to the multi-kernel driver.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, lanes_per_warp,
                                                                   kCacheLineBytes);
    }
  }
}
+175
Просмотреть файл
@@ -0,0 +1,175 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "min_max_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup unsafeAtomicMin unsafeAtomicMin
* @{
* @ingroup AtomicsTest
* `unsafeAtomicMin(TestType* address, TestType val)` -
* stores the minimum of the value at address and val back to address, returns the old value.
*/
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMin from multiple threads on the same address.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  const auto element_bytes = sizeof(TestType);

  // One dynamic section per iteration requested on the command line.
  // NOTE(review): the driver arguments are presumably (element count, byte stride) - confirm
  // against min_max_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMin from multiple threads on adjacent addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Adjacent_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  const auto element_bytes = sizeof(TestType);

  // Warp width of device 0 is forwarded as the first driver argument.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(lanes_per_warp, element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMin from multiple threads on the scattered addresses.
* - Uses only one device and launches one kernel.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Scattered_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  constexpr auto kCacheLineBytes = 128u;

  // Warp width of device 0 is forwarded as the first driver argument.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(lanes_per_warp, kCacheLineBytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMin from multiple threads on the same address.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  const auto element_bytes = sizeof(TestType);

  // One dynamic section per iteration requested on the command line.
  // NOTE(review): the driver arguments are presumably (kernel count, element count, byte
  // stride) - confirm against min_max_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMin from multiple threads on adjacent addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  const auto element_bytes = sizeof(TestType);

  // Warp width of device 0 is forwarded to the multi-kernel driver.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, lanes_per_warp,
                                                                   element_bytes);
    }
  }
}
/**
* Test Description
* ------------------------
* - Performs unsafeAtomicMin from multiple threads on the scattered addresses.
* - Uses only one device and launches multiple kernels.
* Test source
* ------------------------
* - unit/atomics/unsafeAtomicMin.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  constexpr auto kCacheLineBytes = 128u;

  // Warp width of device 0 is forwarded to the multi-kernel driver.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration requested on the command line.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, lanes_per_warp,
                                                                   kCacheLineBytes);
    }
  }
}
+1 -1
Просмотреть файл
@@ -52,7 +52,7 @@ class CompileAndCapture(unittest.TestCase):
# HIP compiler on AMD platforms has limit of 20 errors, and some negative
# test cases expect that more errors are detected.
if (self.platform == 'amd'):
compiler_args.append('-ferror-limit=100')
compiler_args.append('-ferror-limit=200')
compiler_output = subprocess.run(compiler_args, stderr=subprocess.PIPE)
# Get the compiler output in the stdout if -V flag is raised during ctest invocation.
compiler_stderr = compiler_output.stderr.decode('UTF-8')
+1
Просмотреть файл
@@ -2,6 +2,7 @@
set(TEST_SRC
thread_block.cc
thread_block_tile.cc
coalesced_group_tiled_partition.cc
hipCGThreadBlockType_old.cc
hipCGMultiGridGroupType_old.cc
hipCGGridGroupType_old.cc
+685
Просмотреть файл
@@ -0,0 +1,685 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "cooperative_groups_common.hh"
#include <bitset>
#include <optional>
#include <resource_guards.hh>
#include <utils.hh>
#include <cmd_options.hh>
#include <cpu_grid.h>
#include <hip_test_common.hh>
#include <hip/hip_cooperative_groups.h>
/**
* @addtogroup coalesced_group_tile coalesced_group_tile
* @{
* @ingroup DeviceLanguageTest
* Contains unit tests for partitioning of coalesced groups into tiled partitions
*/
namespace cg = cooperative_groups;
namespace {
// Inclusive bounds for randomly generated lane-activity masks: drawn from the full 64-bit range
// when HT_AMD is set, and from the 32-bit range otherwise.
#if HT_AMD
using MaskWord = uint64_t;
#else
using MaskWord = uint32_t;
#endif
constexpr auto kMaskMin = std::numeric_limits<MaskWord>::min();
constexpr auto kMaskLimit = std::numeric_limits<MaskWord>::max();
}  // namespace
// Catch2 generator over the tile sizes under test: powers of two from 2 to 32, plus 64 when
// HT_AMD is set.
static unsigned int GenerateTileSizes() {
#if HT_AMD
  const unsigned int tile_size = GENERATE(2u, 4u, 8u, 16u, 32u, 64u);
#else
  const unsigned int tile_size = GENERATE(2u, 4u, 8u, 16u, 32u);
#endif
  return tile_size;
}
// Returns the process-wide Mersenne Twister shared by all random helpers in this file.
// The seed is fixed, so the sequence of generated values is the same on every run.
static inline std::mt19937& GetRandomGenerator() {
  constexpr std::mt19937::result_type kSeed = 11;
  static std::mt19937 engine(kSeed);
  return engine;
}
// Draws a uniformly distributed integer from the closed interval [min, max] using the shared
// generator returned by GetRandomGenerator().
template <typename T> static inline T GenerateRandomInteger(const T min, const T max) {
  using Distribution = std::uniform_int_distribution<T>;
  Distribution distribution(min, max);
  auto& engine = GetRandomGenerator();
  return distribution(engine);
}
/**
 * Expands a warp activity bitmask into the ordered list of active lane indices.
 *
 * @tparam warp_size number of lanes to inspect; must not exceed the 64 bits of the mask.
 * @param mask bit i set means lane i is active.
 * @return tuple of (array of active lane indices in ascending order, number of valid entries).
 *         Array slots past the returned count are value-initialized (zero).
 */
template <size_t warp_size> static auto coalesce_threads(const uint64_t mask) {
  static_assert(warp_size <= 64, "the activity mask holds at most 64 lanes");
  std::tuple<std::array<unsigned int, warp_size>, unsigned int> res;
  auto& [threads, count] = res;
  count = 0u;
  for (auto lane = 0u; lane < warp_size; ++lane) {
    // Shift the 64-bit mask rather than a 32-bit literal: `1u << lane` is undefined for
    // lane >= 32 and made lanes 32..63 of a 64-wide warp decode incorrectly.
    if ((mask >> lane) & 1u) {
      threads[count++] = lane;
    }
  }
  return res;
}
/**
 * Tells the calling thread whether it has to drop out of the test kernel.
 *
 * The mask for a thread is looked up per warp-sized tile: tiles are numbered block by block
 * (blocks linearized x-fastest), with ceil(block.size() / warp_size) tiles per block.
 *
 * @tparam warp_size tile width used both for partitioning the block and for indexing masks.
 * @param active_masks one 64-bit activity mask per tile; bit i corresponds to tile rank i.
 * @return true when the bit for this thread is clear, i.e. the thread should return early.
 */
template <size_t warp_size> __device__ bool deactivate_thread(uint64_t* active_masks) {
  const cg::thread_block_tile<warp_size> warp =
      cg::tiled_partition<warp_size>(cg::this_thread_block());
  const auto block = cg::this_thread_block();
  const auto warps_per_block = (block.size() + warp_size - 1) / warp_size;
  const auto block_rank = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
  const auto idx = block_rank * warps_per_block + block.thread_rank() / warp.size();
  // Shift the 64-bit mask instead of a 32-bit literal: `1u << rank` is undefined for
  // rank >= 32, which breaks lanes 32..63 when warp_size is 64.
  return !((active_masks[idx] >> warp.thread_rank()) & 1u);
}
// Kernel: every thread left active by `active_masks` records the size() of the tile it lands in
// after partitioning the coalesced group into tiles of `tile_size`. The value is written to
// `sizes` at the thread's grid-wide rank; deactivated threads write nothing.
template <size_t warp_size>
__global__ void coalesced_group_tiled_partition_size_getter(uint64_t* active_masks,
                                                            unsigned int tile_size,
                                                            unsigned int* sizes) {
  const bool masked_out = deactivate_thread<warp_size>(active_masks);
  if (masked_out) {
    return;
  }

  const auto partition = cg::tiled_partition(cg::coalesced_threads(), tile_size);
  sizes[thread_rank_in_grid()] = partition.size();
}
// Kernel: every thread left active by `active_masks` records its thread_rank() within the tile
// it lands in after partitioning the coalesced group into tiles of `tile_size`. The value is
// written to `sizes` (the output array, despite its name) at the thread's grid-wide rank.
template <size_t warp_size>
__global__ void coalesced_group_tiled_partition_thread_rank_getter(uint64_t* active_masks,
                                                                   unsigned int tile_size,
                                                                   unsigned int* sizes) {
  const bool masked_out = deactivate_thread<warp_size>(active_masks);
  if (masked_out) {
    return;
  }

  const auto partition = cg::tiled_partition(cg::coalesced_threads(), tile_size);
  sizes[thread_rank_in_grid()] = partition.thread_rank();
}
/**
* Test Description
* ------------------------
* - Deactivates threads based on passed in mask and creates tiled partitions over coalesced
* threads for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} and writes the return values of
* size and thread_rank member functions to an output array that is validated on the host side.
* Test source
* ------------------------
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic") {
  const auto tile_size = GenerateTileSizes();
  INFO("Tile size: " << tile_size);
  auto blocks = GenerateBlockDimensions();
  auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
  CPUGrid grid(blocks, threads);

  // One output slot per thread in the grid, mirrored on device and host.
  const auto alloc_size = grid.thread_count_ * sizeof(unsigned int);
  LinearAllocGuard<unsigned int> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
  LinearAllocGuard<unsigned int> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);

  // One 64-bit activity mask per warp; a partially filled last warp of a block still gets one.
  const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
  const auto warps_in_grid = warps_in_block * grid.block_count_;
  LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
                                              warps_in_grid * sizeof(uint64_t));
  LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
                                          warps_in_grid * sizeof(uint64_t));
  // NOTE(review): masks are drawn from a 32-bit range and the kernels below are instantiated
  // with a literal 32, while host-side validation uses kWarpSize and the sibling shuffle tests
  // use kWarpSize / kMaskMin..kMaskLimit - confirm this is intentional where kWarpSize != 32.
  std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
                [] { return GenerateRandomInteger(0u, std::numeric_limits<uint32_t>().max()); });
  HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
                      hipMemcpyHostToDevice));

  // Pass 1: collect size() for every active thread.
  HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
  coalesced_group_tiled_partition_size_getter<32>
      <<<blocks, threads>>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr());
  // Surface launch failures here instead of as a value mismatch during validation.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // Pass 2 is launched before pass 1 is validated; the host copy of the sizes is already
  // complete, so the device buffer can be reused while the host checks the results.
  HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
  coalesced_group_tiled_partition_thread_rank_getter<32>
      <<<blocks, threads>>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr());
  HIP_CHECK(hipGetLastError());

  // Number of padding lanes in the last warp of each block (lanes with no backing thread).
  const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;

  // validate size
  for (auto i = 0u; i < warps_in_grid; ++i) {
    auto current_warp_mask = active_masks.ptr()[i];
    // For the last warp of a block, clear the mask bits of the padding lanes (and the upper
    // 32 bits as well on NVIDIA); other warps keep their mask untouched (shift by 0).
    const auto shift_amount =
        (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
    current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
    const auto [active_threads, active_thread_count] =
        coalesce_threads<kWarpSize>(current_warp_mask);

    // Padding lanes accumulated by all preceding blocks; subtracting them converts a
    // warp-padded index into the thread's grid-wide rank.
    const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
    const auto num_tiles = (active_thread_count + tile_size - 1) / tile_size;
    // How many members the last tile of this warp is short of a full tile.
    const auto tile_tail = num_tiles * tile_size - active_thread_count;

    // Step tile-sized window over active threads
    for (auto t = 0u; t < active_thread_count; t += tile_size) {
      const auto window_start = t;
      const auto window_end = t + tile_size;
      // Iterate through window
      for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
        const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
        // Full tiles report tile_size; only the last tile reports the reduced member count.
        const auto expected_val = tile_size - tile_tail * (t + tile_size >= active_thread_count);
        const auto actual_val = uint_arr.ptr()[global_thread_idx];
        INFO("global index: " << global_thread_idx);
        // REQUIRE is only evaluated on a mismatch.
        if (actual_val != expected_val) {
          REQUIRE(actual_val == expected_val);
        }
      }
    }
  }

  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // validate rank
  for (auto i = 0u; i < warps_in_grid; ++i) {
    auto current_warp_mask = active_masks.ptr()[i];
    const auto shift_amount =
        (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
    current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
    const auto [active_threads, active_thread_count] =
        coalesce_threads<kWarpSize>(current_warp_mask);
    const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);

    // Step tile-sized window over active threads
    for (auto t = 0u; t < active_thread_count; t += tile_size) {
      const auto window_start = t;
      const auto window_end = t + tile_size;
      // Iterate through window
      for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
        const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
        // The rank inside a tile is the position in the active list modulo the tile size.
        const auto expected_val = k % tile_size;
        const auto actual_val = uint_arr.ptr()[global_thread_idx];
        INFO("global index: " << global_thread_idx);
        if (actual_val != expected_val) {
          REQUIRE(actual_val == expected_val);
        }
      }
    }
  }
}
// Kernel: threads left active by `active_masks` partition their coalesced group into tiles of
// `tile_size` and shuffle their warp-tile rank up by `delta`; each thread stores the value it
// receives in `out` at its grid-wide rank.
template <typename T, size_t warp_size>
__global__ void coalesced_group_tiled_partition_shfl_up(uint64_t* active_masks, T* const out,
                                                        const unsigned int tile_size,
                                                        const unsigned int delta) {
  // Threads whose mask bit is clear leave before the coalesced group is formed.
  const bool masked_out = deactivate_thread<warp_size>(active_masks);
  if (masked_out) {
    return;
  }

  // The shuffled payload is the thread's rank inside the warp_size-wide tile of its block.
  const cg::thread_block_tile<warp_size> full_warp =
      cg::tiled_partition<warp_size>(cg::this_thread_block());
  T lane_value = static_cast<T>(full_warp.thread_rank());

  const auto partition = cg::tiled_partition(cg::coalesced_threads(), tile_size);
  out[thread_rank_in_grid()] = partition.shfl_up(lane_value, delta);
}
// Host-side driver for the shfl_up test, run once per shuffled value type T.
// For every generated tile size, grid/block shape and delta it:
//  1. fills one random 64-bit activity mask per warp and uploads the masks,
//  2. launches coalesced_group_tiled_partition_shfl_up, which writes the shuffled value of
//     every active thread to its grid-wide slot,
//  3. recomputes on the host which lane each active thread must have read from and compares.
// Only threads whose position inside their tile is >= delta are validated.
template <typename T> static void CoalescedGroupTiledPartitonShflUpTestImpl() {
const auto tile_size = GenerateTileSizes();
INFO("Tile size: " << tile_size);
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
// Every delta from the range built over (0u, tile_size) becomes its own test run.
const auto delta = GENERATE_COPY(range(0u, tile_size));
INFO("Delta: " << delta);
CPUGrid grid(blocks, threads);
// One output slot of type T per thread in the grid, mirrored on device and host.
const auto alloc_size = grid.thread_count_ * sizeof(T);
LinearAllocGuard<T> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);
// One activity mask per warp; a partially filled last warp of a block still gets one.
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
const auto warps_in_grid = warps_in_block * grid.block_count_;
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
warps_in_grid * sizeof(uint64_t));
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
warps_in_grid * sizeof(uint64_t));
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
hipMemcpyHostToDevice));
// Zero the output first so slots of deactivated threads hold a known value.
HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
coalesced_group_tiled_partition_shfl_up<T, kWarpSize>
<<<blocks, threads>>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Number of padding lanes in the last warp of each block (lanes with no backing thread).
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
for (auto i = 0u; i < warps_in_grid; ++i) {
auto current_warp_mask = active_masks.ptr()[i];
// For the last warp of a block, clear the mask bits of the padding lanes (and the upper
// 32 bits as well on NVIDIA); for all other warps the shift amount is 0.
const auto shift_amount =
(tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
const auto [active_threads, active_thread_count] =
coalesce_threads<kWarpSize>(current_warp_mask);
// Padding lanes accumulated by all preceding blocks; subtracting them converts a
// warp-padded index into the thread's grid-wide rank.
const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
// Step tile-sized window over active threads
for (auto t = 0u; t < active_thread_count; t += tile_size) {
// The first `delta` members of a tile are skipped; they are not validated.
const auto window_start = t + delta;
const auto window_end = t + tile_size;
// Iterate through window
for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
// Expected value: the warp lane of the active thread `delta` positions earlier.
const auto expected_val = active_threads[k - delta];
const auto actual_val = uint_arr.ptr()[global_thread_idx];
INFO("global index: " << global_thread_idx);
// REQUIRE is only evaluated on a mismatch.
if (actual_val != expected_val) {
REQUIRE(actual_val == expected_val);
}
}
}
}
}
/**
* Test Description
* ------------------------
* - Validates the shuffle up behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32,
* 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced
* group, with memberships of threads in the coalesced group being controlled via a passed in active
* mask. The test is run for all overloads of shfl_up.
* Test source
* ------------------------
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic", "",
                   int, unsigned int,
                   long, unsigned long,
                   long long, unsigned long long,
                   float, double) {
  // One instantiation of the shared shfl_up driver per listed value type.
  using ShuffledType = TestType;
  CoalescedGroupTiledPartitonShflUpTestImpl<ShuffledType>();
}
// Kernel: threads left active by `active_masks` partition their coalesced group into tiles of
// `tile_size` and shuffle their warp-tile rank down by `delta`; each thread stores the value it
// receives in `out` at its grid-wide rank.
template <typename T, size_t warp_size>
__global__ void coalesced_group_tiled_partition_shfl_down(uint64_t* active_masks, T* const out,
                                                          const unsigned int tile_size,
                                                          const unsigned int delta) {
  // Threads whose mask bit is clear leave before the coalesced group is formed.
  const bool masked_out = deactivate_thread<warp_size>(active_masks);
  if (masked_out) {
    return;
  }

  // The shuffled payload is the thread's rank inside the warp_size-wide tile of its block.
  const cg::thread_block_tile<warp_size> full_warp =
      cg::tiled_partition<warp_size>(cg::this_thread_block());
  T lane_value = static_cast<T>(full_warp.thread_rank());

  const auto partition = cg::tiled_partition(cg::coalesced_threads(), tile_size);
  out[thread_rank_in_grid()] = partition.shfl_down(lane_value, delta);
}
// Host-side driver for the shfl_down test, run once per shuffled value type T.
// For every generated tile size, grid/block shape and delta it:
//  1. fills one random 64-bit activity mask per warp and uploads the masks,
//  2. launches coalesced_group_tiled_partition_shfl_down, which writes the shuffled value of
//     every active thread to its grid-wide slot,
//  3. recomputes on the host which lane each active thread must have read from and compares.
// Only threads that have a source `delta` positions later inside the same tile are validated.
template <typename T> static void CoalescedGroupTiledPartitonShflDownTestImpl() {
const auto tile_size = GenerateTileSizes();
INFO("Tile size: " << tile_size);
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
// Every delta from the range built over (0u, tile_size) becomes its own test run.
const auto delta = GENERATE_COPY(range(0u, tile_size));
INFO("Delta: " << delta);
CPUGrid grid(blocks, threads);
// One output slot of type T per thread in the grid, mirrored on device and host.
const auto alloc_size = grid.thread_count_ * sizeof(T);
LinearAllocGuard<T> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);
// One activity mask per warp; a partially filled last warp of a block still gets one.
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
const auto warps_in_grid = warps_in_block * grid.block_count_;
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
warps_in_grid * sizeof(uint64_t));
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
warps_in_grid * sizeof(uint64_t));
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
hipMemcpyHostToDevice));
// Zero the output first so slots of deactivated threads hold a known value.
HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
coalesced_group_tiled_partition_shfl_down<T, kWarpSize>
<<<blocks, threads>>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Number of padding lanes in the last warp of each block (lanes with no backing thread).
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
for (auto i = 0u; i < warps_in_grid; ++i) {
auto current_warp_mask = active_masks.ptr()[i];
// For the last warp of a block, clear the mask bits of the padding lanes (and the upper
// 32 bits as well on NVIDIA); for all other warps the shift amount is 0.
const auto shift_amount =
(tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
const auto [active_threads, active_thread_count] =
coalesce_threads<kWarpSize>(current_warp_mask);
// Nothing to validate in this warp; this also keeps the unsigned subtraction
// `active_thread_count - delta` below from wrapping around.
if (delta >= active_thread_count) {
continue;
}
// Padding lanes accumulated by all preceding blocks; subtracting them converts a
// warp-padded index into the thread's grid-wide rank.
const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
// Step tile-sized window over active threads
for (auto t = 0u; t < active_thread_count; t += tile_size) {
// The last `delta` members of a tile are skipped; they are not validated.
const auto window_start = t;
const auto window_end = t + tile_size - delta;
// Iterate through window
for (auto k = window_start; k < window_end && k < active_thread_count - delta; ++k) {
const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
// Expected value: the warp lane of the active thread `delta` positions later.
const auto expected_val = active_threads[k + delta];
const auto actual_val = uint_arr.ptr()[global_thread_idx];
INFO("global index: " << global_thread_idx);
// REQUIRE is only evaluated on a mismatch.
if (actual_val != expected_val) {
REQUIRE(actual_val == expected_val);
}
}
}
}
}
/**
* Test Description
* ------------------------
* - Validates the shuffle down behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32,
* 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced
* group, with memberships of threads in the coalesced group being controlled via a passed in active
* mask. The test is run for all overloads of shfl_down.
* Test source
* ------------------------
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic", "",
                   int, unsigned int,
                   long, unsigned long,
                   long long, unsigned long long,
                   float, double) {
  // One instantiation of the shared shfl_down driver per listed value type.
  using ShuffledType = TestType;
  CoalescedGroupTiledPartitonShflDownTestImpl<ShuffledType>();
}
// Kernel: threads left active by `active_masks` partition their coalesced group into tiles of
// `tile_size`; each thread reads the warp-tile rank held by the tile member selected through
// `target_lanes[<own tile rank>]` and stores it in `out` at its grid-wide rank.
template <typename T, size_t warp_size>
__global__ void coalesced_group_tiled_partition_shfl(uint64_t* active_masks, uint8_t* target_lanes,
                                                     T* const out, const unsigned int tile_size) {
  // Threads whose mask bit is clear leave before the coalesced group is formed.
  const bool masked_out = deactivate_thread<warp_size>(active_masks);
  if (masked_out) {
    return;
  }

  // The shuffled payload is the thread's rank inside the warp_size-wide tile of its block.
  const cg::thread_block_tile<warp_size> full_warp =
      cg::tiled_partition<warp_size>(cg::this_thread_block());
  T lane_value = static_cast<T>(full_warp.thread_rank());

  const auto partition = cg::tiled_partition(cg::coalesced_threads(), tile_size);
  out[thread_rank_in_grid()] = partition.shfl(lane_value, target_lanes[partition.thread_rank()]);
}
// Host-side driver for the shfl test, run once per shuffled value type T.
// For every generated tile size and grid/block shape it:
//  1. fills one random 64-bit activity mask per warp and one random target lane per tile rank,
//  2. launches coalesced_group_tiled_partition_shfl, which writes the value every active thread
//     read from its target lane to the thread's grid-wide slot,
//  3. recomputes the expected source lane on the host and compares.
// Target lanes that fall outside the tile, or past the active threads left in the tile, are
// not validated.
template <typename T> static void CoalescedGroupTiledPartitonShflTestImpl() {
const auto tile_size = GenerateTileSizes();
INFO("Tile size: " << tile_size);
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
CPUGrid grid(blocks, threads);
// One output slot of type T per thread in the grid, mirrored on device and host.
const auto alloc_size = grid.thread_count_ * sizeof(T);
LinearAllocGuard<T> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);
// One activity mask per warp; a partially filled last warp of a block still gets one.
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
const auto warps_in_grid = warps_in_block * grid.block_count_;
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
warps_in_grid * sizeof(uint64_t));
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
warps_in_grid * sizeof(uint64_t));
// One target lane per tile rank, shared by every tile in the grid.
LinearAllocGuard<uint8_t> target_lanes_dev(LinearAllocs::hipMalloc, tile_size * sizeof(uint8_t));
LinearAllocGuard<uint8_t> target_lanes(LinearAllocs::hipHostMalloc, tile_size * sizeof(uint8_t));
// Targets are drawn from [0, 2 * tile_size], so some of them deliberately point past the tile.
std::generate(target_lanes.ptr(), target_lanes.ptr() + tile_size,
[tile_size] { return GenerateRandomInteger(0, static_cast<int>(2 * tile_size)); });
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(target_lanes_dev.ptr(), target_lanes.ptr(), tile_size * sizeof(uint8_t),
hipMemcpyHostToDevice));
// Zero the output first so slots of deactivated threads hold a known value.
HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
coalesced_group_tiled_partition_shfl<T, kWarpSize><<<blocks, threads>>>(
active_masks_dev.ptr(), target_lanes_dev.ptr(), uint_arr_dev.ptr(), tile_size);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Number of padding lanes in the last warp of each block (lanes with no backing thread).
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
for (auto i = 0u; i < warps_in_grid; ++i) {
auto current_warp_mask = active_masks.ptr()[i];
// For the last warp of a block, clear the mask bits of the padding lanes (and the upper
// 32 bits as well on NVIDIA); for all other warps the shift amount is 0.
const auto shift_amount =
(tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
const auto [active_threads, active_thread_count] =
coalesce_threads<kWarpSize>(current_warp_mask);
// Padding lanes accumulated by all preceding blocks; subtracting them converts a
// warp-padded index into the thread's grid-wide rank.
const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
// Step tile-sized window over active threads
for (auto t = 0u; t < active_thread_count; t += tile_size) {
const auto window_start = t;
const auto window_end = t + tile_size;
// Iterate through window
for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
// The target is selected by the thread's rank inside its tile.
const auto target_lane = target_lanes.ptr()[k % tile_size];
// Skip targets outside the tile or beyond the active threads remaining in this tile.
if (target_lane >= tile_size || target_lane >= active_thread_count - t) {
continue;
}
// Expected value: the warp lane of the tile member at position `target_lane`.
const auto expected_val = active_threads[t + target_lane];
const auto actual_val = uint_arr.ptr()[global_thread_idx];
INFO("global index: " << global_thread_idx);
// REQUIRE is only evaluated on a mismatch.
if (actual_val != expected_val) {
REQUIRE(actual_val == expected_val);
}
}
}
}
}
/**
 * Test Description
 * ------------------------
 *  - Validates the shuffle behavior of tiled partitions of all valid sizes {2, 4, 8, 16, 32,
 * 64 (AMD only)}. The partitions are created over a coalesced group, and the membership of
 * threads in the coalesced group is controlled via a passed-in active mask. The test case is
 * instantiated once for every element type listed in the TEMPLATE_TEST_CASE below, so that all
 * overloads of shfl are exercised.
 *  - NOTE(review): this description originally stated that source lanes ("delta values") lie in
 * [0, tile size). The host-side setup earlier in this file appears to generate lanes with an
 * upper bound of 2 * tile size and to skip out-of-tile lanes during validation - confirm which
 * wording is intended.
 * Test source
 * ------------------------
 *  - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic", "", int,
unsigned int, long, unsigned long, long long, unsigned long long, float,
double) {
// Runs the shuffle test implementation once for the current TestType.
CoalescedGroupTiledPartitonShflTestImpl<TestType>();
}
/**
 * Kernel that checks tile-level synchronization of partitions built on a coalesced group.
 *
 * Threads for which deactivate_thread() reports true exit immediately. Every remaining thread
 * busy-waits according to its entry of wait_modifiers, stores its rank within its tile into a
 * slot derived from the warp base index and its coalesced rank, and synchronizes the tile. It
 * then reads the slots belonging to the ranks of its own tile and checks that each one holds the
 * expected in-tile rank. The boolean outcome overwrites the thread's own slot and, when the
 * scratch array lives in shared memory, is also copied to global_data.
 *
 * @tparam use_global  true: operate directly on global_data; false: use dynamic shared memory.
 * @tparam warp_size   warp width used to compute each warp's base index.
 * @tparam T           element type of the scratch/result array.
 * @param active_masks    masks forwarded to deactivate_thread().
 * @param global_data     result array (also the scratch array when use_global is true).
 * @param wait_modifiers  per-thread argument for busy_wait(), indexed by grid thread rank.
 * @param tile_size       size passed to cg::tiled_partition.
 */
template <bool use_global, size_t warp_size, typename T>
__global__ void coalesced_group_tiled_partition_sync_check(uint64_t* active_masks, T* global_data,
                                                           unsigned int* wait_modifiers,
                                                           size_t tile_size) {
  if (deactivate_thread<warp_size>(active_masks)) {
    return;
  }

  extern __shared__ uint8_t shared_data[];
  T* const scratch = use_global ? global_data : reinterpret_cast<T*>(shared_data);

  const auto grid_rank = cg::this_grid().thread_rank();
  const auto this_block = cg::this_thread_block();
  const auto active_group = cg::coalesced_threads();
  const auto tile = cg::tiled_partition(active_group, tile_size);

  // Global memory is indexed directly; the shared array is indexed modulo the block size.
  const auto slot = [&this_block](unsigned int idx) {
    return use_global ? idx : (idx % this_block.size());
  };

  const auto delay = wait_modifiers[grid_rank];
  const auto block_index = grid_rank / this_block.size();
  const auto warp_index = this_block.thread_rank() / warp_size;
  const auto warp_base = block_index * this_block.size() + warp_index * warp_size;
  const auto own_idx = warp_base + active_group.thread_rank();

  busy_wait(delay);
  scratch[slot(own_idx)] = tile.thread_rank();
  tile.sync();

  bool all_match = true;
  const auto own_tile = active_group.thread_rank() / tile_size;
  for (auto step = 0u; step < tile_size; ++step) {
    const auto peer_in_tile = (active_group.thread_rank() + step) % tile_size;
    const auto peer_in_group = own_tile * tile_size + peer_in_tile;
    // Ranks at or beyond the coalesced group's size are skipped.
    if (peer_in_group < active_group.size()) {
      all_match &= (scratch[slot(warp_base + peer_in_group)] == peer_in_tile);
      if (!all_match) {
        break;
      }
    }
  }

  // Second barrier precedes overwriting the slot with the verdict.
  tile.sync();
  scratch[slot(own_idx)] = all_match;
  if constexpr (!use_global) {
    global_data[own_idx] = scratch[slot(own_idx)];
  }
}
template <bool global_memory, typename T> void CoalescedGroupTiledPartitionSyncTest() {
const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations));
INFO("Run number: " << randomized_run_count + 1);
const auto tile_size = GenerateTileSizes();
INFO("Tile size: " << tile_size);
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
CPUGrid grid(blocks, threads);
const auto alloc_size = grid.thread_count_ * sizeof(T);
const auto alloc_size_per_block = alloc_size / grid.block_count_;
int max_shared_mem_per_block = 0;
HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block,
hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
if (!global_memory && (max_shared_mem_per_block < alloc_size_per_block)) {
return;
}
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
LinearAllocGuard<unsigned int> wait_modifiers_dev(LinearAllocs::hipMalloc,
grid.thread_count_ * sizeof(unsigned int));
LinearAllocGuard<unsigned int> wait_modifiers(LinearAllocs::hipHostMalloc,
grid.thread_count_ * sizeof(unsigned int));
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
const auto warps_in_grid = warps_in_block * grid.block_count_;
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
warps_in_grid * sizeof(uint64_t));
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
warps_in_grid * sizeof(uint64_t));
if (randomized_run_count != 0) {
std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_,
[] { return GenerateRandomInteger(0u, 1500u); });
} else {
std::fill_n(wait_modifiers.ptr(), grid.thread_count_, 0u);
}
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(),
grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));
const auto shared_memory_size = global_memory ? 0u : alloc_size_per_block;
coalesced_group_tiled_partition_sync_check<global_memory, kWarpSize>
<<<blocks, threads, shared_memory_size>>>(active_masks_dev.ptr(), arr_dev.ptr(),
wait_modifiers_dev.ptr(), tile_size);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
for (int i = 0u; i < grid.block_count_; ++i) {
for (int j = 0u; j < warps_in_block; ++j) {
const auto warp_idx = i * warps_in_block + j;
auto mask = active_masks.ptr()[warp_idx];
const auto shift_amount =
(tail + 32 * TestContext::get().isNvidia()) * !((warp_idx + 1) % warps_in_block);
mask = (mask << shift_amount) >> shift_amount;
const auto active_count = std::bitset<sizeof(mask) * 8>(mask).count();
const auto start_offset = i * grid.threads_in_block_count_ + j * kWarpSize;
const auto end_offset = start_offset + active_count;
const auto valid =
std::all_of(arr.ptr() + start_offset, arr.ptr() + end_offset, [](T e) { return e; });
if (!valid) {
REQUIRE(valid);
}
}
}
}
// NOTE(review): `counter` is not referenced in the part of this file visible here - confirm
// whether it is still needed.
uint64_t counter = 0;
/**
 * Test Description
 * ------------------------
 *  - Launches a kernel wherein threads in each warp are deactivated based on a passed bitmask.
 * Coalesced groups are formed and divided into tiled partitions (sizes 2, 4, 8, 16, 32, and 64 on
 * AMD), and every thread writes its intra-tile rank into an array slot determined by its global
 * warp rank and coalesced group rank. The array is either in global or dynamic shared memory
 * based on a compile-time switch, and the test is run for arrays of 1, 2, and 4 byte elements.
 * Before the write each thread executes a busy-wait loop whose length is read from an input
 * array. After the write a tile-wide sync is performed and each thread validates that it can
 * read the expected values that other threads within the same tile have written to their
 * respective array slots.
 * Test source
 * ------------------------
 *  - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic", "", uint8_t,
uint16_t, uint32_t) {
// One section per memory space; the bool template argument selects global (true) or shared (false).
SECTION("Global memory") { CoalescedGroupTiledPartitionSyncTest<true, TestType>(); }
SECTION("Shared memory") { CoalescedGroupTiledPartitionSyncTest<false, TestType>(); }
}
+1 -1
Просмотреть файл
@@ -21,7 +21,7 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip/hip_cooperative_groups.h>
#include <hip_test_defgroups.hh>
/**
* @addtogroup coalesced_group thread_block_tile
+1
Просмотреть файл
@@ -76,3 +76,4 @@ template <class T> bool CheckDimensions(unsigned int device, T kernel, dim3 bloc
return true;
}
+1 -1
Просмотреть файл
@@ -18,7 +18,7 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <dlfcn.h>
#include <hip_test_defgroups.hh>
/**
* @addtogroup hipLaunchKernelGGL hipLaunchCooperativeKernel
* @{
+1 -1
Просмотреть файл
@@ -17,7 +17,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_defgroups.hh>
#include <stdio.h>
#include <dlfcn.h>
#include <vector>
+4 -3
Просмотреть файл
@@ -1,14 +1,15 @@
# Common Tests - Test independent of all platforms
set(TEST_SRC
error_handling_common.cc
hipGetErrorName.cc
hipGetErrorString.cc
hipGetLastError.cc
hipPeekAtLastError.cc
hipDrvGetErrorName.cc
hipDrvGetErrorString.cc
hipGetLastError.cc
hipPeekAtLastError.cc
)
hip_add_exe_to_target(NAME ErrorHandlingTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++17)
COMPILE_OPTIONS -std=c++17)
+534
Просмотреть файл
@@ -0,0 +1,534 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "error_handling_common.hh"
/**
 * Maps an error code to the name string the error-handling tests compare against.
 *
 * On HT_AMD builds the result is the spelling of the HIP enumerator itself; on all other builds
 * it is a CUDA_ERROR_* spelling. Codes that are not listed yield "hipErrorUnknown" (HT_AMD) or
 * "CUDA_ERROR_UNKNOWN" (otherwise).
 *
 * @param enumerator  error code to look up.
 * @return pointer to a string literal; never null.
 */
const char* ErrorName(hipError_t enumerator) {
  struct NameEntry {
    hipError_t code;
    const char* name;
  };

#if HT_AMD
  static constexpr NameEntry kNames[] = {
      {hipSuccess, "hipSuccess"},
      {hipErrorInvalidValue, "hipErrorInvalidValue"},
      {hipErrorOutOfMemory, "hipErrorOutOfMemory"},
      {hipErrorNotInitialized, "hipErrorNotInitialized"},
      {hipErrorDeinitialized, "hipErrorDeinitialized"},
      {hipErrorProfilerDisabled, "hipErrorProfilerDisabled"},
      {hipErrorProfilerNotInitialized, "hipErrorProfilerNotInitialized"},
      {hipErrorProfilerAlreadyStarted, "hipErrorProfilerAlreadyStarted"},
      {hipErrorProfilerAlreadyStopped, "hipErrorProfilerAlreadyStopped"},
      {hipErrorInvalidConfiguration, "hipErrorInvalidConfiguration"},
      {hipErrorInvalidSymbol, "hipErrorInvalidSymbol"},
      {hipErrorInvalidDevicePointer, "hipErrorInvalidDevicePointer"},
      {hipErrorInvalidMemcpyDirection, "hipErrorInvalidMemcpyDirection"},
      {hipErrorInsufficientDriver, "hipErrorInsufficientDriver"},
      {hipErrorMissingConfiguration, "hipErrorMissingConfiguration"},
      {hipErrorPriorLaunchFailure, "hipErrorPriorLaunchFailure"},
      {hipErrorInvalidDeviceFunction, "hipErrorInvalidDeviceFunction"},
      {hipErrorNoDevice, "hipErrorNoDevice"},
      {hipErrorInvalidDevice, "hipErrorInvalidDevice"},
      {hipErrorInvalidPitchValue, "hipErrorInvalidPitchValue"},
      {hipErrorInvalidImage, "hipErrorInvalidImage"},
      {hipErrorInvalidContext, "hipErrorInvalidContext"},
      {hipErrorContextAlreadyCurrent, "hipErrorContextAlreadyCurrent"},
      {hipErrorMapFailed, "hipErrorMapFailed"},
      {hipErrorUnmapFailed, "hipErrorUnmapFailed"},
      {hipErrorArrayIsMapped, "hipErrorArrayIsMapped"},
      {hipErrorAlreadyMapped, "hipErrorAlreadyMapped"},
      {hipErrorNoBinaryForGpu, "hipErrorNoBinaryForGpu"},
      {hipErrorAlreadyAcquired, "hipErrorAlreadyAcquired"},
      {hipErrorNotMapped, "hipErrorNotMapped"},
      {hipErrorNotMappedAsArray, "hipErrorNotMappedAsArray"},
      {hipErrorNotMappedAsPointer, "hipErrorNotMappedAsPointer"},
      {hipErrorECCNotCorrectable, "hipErrorECCNotCorrectable"},
      {hipErrorUnsupportedLimit, "hipErrorUnsupportedLimit"},
      {hipErrorContextAlreadyInUse, "hipErrorContextAlreadyInUse"},
      {hipErrorPeerAccessUnsupported, "hipErrorPeerAccessUnsupported"},
      {hipErrorInvalidKernelFile, "hipErrorInvalidKernelFile"},
      {hipErrorInvalidGraphicsContext, "hipErrorInvalidGraphicsContext"},
      {hipErrorInvalidSource, "hipErrorInvalidSource"},
      {hipErrorFileNotFound, "hipErrorFileNotFound"},
      {hipErrorSharedObjectSymbolNotFound, "hipErrorSharedObjectSymbolNotFound"},
      {hipErrorSharedObjectInitFailed, "hipErrorSharedObjectInitFailed"},
      {hipErrorOperatingSystem, "hipErrorOperatingSystem"},
      {hipErrorInvalidHandle, "hipErrorInvalidHandle"},
      {hipErrorIllegalState, "hipErrorIllegalState"},
      {hipErrorNotFound, "hipErrorNotFound"},
      {hipErrorNotReady, "hipErrorNotReady"},
      {hipErrorIllegalAddress, "hipErrorIllegalAddress"},
      {hipErrorLaunchOutOfResources, "hipErrorLaunchOutOfResources"},
      {hipErrorLaunchTimeOut, "hipErrorLaunchTimeOut"},
      {hipErrorPeerAccessAlreadyEnabled, "hipErrorPeerAccessAlreadyEnabled"},
      {hipErrorPeerAccessNotEnabled, "hipErrorPeerAccessNotEnabled"},
      {hipErrorSetOnActiveProcess, "hipErrorSetOnActiveProcess"},
      {hipErrorContextIsDestroyed, "hipErrorContextIsDestroyed"},
      {hipErrorAssert, "hipErrorAssert"},
      {hipErrorHostMemoryAlreadyRegistered, "hipErrorHostMemoryAlreadyRegistered"},
      {hipErrorHostMemoryNotRegistered, "hipErrorHostMemoryNotRegistered"},
      {hipErrorLaunchFailure, "hipErrorLaunchFailure"},
      {hipErrorNotSupported, "hipErrorNotSupported"},
      {hipErrorUnknown, "hipErrorUnknown"},
      {hipErrorRuntimeMemory, "hipErrorRuntimeMemory"},
      {hipErrorRuntimeOther, "hipErrorRuntimeOther"},
      {hipErrorCooperativeLaunchTooLarge, "hipErrorCooperativeLaunchTooLarge"},
      {hipErrorStreamCaptureUnsupported, "hipErrorStreamCaptureUnsupported"},
      {hipErrorStreamCaptureInvalidated, "hipErrorStreamCaptureInvalidated"},
      {hipErrorStreamCaptureMerge, "hipErrorStreamCaptureMerge"},
      {hipErrorStreamCaptureUnmatched, "hipErrorStreamCaptureUnmatched"},
      {hipErrorStreamCaptureUnjoined, "hipErrorStreamCaptureUnjoined"},
      {hipErrorStreamCaptureIsolation, "hipErrorStreamCaptureIsolation"},
      {hipErrorStreamCaptureImplicit, "hipErrorStreamCaptureImplicit"},
      {hipErrorCapturedEvent, "hipErrorCapturedEvent"},
      {hipErrorStreamCaptureWrongThread, "hipErrorStreamCaptureWrongThread"},
      {hipErrorGraphExecUpdateFailure, "hipErrorGraphExecUpdateFailure"},
      {hipErrorTbd, "hipErrorTbd"},
  };
  const char* const fallback = "hipErrorUnknown";
#else
  static constexpr NameEntry kNames[] = {
      {hipSuccess, "CUDA_SUCCESS"},
      {hipErrorInvalidValue, "CUDA_ERROR_INVALID_VALUE"},
      {hipErrorOutOfMemory, "CUDA_ERROR_OUT_OF_MEMORY"},
      {hipErrorNotInitialized, "CUDA_ERROR_NOT_INITIALIZED"},
      {hipErrorDeinitialized, "CUDA_ERROR_DEINITIALIZED"},
      {hipErrorProfilerDisabled, "CUDA_ERROR_PROFILER_DISABLED"},
      {hipErrorProfilerNotInitialized, "CUDA_ERROR_PROFILER_NOT_INITIALIZED"},
      {hipErrorProfilerAlreadyStarted, "CUDA_ERROR_PROFILER_ALREADY_STARTED"},
      {hipErrorProfilerAlreadyStopped, "CUDA_ERROR_PROFILER_ALREADY_STOPPED"},
      {hipErrorInvalidConfiguration, "CUDA_ERROR_UNKNOWN"},
      {hipErrorInvalidSymbol, "CUDA_ERROR_UNKNOWN"},
      {hipErrorInvalidDevicePointer, "CUDA_ERROR_UNKNOWN"},
      {hipErrorInvalidMemcpyDirection, "CUDA_ERROR_UNKNOWN"},
      {hipErrorInsufficientDriver, "CUDA_ERROR_UNKNOWN"},
      {hipErrorMissingConfiguration, "CUDA_ERROR_UNKNOWN"},
      {hipErrorPriorLaunchFailure, "CUDA_ERROR_UNKNOWN"},
      {hipErrorInvalidDeviceFunction, "CUDA_ERROR_UNKNOWN"},
      {hipErrorNoDevice, "CUDA_ERROR_NO_DEVICE"},
      {hipErrorInvalidDevice, "CUDA_ERROR_INVALID_DEVICE"},
      {hipErrorInvalidPitchValue, "CUDA_ERROR_UNKNOWN"},
      {hipErrorInvalidImage, "CUDA_ERROR_INVALID_IMAGE"},
      {hipErrorInvalidContext, "CUDA_ERROR_INVALID_CONTEXT"},
      {hipErrorContextAlreadyCurrent, "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"},
      {hipErrorMapFailed, "CUDA_ERROR_MAP_FAILED"},
      {hipErrorUnmapFailed, "CUDA_ERROR_UNMAP_FAILED"},
      {hipErrorArrayIsMapped, "CUDA_ERROR_ARRAY_IS_MAPPED"},
      {hipErrorAlreadyMapped, "CUDA_ERROR_ALREADY_MAPPED"},
      {hipErrorNoBinaryForGpu, "CUDA_ERROR_NO_BINARY_FOR_GPU"},
      {hipErrorAlreadyAcquired, "CUDA_ERROR_ALREADY_ACQUIRED"},
      {hipErrorNotMapped, "CUDA_ERROR_NOT_MAPPED"},
      {hipErrorNotMappedAsArray, "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"},
      {hipErrorNotMappedAsPointer, "CUDA_ERROR_NOT_MAPPED_AS_POINTER"},
      {hipErrorECCNotCorrectable, "CUDA_ERROR_ECC_UNCORRECTABLE"},
      {hipErrorUnsupportedLimit, "CUDA_ERROR_UNSUPPORTED_LIMIT"},
      {hipErrorContextAlreadyInUse, "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"},
      {hipErrorPeerAccessUnsupported, "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"},
      {hipErrorInvalidKernelFile, "CUDA_ERROR_INVALID_PTX"},
      {hipErrorInvalidGraphicsContext, "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"},
      {hipErrorInvalidSource, "CUDA_ERROR_INVALID_SOURCE"},
      {hipErrorFileNotFound, "CUDA_ERROR_FILE_NOT_FOUND"},
      {hipErrorSharedObjectSymbolNotFound, "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"},
      {hipErrorSharedObjectInitFailed, "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"},
      {hipErrorOperatingSystem, "CUDA_ERROR_OPERATING_SYSTEM"},
      {hipErrorInvalidHandle, "CUDA_ERROR_INVALID_HANDLE"},
      {hipErrorIllegalState, "CUDA_ERROR_ILLEGAL_STATE"},
      {hipErrorNotFound, "CUDA_ERROR_NOT_FOUND"},
      {hipErrorNotReady, "CUDA_ERROR_NOT_READY"},
      {hipErrorIllegalAddress, "CUDA_ERROR_ILLEGAL_ADDRESS"},
      {hipErrorLaunchOutOfResources, "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"},
      {hipErrorLaunchTimeOut, "CUDA_ERROR_LAUNCH_TIMEOUT"},
      {hipErrorPeerAccessAlreadyEnabled, "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"},
      {hipErrorPeerAccessNotEnabled, "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"},
      {hipErrorSetOnActiveProcess, "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"},
      {hipErrorContextIsDestroyed, "CUDA_ERROR_CONTEXT_IS_DESTROYED"},
      {hipErrorAssert, "CUDA_ERROR_ASSERT"},
      {hipErrorHostMemoryAlreadyRegistered, "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"},
      {hipErrorHostMemoryNotRegistered, "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"},
      {hipErrorLaunchFailure, "CUDA_ERROR_LAUNCH_FAILED"},
      {hipErrorNotSupported, "CUDA_ERROR_NOT_SUPPORTED"},
      {hipErrorUnknown, "CUDA_ERROR_UNKNOWN"},
      {hipErrorRuntimeMemory, "CUDA_ERROR_UNKNOWN"},
      {hipErrorRuntimeOther, "CUDA_ERROR_UNKNOWN"},
      {hipErrorCooperativeLaunchTooLarge, "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE"},
      {hipErrorStreamCaptureUnsupported, "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED"},
      {hipErrorStreamCaptureInvalidated, "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED"},
      {hipErrorStreamCaptureMerge, "CUDA_ERROR_STREAM_CAPTURE_MERGE"},
      {hipErrorStreamCaptureUnmatched, "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED"},
      {hipErrorStreamCaptureUnjoined, "CUDA_ERROR_STREAM_CAPTURE_UNJOINED"},
      {hipErrorStreamCaptureIsolation, "CUDA_ERROR_STREAM_CAPTURE_ISOLATION"},
      {hipErrorStreamCaptureImplicit, "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT"},
      {hipErrorCapturedEvent, "CUDA_ERROR_CAPTURED_EVENT"},
      {hipErrorStreamCaptureWrongThread, "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD"},
      {hipErrorGraphExecUpdateFailure, "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE"},
  };
  const char* const fallback = "CUDA_ERROR_UNKNOWN";
#endif

  // Linear scan; codes missing from the table resolve to the platform's "unknown" name.
  for (const auto& entry : kNames) {
    if (entry.code == enumerator) {
      return entry.name;
    }
  }
  return fallback;
}
/**
 * Maps an error code to the description string the error-handling tests compare against.
 *
 * Most descriptions are shared between platforms. Four codes have an HT_AMD-specific wording and
 * a different wording on every other build, and nine codes are only handled on HT_AMD builds; on
 * other builds those nine fall through to the default "unknown error", as does any unlisted code.
 *
 * @param enumerator  error code to look up.
 * @return pointer to a string literal; never null.
 */
const char* ErrorString(hipError_t enumerator) {
  switch (enumerator) {
    case hipSuccess:
      return "no error";
    case hipErrorInvalidValue:
      return "invalid argument";
    case hipErrorOutOfMemory:
      return "out of memory";
    case hipErrorNotInitialized:
      return "initialization error";
    case hipErrorDeinitialized:
      return "driver shutting down";
    case hipErrorProfilerDisabled:
      return "profiler disabled while using external profiling tool";
    case hipErrorProfilerNotInitialized:
#if HT_AMD
      return "profiler is not initialized";
#else
      return "profiler not initialized: call cudaProfilerInitialize()";
#endif
    case hipErrorProfilerAlreadyStarted:
      return "profiler already started";
    case hipErrorProfilerAlreadyStopped:
      return "profiler already stopped";
#if HT_AMD
    // These codes only have their own description on AMD; elsewhere they are left to the
    // default branch and therefore report "unknown error".
    case hipErrorInvalidConfiguration:
      return "invalid configuration argument";
    case hipErrorInvalidPitchValue:
      return "invalid pitch argument";
    case hipErrorInvalidSymbol:
      return "invalid device symbol";
    case hipErrorInvalidDevicePointer:
      return "invalid device pointer";
    case hipErrorInvalidMemcpyDirection:
      return "invalid copy direction for memcpy";
    case hipErrorInsufficientDriver:
      return "driver version is insufficient for runtime version";
    case hipErrorMissingConfiguration:
      return "__global__ function call is not configured";
    case hipErrorPriorLaunchFailure:
      return "unspecified launch failure in prior launch";
    case hipErrorInvalidDeviceFunction:
      return "invalid device function";
#endif
    case hipErrorNoDevice:
#if HT_AMD
      return "no ROCm-capable device is detected";
#else
      return "no CUDA-capable device is detected";
#endif
    case hipErrorInvalidDevice:
      return "invalid device ordinal";
    case hipErrorInvalidImage:
      return "device kernel image is invalid";
    case hipErrorInvalidContext:
      return "invalid device context";
    case hipErrorContextAlreadyCurrent:
#if HT_AMD
      return "context is already current context";
#else
      return "context already current";
#endif
    case hipErrorMapFailed:
      return "mapping of buffer object failed";
    case hipErrorUnmapFailed:
      return "unmapping of buffer object failed";
    case hipErrorArrayIsMapped:
      return "array is mapped";
    case hipErrorAlreadyMapped:
      return "resource already mapped";
    case hipErrorNoBinaryForGpu:
      return "no kernel image is available for execution on the device";
    case hipErrorAlreadyAcquired:
      return "resource already acquired";
    case hipErrorNotMapped:
      return "resource not mapped";
    case hipErrorNotMappedAsArray:
      return "resource not mapped as array";
    case hipErrorNotMappedAsPointer:
      return "resource not mapped as pointer";
    case hipErrorECCNotCorrectable:
      return "uncorrectable ECC error encountered";
    case hipErrorUnsupportedLimit:
      return "limit is not supported on this architecture";
    case hipErrorContextAlreadyInUse:
      return "exclusive-thread device already in use by a different thread";
    case hipErrorPeerAccessUnsupported:
      return "peer access is not supported between these two devices";
    case hipErrorInvalidKernelFile:
#if HT_AMD
      return "invalid kernel file";
#else
      return "a PTX JIT compilation failed";
#endif
    case hipErrorInvalidGraphicsContext:
      return "invalid OpenGL or DirectX context";
    case hipErrorInvalidSource:
      return "device kernel image is invalid";
    case hipErrorFileNotFound:
      return "file not found";
    case hipErrorSharedObjectSymbolNotFound:
      return "shared object symbol not found";
    case hipErrorSharedObjectInitFailed:
      return "shared object initialization failed";
    case hipErrorOperatingSystem:
      return "OS call failed or operation not supported on this OS";
    case hipErrorInvalidHandle:
      return "invalid resource handle";
    case hipErrorIllegalState:
      return "the operation cannot be performed in the present state";
    case hipErrorNotFound:
      return "named symbol not found";
    case hipErrorNotReady:
      return "device not ready";
    case hipErrorIllegalAddress:
      return "an illegal memory access was encountered";
    case hipErrorLaunchOutOfResources:
      return "too many resources requested for launch";
    case hipErrorLaunchTimeOut:
      return "the launch timed out and was terminated";
    case hipErrorPeerAccessAlreadyEnabled:
      return "peer access is already enabled";
    case hipErrorPeerAccessNotEnabled:
      return "peer access has not been enabled";
    case hipErrorSetOnActiveProcess:
      return "cannot set while device is active in this process";
    case hipErrorContextIsDestroyed:
      return "context is destroyed";
    case hipErrorAssert:
      return "device-side assert triggered";
    case hipErrorHostMemoryAlreadyRegistered:
      return "part or all of the requested memory range is already mapped";
    case hipErrorHostMemoryNotRegistered:
      return "pointer does not correspond to a registered memory region";
    case hipErrorLaunchFailure:
      return "unspecified launch failure";
    case hipErrorCooperativeLaunchTooLarge:
      return "too many blocks in cooperative launch";
    case hipErrorNotSupported:
      return "operation not supported";
    case hipErrorStreamCaptureUnsupported:
      return "operation not permitted when stream is capturing";
    case hipErrorStreamCaptureInvalidated:
      return "operation failed due to a previous error during capture";
    case hipErrorStreamCaptureMerge:
      return "operation would result in a merge of separate capture sequences";
    case hipErrorStreamCaptureUnmatched:
      return "capture was not ended in the same stream as it began";
    case hipErrorStreamCaptureUnjoined:
      return "capturing stream has unjoined work";
    case hipErrorStreamCaptureIsolation:
      return "dependency created on uncaptured work in another stream";
    case hipErrorStreamCaptureImplicit:
      return "operation would make the legacy stream depend on a capturing blocking stream";  // NOLINT
    case hipErrorCapturedEvent:
      return "operation not permitted on an event last recorded in a capturing stream";  // NOLINT
    case hipErrorStreamCaptureWrongThread:
      return "attempt to terminate a thread-local capture sequence from another thread";  // NOLINT
    case hipErrorGraphExecUpdateFailure:
      return "the graph update was not performed because it included changes which violated "
             "constraints specific to instantiated graph update";  // NOLINT
    case hipErrorRuntimeMemory:
      return "runtime memory call returned error";
    case hipErrorRuntimeOther:
      return "runtime call other than memory returned error";
    case hipErrorUnknown:
    default:
      return "unknown error";
  }
}
+10 -6
Просмотреть файл
@@ -1,5 +1,5 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -33,7 +33,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess,
hipErrorProfilerNotInitialized,
hipErrorProfilerAlreadyStarted,
hipErrorProfilerAlreadyStopped,
#if HT_AMD
#if HT_AMD
hipErrorInvalidConfiguration,
hipErrorInvalidPitchValue,
hipErrorInvalidSymbol,
@@ -43,7 +43,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess,
hipErrorMissingConfiguration,
hipErrorPriorLaunchFailure,
hipErrorInvalidDeviceFunction,
#endif
#endif
hipErrorNoDevice,
hipErrorInvalidDevice,
hipErrorInvalidImage,
@@ -97,8 +97,12 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess,
hipErrorStreamCaptureWrongThread,
hipErrorGraphExecUpdateFailure,
hipErrorUnknown,
#if HT_AMD
#if HT_AMD
hipErrorRuntimeMemory,
hipErrorRuntimeOther
#endif
};
#endif
};
const char* ErrorName(hipError_t enumerator);
const char* ErrorString(hipError_t enumerator);
+49 -329
Просмотреть файл
@@ -1,5 +1,5 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -17,347 +17,67 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include "errorEnumerators.h"
// Local Function to return the error code in string
#include "error_handling_common.hh"
static const char *ErrorName(hipError_t enumerator) {
switch (enumerator) {
#if HT_AMD
case hipSuccess:
return "hipSuccess";
case hipErrorInvalidValue:
return "hipErrorInvalidValue";
case hipErrorOutOfMemory:
return "hipErrorOutOfMemory";
case hipErrorNotInitialized:
return "hipErrorNotInitialized";
case hipErrorDeinitialized:
return "hipErrorDeinitialized";
case hipErrorProfilerDisabled:
return "hipErrorProfilerDisabled";
case hipErrorProfilerNotInitialized:
return "hipErrorProfilerNotInitialized";
case hipErrorProfilerAlreadyStarted:
return "hipErrorProfilerAlreadyStarted";
case hipErrorProfilerAlreadyStopped:
return "hipErrorProfilerAlreadyStopped";
case hipErrorInvalidConfiguration:
return "hipErrorInvalidConfiguration";
case hipErrorInvalidSymbol:
return "hipErrorInvalidSymbol";
case hipErrorInvalidDevicePointer:
return "hipErrorInvalidDevicePointer";
case hipErrorInvalidMemcpyDirection:
return "hipErrorInvalidMemcpyDirection";
case hipErrorInsufficientDriver:
return "hipErrorInsufficientDriver";
case hipErrorMissingConfiguration:
return "hipErrorMissingConfiguration";
case hipErrorPriorLaunchFailure:
return "hipErrorPriorLaunchFailure";
case hipErrorInvalidDeviceFunction:
return "hipErrorInvalidDeviceFunction";
case hipErrorNoDevice:
return "hipErrorNoDevice";
case hipErrorInvalidDevice:
return "hipErrorInvalidDevice";
case hipErrorInvalidPitchValue:
return "hipErrorInvalidPitchValue";
case hipErrorInvalidImage:
return "hipErrorInvalidImage";
case hipErrorInvalidContext:
return "hipErrorInvalidContext";
case hipErrorContextAlreadyCurrent:
return "hipErrorContextAlreadyCurrent";
case hipErrorMapFailed:
return "hipErrorMapFailed";
case hipErrorUnmapFailed:
return "hipErrorUnmapFailed";
case hipErrorArrayIsMapped:
return "hipErrorArrayIsMapped";
case hipErrorAlreadyMapped:
return "hipErrorAlreadyMapped";
case hipErrorNoBinaryForGpu:
return "hipErrorNoBinaryForGpu";
case hipErrorAlreadyAcquired:
return "hipErrorAlreadyAcquired";
case hipErrorNotMapped:
return "hipErrorNotMapped";
case hipErrorNotMappedAsArray:
return "hipErrorNotMappedAsArray";
case hipErrorNotMappedAsPointer:
return "hipErrorNotMappedAsPointer";
case hipErrorECCNotCorrectable:
return "hipErrorECCNotCorrectable";
case hipErrorUnsupportedLimit:
return "hipErrorUnsupportedLimit";
case hipErrorContextAlreadyInUse:
return "hipErrorContextAlreadyInUse";
case hipErrorPeerAccessUnsupported:
return "hipErrorPeerAccessUnsupported";
case hipErrorInvalidKernelFile:
return "hipErrorInvalidKernelFile";
case hipErrorInvalidGraphicsContext:
return "hipErrorInvalidGraphicsContext";
case hipErrorInvalidSource:
return "hipErrorInvalidSource";
case hipErrorFileNotFound:
return "hipErrorFileNotFound";
case hipErrorSharedObjectSymbolNotFound:
return "hipErrorSharedObjectSymbolNotFound";
case hipErrorSharedObjectInitFailed:
return "hipErrorSharedObjectInitFailed";
case hipErrorOperatingSystem:
return "hipErrorOperatingSystem";
case hipErrorInvalidHandle:
return "hipErrorInvalidHandle";
case hipErrorIllegalState:
return "hipErrorIllegalState";
case hipErrorNotFound:
return "hipErrorNotFound";
case hipErrorNotReady:
return "hipErrorNotReady";
case hipErrorIllegalAddress:
return "hipErrorIllegalAddress";
case hipErrorLaunchOutOfResources:
return "hipErrorLaunchOutOfResources";
case hipErrorLaunchTimeOut:
return "hipErrorLaunchTimeOut";
case hipErrorPeerAccessAlreadyEnabled:
return "hipErrorPeerAccessAlreadyEnabled";
case hipErrorPeerAccessNotEnabled:
return "hipErrorPeerAccessNotEnabled";
case hipErrorSetOnActiveProcess:
return "hipErrorSetOnActiveProcess";
case hipErrorContextIsDestroyed:
return "hipErrorContextIsDestroyed";
case hipErrorAssert:
return "hipErrorAssert";
case hipErrorHostMemoryAlreadyRegistered:
return "hipErrorHostMemoryAlreadyRegistered";
case hipErrorHostMemoryNotRegistered:
return "hipErrorHostMemoryNotRegistered";
case hipErrorLaunchFailure:
return "hipErrorLaunchFailure";
case hipErrorNotSupported:
return "hipErrorNotSupported";
case hipErrorUnknown:
return "hipErrorUnknown";
case hipErrorRuntimeMemory:
return "hipErrorRuntimeMemory";
case hipErrorRuntimeOther:
return "hipErrorRuntimeOther";
case hipErrorCooperativeLaunchTooLarge:
return "hipErrorCooperativeLaunchTooLarge";
case hipErrorStreamCaptureUnsupported:
return "hipErrorStreamCaptureUnsupported";
case hipErrorStreamCaptureInvalidated:
return "hipErrorStreamCaptureInvalidated";
case hipErrorStreamCaptureMerge:
return "hipErrorStreamCaptureMerge";
case hipErrorStreamCaptureUnmatched:
return "hipErrorStreamCaptureUnmatched";
case hipErrorStreamCaptureUnjoined:
return "hipErrorStreamCaptureUnjoined";
case hipErrorStreamCaptureIsolation:
return "hipErrorStreamCaptureIsolation";
case hipErrorStreamCaptureImplicit:
return "hipErrorStreamCaptureImplicit";
case hipErrorCapturedEvent:
return "hipErrorCapturedEvent";
case hipErrorStreamCaptureWrongThread:
return "hipErrorStreamCaptureWrongThread";
case hipErrorGraphExecUpdateFailure:
return "hipErrorGraphExecUpdateFailure";
case hipErrorTbd:
return "hipErrorTbd";
default:
return "hipErrorUnknown";
#endif
#if HT_NVIDIA
case hipSuccess:
return "CUDA_SUCCESS";
case hipErrorInvalidValue:
return "CUDA_ERROR_INVALID_VALUE";
case hipErrorOutOfMemory:
return "CUDA_ERROR_OUT_OF_MEMORY";
case hipErrorNotInitialized:
return "CUDA_ERROR_NOT_INITIALIZED";
case hipErrorDeinitialized:
return "CUDA_ERROR_DEINITIALIZED";
case hipErrorProfilerDisabled:
return "CUDA_ERROR_PROFILER_DISABLED";
case hipErrorProfilerNotInitialized:
return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
case hipErrorProfilerAlreadyStarted:
return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
case hipErrorProfilerAlreadyStopped:
return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
case hipErrorInvalidConfiguration:
return "CUDA_ERROR_UNKNOWN";
case hipErrorInvalidSymbol:
return "CUDA_ERROR_UNKNOWN";
case hipErrorInvalidDevicePointer:
return "CUDA_ERROR_UNKNOWN";
case hipErrorInvalidMemcpyDirection:
return "CUDA_ERROR_UNKNOWN";
case hipErrorInsufficientDriver:
return "CUDA_ERROR_UNKNOWN";
case hipErrorMissingConfiguration:
return "CUDA_ERROR_UNKNOWN";
case hipErrorPriorLaunchFailure:
return "CUDA_ERROR_UNKNOWN";
case hipErrorInvalidDeviceFunction:
return "CUDA_ERROR_UNKNOWN";
case hipErrorNoDevice:
return "CUDA_ERROR_NO_DEVICE";
case hipErrorInvalidDevice:
return "CUDA_ERROR_INVALID_DEVICE";
case hipErrorInvalidPitchValue:
return "CUDA_ERROR_UNKNOWN";
case hipErrorInvalidImage:
return "CUDA_ERROR_INVALID_IMAGE";
case hipErrorInvalidContext:
return "CUDA_ERROR_INVALID_CONTEXT";
case hipErrorContextAlreadyCurrent:
return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
case hipErrorMapFailed:
return "CUDA_ERROR_MAP_FAILED";
case hipErrorUnmapFailed:
return "CUDA_ERROR_UNMAP_FAILED";
case hipErrorArrayIsMapped:
return "CUDA_ERROR_ARRAY_IS_MAPPED";
case hipErrorAlreadyMapped:
return "CUDA_ERROR_ALREADY_MAPPED";
case hipErrorNoBinaryForGpu:
return "CUDA_ERROR_NO_BINARY_FOR_GPU";
case hipErrorAlreadyAcquired:
return "CUDA_ERROR_ALREADY_ACQUIRED";
case hipErrorNotMapped:
return "CUDA_ERROR_NOT_MAPPED";
case hipErrorNotMappedAsArray:
return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
case hipErrorNotMappedAsPointer:
return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
case hipErrorECCNotCorrectable:
return "CUDA_ERROR_ECC_UNCORRECTABLE";
case hipErrorUnsupportedLimit:
return "CUDA_ERROR_UNSUPPORTED_LIMIT";
case hipErrorContextAlreadyInUse:
return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
case hipErrorPeerAccessUnsupported:
return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
case hipErrorInvalidKernelFile:
return "CUDA_ERROR_INVALID_PTX";
case hipErrorInvalidGraphicsContext:
return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
case hipErrorInvalidSource:
return "CUDA_ERROR_INVALID_SOURCE";
case hipErrorFileNotFound:
return "CUDA_ERROR_FILE_NOT_FOUND";
case hipErrorSharedObjectSymbolNotFound:
return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
case hipErrorSharedObjectInitFailed:
return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
case hipErrorOperatingSystem:
return "CUDA_ERROR_OPERATING_SYSTEM";
case hipErrorInvalidHandle:
return "CUDA_ERROR_INVALID_HANDLE";
case hipErrorIllegalState:
return "CUDA_ERROR_ILLEGAL_STATE";
case hipErrorNotFound:
return "CUDA_ERROR_NOT_FOUND";
case hipErrorNotReady:
return "CUDA_ERROR_NOT_READY";
case hipErrorIllegalAddress:
return "CUDA_ERROR_ILLEGAL_ADDRESS";
case hipErrorLaunchOutOfResources:
return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
case hipErrorLaunchTimeOut:
return "CUDA_ERROR_LAUNCH_TIMEOUT";
case hipErrorPeerAccessAlreadyEnabled:
return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
case hipErrorPeerAccessNotEnabled:
return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
case hipErrorSetOnActiveProcess:
return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
case hipErrorContextIsDestroyed:
return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
case hipErrorAssert:
return "CUDA_ERROR_ASSERT";
case hipErrorHostMemoryAlreadyRegistered:
return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
case hipErrorHostMemoryNotRegistered:
return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
case hipErrorLaunchFailure:
return "CUDA_ERROR_LAUNCH_FAILED";
case hipErrorNotSupported:
return "CUDA_ERROR_NOT_SUPPORTED";
case hipErrorUnknown:
return "CUDA_ERROR_UNKNOWN";
case hipErrorRuntimeMemory:
return "CUDA_ERROR_UNKNOWN";
case hipErrorRuntimeOther:
return "CUDA_ERROR_UNKNOWN";
case hipErrorCooperativeLaunchTooLarge:
return "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE";
case hipErrorStreamCaptureUnsupported:
return "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED";
case hipErrorStreamCaptureInvalidated:
return "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED";
case hipErrorStreamCaptureMerge:
return "CUDA_ERROR_STREAM_CAPTURE_MERGE";
case hipErrorStreamCaptureUnmatched:
return "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED";
case hipErrorStreamCaptureUnjoined:
return "CUDA_ERROR_STREAM_CAPTURE_UNJOINED";
case hipErrorStreamCaptureIsolation:
return "CUDA_ERROR_STREAM_CAPTURE_ISOLATION";
case hipErrorStreamCaptureImplicit:
return "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT";
case hipErrorCapturedEvent:
return "CUDA_ERROR_CAPTURED_EVENT";
case hipErrorStreamCaptureWrongThread:
return "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD";
case hipErrorGraphExecUpdateFailure:
return "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE";
default:
return "CUDA_ERROR_UNKNOWN";
#endif
}
}
/**
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
* @{
* @ingroup ErrorTest
* `hipDrvGetErrorName(hipError_t hip_error)` -
* Return hip error as text string form.
*/
// Functional test case
// Test case to verify the returned error name is same as generated error name.
/**
 * Test Description
 * ------------------------
 *  - Validate that the correct string is returned for each supported
 *    device error enumeration.
 * Test source
 * ------------------------
 *  - unit/errorHandling/hipDrvGetErrorName.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.4
 */
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
  const char* error_string = nullptr;

  // A single generator: the test body is executed once per entry of kErrorEnumerators.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // One API call only; HIP_CHECK validates the returned status, so no separate
  // status variable is needed.
  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));

  REQUIRE(error_string != nullptr);
  // The reported name must match the locally maintained reference table.
  REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
}
// Negative test cases.
/**
 * Test Description
 * ------------------------
 *  - Validate handling of invalid arguments:
 *    -# When error enumerator is invalid (-1)
 *      - Expected output: return `hipErrorInvalidValue`
 *    -# When nullptr is passed as store location (checked on AMD only)
 *      - Expected output: return `hipErrorInvalidValue`
 * Test source
 * ------------------------
 *  - unit/errorHandling/hipDrvGetErrorName.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.4
 */
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
  const char* error_string = nullptr;

  SECTION("pass unknown value to hipError") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }

#if HT_AMD  // segfaults on NVIDIA
  SECTION("pass nullptr to error string") {
    // Exercise the API under test (hipDrvGetErrorName), not hipDrvGetErrorString.
    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
  }
#endif
}
+49 -229
Просмотреть файл
@@ -1,5 +1,5 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -17,247 +17,67 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include "errorEnumerators.h"
// Local Function to return the error string.
#include "error_handling_common.hh"
/**
 * Reference table used by the tests: maps a HIP error enumerator to the
 * message text that hipDrvGetErrorString is expected to report for it.
 *
 * Messages that differ between back ends are selected with HT_AMD; every
 * such selection has an unconditional `#else` so each case label always ends
 * in a return statement, whatever the platform macros evaluate to.
 *
 * @param enumerator error code to describe.
 * @return pointer to a string literal (never nullptr); "unknown error" for
 *         hipErrorUnknown and for any value not listed here.
 */
static const char *ErrorString(hipError_t enumerator) {
  switch (enumerator) {
    case hipSuccess:
      return "no error";
    case hipErrorInvalidValue:
      return "invalid argument";
    case hipErrorOutOfMemory:
      return "out of memory";
    case hipErrorNotInitialized:
      return "initialization error";
    case hipErrorDeinitialized:
      return "driver shutting down";
    case hipErrorProfilerDisabled:
      return "profiler disabled while using external profiling tool";
    case hipErrorProfilerNotInitialized:
#if HT_AMD
      return "profiler is not initialized";
#else
      return "profiler not initialized: call cudaProfilerInitialize()";
#endif
    case hipErrorProfilerAlreadyStarted:
      return "profiler already started";
    case hipErrorProfilerAlreadyStopped:
      return "profiler already stopped";

    // The following enumerators have a dedicated message only on AMD; on the
    // other back end they are reported as "unknown error". The case labels are
    // kept outside the preprocessor guards so no return statement is orphaned.
    case hipErrorInvalidConfiguration:
#if HT_AMD
      return "invalid configuration argument";
#else
      return "unknown error";
#endif
    case hipErrorInvalidPitchValue:
#if HT_AMD
      return "invalid pitch argument";
#else
      return "unknown error";
#endif
    case hipErrorInvalidSymbol:
#if HT_AMD
      return "invalid device symbol";
#else
      return "unknown error";
#endif
    case hipErrorInvalidDevicePointer:
#if HT_AMD
      return "invalid device pointer";
#else
      return "unknown error";
#endif
    case hipErrorInvalidMemcpyDirection:
#if HT_AMD
      return "invalid copy direction for memcpy";
#else
      return "unknown error";
#endif
    case hipErrorInsufficientDriver:
#if HT_AMD
      return "driver version is insufficient for runtime version";
#else
      return "unknown error";
#endif
    case hipErrorMissingConfiguration:
#if HT_AMD
      return "__global__ function call is not configured";
#else
      return "unknown error";
#endif
    case hipErrorPriorLaunchFailure:
#if HT_AMD
      return "unspecified launch failure in prior launch";
#else
      return "unknown error";
#endif
    case hipErrorInvalidDeviceFunction:
#if HT_AMD
      return "invalid device function";
#else
      return "unknown error";
#endif

    case hipErrorNoDevice:
#if HT_AMD
      return "no ROCm-capable device is detected";
#else
      return "no CUDA-capable device is detected";
#endif
    case hipErrorInvalidDevice:
      return "invalid device ordinal";
    case hipErrorInvalidImage:
      return "device kernel image is invalid";
    case hipErrorInvalidContext:
      return "invalid device context";
    case hipErrorContextAlreadyCurrent:
#if HT_AMD
      return "context is already current context";
#else
      return "context already current";
#endif
    case hipErrorMapFailed:
      return "mapping of buffer object failed";
    case hipErrorUnmapFailed:
      return "unmapping of buffer object failed";
    case hipErrorArrayIsMapped:
      return "array is mapped";
    case hipErrorAlreadyMapped:
      return "resource already mapped";
    case hipErrorNoBinaryForGpu:
      return "no kernel image is available for execution on the device";
    case hipErrorAlreadyAcquired:
      return "resource already acquired";
    case hipErrorNotMapped:
      return "resource not mapped";
    case hipErrorNotMappedAsArray:
      return "resource not mapped as array";
    case hipErrorNotMappedAsPointer:
      return "resource not mapped as pointer";
    case hipErrorECCNotCorrectable:
      return "uncorrectable ECC error encountered";
    case hipErrorUnsupportedLimit:
      return "limit is not supported on this architecture";
    case hipErrorContextAlreadyInUse:
      return "exclusive-thread device already in use by a different thread";
    case hipErrorPeerAccessUnsupported:
      return "peer access is not supported between these two devices";
    case hipErrorInvalidKernelFile:
#if HT_AMD
      return "invalid kernel file";
#else
      return "a PTX JIT compilation failed";
#endif
    case hipErrorInvalidGraphicsContext:
      return "invalid OpenGL or DirectX context";
    case hipErrorInvalidSource:
      return "device kernel image is invalid";
    case hipErrorFileNotFound:
      return "file not found";
    case hipErrorSharedObjectSymbolNotFound:
      return "shared object symbol not found";
    case hipErrorSharedObjectInitFailed:
      return "shared object initialization failed";
    case hipErrorOperatingSystem:
      return "OS call failed or operation not supported on this OS";
    case hipErrorInvalidHandle:
      return "invalid resource handle";
    case hipErrorIllegalState:
      return "the operation cannot be performed in the present state";
    case hipErrorNotFound:
      return "named symbol not found";
    case hipErrorNotReady:
      return "device not ready";
    case hipErrorIllegalAddress:
      return "an illegal memory access was encountered";
    case hipErrorLaunchOutOfResources:
      return "too many resources requested for launch";
    case hipErrorLaunchTimeOut:
      return "the launch timed out and was terminated";
    case hipErrorPeerAccessAlreadyEnabled:
      return "peer access is already enabled";
    case hipErrorPeerAccessNotEnabled:
      return "peer access has not been enabled";
    case hipErrorSetOnActiveProcess:
      return "cannot set while device is active in this process";
    case hipErrorContextIsDestroyed:
      return "context is destroyed";
    case hipErrorAssert:
      return "device-side assert triggered";
    case hipErrorHostMemoryAlreadyRegistered:
      return "part or all of the requested memory range is already mapped";
    case hipErrorHostMemoryNotRegistered:
      return "pointer does not correspond to a registered memory region";
    case hipErrorLaunchFailure:
      return "unspecified launch failure";
    case hipErrorCooperativeLaunchTooLarge:
      return "too many blocks in cooperative launch";
    case hipErrorNotSupported:
      return "operation not supported";
    case hipErrorStreamCaptureUnsupported:
      return "operation not permitted when stream is capturing";
    case hipErrorStreamCaptureInvalidated:
      return "operation failed due to a previous error during capture";
    case hipErrorStreamCaptureMerge:
      return "operation would result in a merge of separate capture sequences";
    case hipErrorStreamCaptureUnmatched:
      return "capture was not ended in the same stream as it began";
    case hipErrorStreamCaptureUnjoined:
      return "capturing stream has unjoined work";
    case hipErrorStreamCaptureIsolation:
      return "dependency created on uncaptured work in another stream";
    case hipErrorStreamCaptureImplicit:
      return "operation would make the legacy stream depend on a capturing blocking stream";  // NOLINT
    case hipErrorCapturedEvent:
      return "operation not permitted on an event last recorded in a capturing stream";  // NOLINT
    case hipErrorStreamCaptureWrongThread:
      return "attempt to terminate a thread-local capture sequence from another thread";  // NOLINT
    case hipErrorGraphExecUpdateFailure:
      return "the graph update was not performed because it included changes which violated constraints specific to instantiated graph update";  // NOLINT
    case hipErrorRuntimeMemory:
      return "runtime memory call returned error";
    case hipErrorRuntimeOther:
      return "runtime call other than memory returned error";
    case hipErrorUnknown:
    default:
      // Same text on every back end, so no platform guard is required.
      return "unknown error";
  }
}
/**
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
* @{
* @ingroup ErrorTest
* `hipDrvGetErrorString(hipError_t hipError)` -
* Return handy text string message to explain the error which occurred.
*/
// Test case to verify the returned error string is
// same as generated error string.
/**
 * Test Description
 * ------------------------
 *  - Validate that the correct string is returned for each supported
 *    device error enumeration.
 * Test source
 * ------------------------
 *  - unit/errorHandling/hipDrvGetErrorString.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.4
 */
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
  const char* error_string = nullptr;

  // A single generator: the test body is executed once per entry of kErrorEnumerators.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // One API call only; HIP_CHECK validates the returned status, so no separate
  // status variable is needed.
  HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));

  REQUIRE(error_string != nullptr);
  // The reported message must match the locally maintained reference table.
  REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
}
// Negative test cases.
/**
 * Test Description
 * ------------------------
 *  - Validate handling of invalid arguments:
 *    -# When error enumerator is invalid (-1)
 *      - Expected output: return `hipErrorInvalidValue`
 *    -# When nullptr is passed as store location (checked on AMD only)
 *      - Expected output: return `hipErrorInvalidValue`
 * Test source
 * ------------------------
 *  - unit/errorHandling/hipDrvGetErrorString.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.4
 */
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
  const char* error_string = nullptr;

  SECTION("pass unknown value to hipError") {
    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }

#if HT_AMD  // segfaults on NVIDIA
  SECTION("pass nullptr to error string") {
    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
                    hipErrorInvalidValue);
  }
#endif
}
+3 -3
Просмотреть файл
@@ -20,10 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "errorEnumerators.h"
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include "error_handling_common.hh"
/**
* @addtogroup hipGetErrorName hipGetErrorName
@@ -49,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorName_Positive_Basic") {
const char* error_string = nullptr;
const auto enumerator =
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
INFO("Error: " << enumerator);
error_string = hipGetErrorName(enumerator);
+3 -2
Просмотреть файл
@@ -20,9 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "errorEnumerators.h"
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include "error_handling_common.hh"
/**
* @addtogroup hipGetErrorString hipGetErrorString
@@ -48,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorString_Positive_Basic") {
const char* error_string = nullptr;
const auto enumerator =
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
INFO("Error: " << enumerator);
error_string = hipGetErrorString(enumerator);
+2 -2
Просмотреть файл
@@ -21,7 +21,6 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <threaded_zig_zag_test.hh>
/**
@@ -56,7 +55,8 @@ TEST_CASE("Unit_hipPeekAtLastError_Positive_Basic") {
* Test Description
* ------------------------
* - Validate that appropriate error is returned when working with multiple threads.
* - Validate that appropriate error is returned for getting the last erro when working with multiple threads.
* - Validate that appropriate error is returned for getting the last error when working with
* multiple threads.
* - Cause error on purpose within one of the threads.
* Test source
* ------------------------
+1 -1
Просмотреть файл
@@ -22,7 +22,7 @@ THE SOFTWARE.
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include <stdlib.h>
constexpr size_t buffer_size = (1024*1024);
+2
Просмотреть файл
@@ -4,6 +4,7 @@ set(TEST_SRC
hipFuncSetSharedMemConfig.cc
hipFuncSetAttribute.cc
hipFuncGetAttributes.cc
hipLaunchKernel.cc
hipLaunchCooperativeKernel.cc
hipLaunchCooperativeKernelMultiDevice.cc
)
@@ -12,6 +13,7 @@ if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC ${TEST_SRC}
hipExtLaunchKernel.cc
hipExtLaunchMultiKernelMultiDevice.cc
launch_api.cc
)
endif()
+8 -8
Просмотреть файл
@@ -49,19 +49,19 @@ TEST_CASE("Unit_hipExtLaunchKernel_Positive_Basic") {
TEST_CASE("Unit_hipExtLaunchKernel_Positive_Parameters") {
SECTION("blockDim.x == maxBlockDimX") {
const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX);
const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0);
HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{x, 1, 1},
nullptr, 0, nullptr, nullptr, nullptr, 0u));
}
SECTION("blockDim.y == maxBlockDimY") {
  // Single declaration of the limit (the attribute is passed first, then the device ordinal).
  const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0);
  // The limit under test goes into the y component of the block dimensions,
  // mirroring the dim3{1, y, 1} used by the matching negative section.
  HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, y, 1},
                               nullptr, 0, nullptr, nullptr, nullptr, 0u));
}
SECTION("blockDim.z == maxBlockDimZ") {
  // Single declaration of the limit (the attribute is passed first, then the device ordinal).
  const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0);
  // The limit under test goes into the z component of the block dimensions,
  // mirroring the dim3{1, 1, z} used by the matching negative section.
  HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, 1, z},
                               nullptr, 0, nullptr, nullptr, nullptr, 0u));
}
@@ -111,28 +111,28 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") {
}
SECTION("blockDim.x > maxBlockDimX") {
const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u;
const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u;
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
hipErrorInvalidConfiguration);
}
SECTION("blockDim.y > maxBlockDimY") {
const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u;
const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u;
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
dim3{1, y, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
hipErrorInvalidConfiguration);
}
SECTION("blockDim.z > maxBlockDimZ") {
const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u;
const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u;
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
dim3{1, 1, z}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
hipErrorInvalidConfiguration);
}
SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") {
const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0);
const unsigned int dim = std::ceil(std::cbrt(max));
HIP_CHECK_ERROR(
hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim},
@@ -141,7 +141,7 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") {
}
SECTION("sharedMemBytes > maxSharedMemoryPerBlock") {
const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u;
const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u;
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
dim3{1, 1, 1}, nullptr, max, nullptr, nullptr, nullptr, 0u),
hipErrorOutOfMemory);

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше