SWDEV-1 - Merge github PRs to amd-staging
Change-Id: I2944a63ddc2eec8dc1403d9790ffffbaec343385
Этот коммит содержится в:
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -211,6 +211,10 @@
|
||||
"Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory",
|
||||
"=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===",
|
||||
"Unit_hipVectorTypes_test_on_device",
|
||||
"Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4",
|
||||
"Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4",
|
||||
"=== Below test is disabled due to defect EXSWHTEC-347 ===",
|
||||
"Unit_hipPointerSetAttribute_Positive_SyncMemops",
|
||||
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
|
||||
"Unit_hiprtc_stdheaders",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
|
||||
@@ -222,6 +226,12 @@
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
|
||||
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"=== Below 2 tests are disable due to defect EXSWHTEC-369 ===",
|
||||
"Unit_Device_ilogbf_Accuracy_Positive",
|
||||
"Unit_Device_ilogb_Accuracy_Positive",
|
||||
"NOTE: The following test is disabled due to defect - EXSWHTEC-245",
|
||||
"Unit_hipFuncGetAttribute_Negative_Parameters",
|
||||
"Unit_hipMemAddressFree_negative",
|
||||
"Unit_hipMemAddressReserve_AlignmentTest",
|
||||
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
|
||||
"Unit_hipMemCreate_ChkWithKerLaunch",
|
||||
@@ -387,6 +397,261 @@
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Performance_hipMemcpy2D_HostToHost",
|
||||
"Performance_hipMemcpy2DAsync_HostToHost",
|
||||
"Unit_hipDeviceGetGraphMemAttribute_Positive_ReuseMemory",
|
||||
"Unit_hipGraphAddNodeTypeEventWait_Positive_Basic",
|
||||
"Unit_hipDrvGraphAddMemsetNode_Negative_Parameters",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D",
|
||||
"Unit_hipDrvGraphAddMemsetNode_hipMallocManaged",
|
||||
"Unit_hipDrvGraphAddMemcpyNode_Negative_Parameters",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1DLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - char",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - short",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - int",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex3D_Positive_ReadModeElementType - float",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex3DLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex3DGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2Dgather_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2D_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLayered_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_hipDrvGetErrorName_Positive_Basic",
|
||||
"Unit_hipDrvGetErrorString_Positive_Basic",
|
||||
"Unit_hipModuleLaunchKernel_Negative_Parameters",
|
||||
"Unit_hipModuleGetTexRef_Positive_Basic",
|
||||
"Unit_hipExtModuleLaunchKernel_Positive_Basic",
|
||||
"Unit_hipExtModuleLaunchKernel_Negative_Parameters",
|
||||
"Unit_hipLaunchKernel_Negative_Parameters",
|
||||
"Unit_Kernel_Launch_bounds_Negative_OutOfBounds",
|
||||
"Unit_Kernel_Launch_bounds_Negative_Parameters_RTC",
|
||||
"Unit_AtomicBuiltins_Negative_Parameters_RTC",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-151",
|
||||
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
|
||||
"Note: Test disabled due to defect - EXSWHTEC-152",
|
||||
@@ -446,6 +711,710 @@
|
||||
"Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
|
||||
"Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
|
||||
"Unit_hipGraphUpload_Functional_multidevice_test",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_StaticAssert_Positive_Basic_RTC",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"=== Below tests are disabled due to defect EXSWHTEC-356 ===",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"=== Below 4 tests are disable due to defect EXSWHTEC-355 ===",
|
||||
"Unit_Device___hadd_Sanity_Positive",
|
||||
"Unit_Device___uhadd_Sanity_Positive",
|
||||
"Unit_Device___rhadd_Sanity_Positive",
|
||||
"Unit_Device___urhadd_Sanity_Positive",
|
||||
"SWDEV-435667 : Below tests failed in stress test on 19/01/24 ===",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned int",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - unsigned long long",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - float",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic - double",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t",
|
||||
"Below tests failed in stress test of 25/01/24 ===",
|
||||
"Unit_atomicAnd_Positive_SameAddress - int",
|
||||
"Unit_atomicAnd_Positive_SameAddress - unsigned int",
|
||||
"Unit_atomicAnd_Positive_SameAddress - unsigned long",
|
||||
"Unit_atomicAnd_Positive_SameAddress - unsigned long long",
|
||||
"Unit_atomicAnd_Positive_Adjacent_Addresses - int",
|
||||
"Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicAnd_Positive_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicAnd_Positive_Scattered_Addresses - int",
|
||||
"Unit_atomicAnd_Positive_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicAnd_Positive_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - int",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned int",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Same_Address - unsigned long long",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - int",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - int",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicAnd_Negative_Parameters_RTC",
|
||||
"Unit_atomicOr_Positive_SameAddress - int",
|
||||
"Unit_atomicOr_Positive_SameAddress - unsigned int",
|
||||
"Unit_atomicOr_Positive_SameAddress - unsigned long",
|
||||
"Unit_atomicOr_Positive_SameAddress - unsigned long long",
|
||||
"Unit_atomicOr_Positive_Adjacent_Addresses - int",
|
||||
"Unit_atomicOr_Positive_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicOr_Positive_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicOr_Positive_Scattered_Addresses - int",
|
||||
"Unit_atomicOr_Positive_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicOr_Positive_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicOr_Positive_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - int",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned int",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Same_Address - unsigned long long",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - int",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - int",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicOr_Negative_Parameters_RTC",
|
||||
"Unit_atomicXor_Positive_SameAddress - int",
|
||||
"Unit_atomicXor_Positive_SameAddress - unsigned int",
|
||||
"Unit_atomicXor_Positive_SameAddress - unsigned long",
|
||||
"Unit_atomicXor_Positive_SameAddress - unsigned long long",
|
||||
"Unit_atomicXor_Positive_Adjacent_Addresses - int",
|
||||
"Unit_atomicXor_Positive_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicXor_Positive_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicXor_Positive_Scattered_Addresses - int",
|
||||
"Unit_atomicXor_Positive_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicXor_Positive_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicXor_Positive_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - int",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned int",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Same_Address - unsigned long long",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - int",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - int",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicXor_Negative_Parameters_RTC",
|
||||
"Unit_atomicMin_Positive_SameAddress - int",
|
||||
"Unit_atomicMin_Positive_SameAddress - unsigned int",
|
||||
"Unit_atomicMin_Positive_SameAddress - unsigned long",
|
||||
"Unit_atomicMin_Positive_SameAddress - unsigned long long",
|
||||
"Unit_atomicMin_Positive_Adjacent_Addresses - int",
|
||||
"Unit_atomicMin_Positive_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicMin_Positive_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicMin_Positive_Adjacent_Addresses - float",
|
||||
"Unit_atomicMin_Positive_Adjacent_Addresses - double",
|
||||
"Unit_atomicMin_Positive_Scattered_Addresses - int",
|
||||
"Unit_atomicMin_Positive_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicMin_Positive_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicMin_Positive_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicMin_Positive_Scattered_Addresses - float",
|
||||
"Unit_atomicMin_Positive_Scattered_Addresses - double",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - int",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned int",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Same_Address - unsigned long long",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - int",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - int",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - float",
|
||||
"Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses - double",
|
||||
"Unit_atomicMin_Negative_Parameters_RTC",
|
||||
"Unit_atomicMax_Positive_SameAddress - int",
|
||||
"Unit_atomicMax_Positive_SameAddress - unsigned int",
|
||||
"Unit_atomicMax_Positive_SameAddress - unsigned long",
|
||||
"Unit_atomicMax_Positive_SameAddress - unsigned long long",
|
||||
"Unit_atomicMax_Positive_Adjacent_Addresses - int",
|
||||
"Unit_atomicMax_Positive_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicMax_Positive_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicMax_Positive_Adjacent_Addresses - float",
|
||||
"Unit_atomicMax_Positive_Adjacent_Addresses - double",
|
||||
"Unit_atomicMax_Positive_Scattered_Addresses - int",
|
||||
"Unit_atomicMax_Positive_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicMax_Positive_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicMax_Positive_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicMax_Positive_Scattered_Addresses - float",
|
||||
"Unit_atomicMax_Positive_Scattered_Addresses - double",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - int",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned int",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Same_Address - unsigned long long",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - int",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned int",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - unsigned long long",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - int",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned int",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - unsigned long long",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - float",
|
||||
"Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses - double",
|
||||
"Unit_atomicMax_Negative_Parameters_RTC",
|
||||
"Unit_safeAtomicMin_Positive_Adjacent_Addresses - float",
|
||||
"Unit_safeAtomicMin_Positive_Adjacent_Addresses - double",
|
||||
"Unit_safeAtomicMin_Positive_Scattered_Addresses - float",
|
||||
"Unit_safeAtomicMin_Positive_Scattered_Addresses - double",
|
||||
"Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float",
|
||||
"Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double",
|
||||
"Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float",
|
||||
"Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double",
|
||||
"Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - float",
|
||||
"Unit_unsafeAtomicMin_Positive_Adjacent_Addresses - double",
|
||||
"Unit_unsafeAtomicMin_Positive_Scattered_Addresses - float",
|
||||
"Unit_unsafeAtomicMin_Positive_Scattered_Addresses - double",
|
||||
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - float",
|
||||
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses - double",
|
||||
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - float",
|
||||
"Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses - double",
|
||||
"Unit_safeAtomicMax_Positive_Adjacent_Addresses - float",
|
||||
"Unit_safeAtomicMax_Positive_Adjacent_Addresses - double",
|
||||
"Unit_safeAtomicMax_Positive_Scattered_Addresses - float",
|
||||
"Unit_safeAtomicMax_Positive_Scattered_Addresses - double",
|
||||
"Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float",
|
||||
"Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double",
|
||||
"Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float",
|
||||
"Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double",
|
||||
"Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - float",
|
||||
"Unit_unsafeAtomicMax_Positive_Adjacent_Addresses - double",
|
||||
"Unit_unsafeAtomicMax_Positive_Scattered_Addresses - float",
|
||||
"Unit_unsafeAtomicMax_Positive_Scattered_Addresses - double",
|
||||
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - float",
|
||||
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses - double",
|
||||
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - float",
|
||||
"Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses - double",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses - double",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses - double",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses - double",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses - double",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses - double",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses - double",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - float",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses - double",
|
||||
"Unit_atomicExch_Positive - int",
|
||||
"Unit_atomicExch_Positive - unsigned int",
|
||||
"Unit_atomicExch_Positive - unsigned long",
|
||||
"Unit_atomicExch_Positive - unsigned long long",
|
||||
"Unit_atomicExch_Positive - float",
|
||||
"Unit_atomicExch_Positive - double",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned int",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses - unsigned long long",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - int",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned int",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - unsigned long long",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - float",
|
||||
"Unit___hip_atomic_exchange_Positive_Wavefront - double",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - int",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned int",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - unsigned long long",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - float",
|
||||
"Unit___hip_atomic_exchange_Positive_Workgroup - double",
|
||||
"=== Below tests cause timeout in stress test of 09/02/24 ===",
|
||||
"Unit_Device___half2half2_Accuracy_Positive",
|
||||
"Unit_Device_make_half2_Accuracy_Positive",
|
||||
"Unit_Device___halves2half2_Accuracy_Positive",
|
||||
"Unit_Device___low2half_Accuracy_Positive",
|
||||
"Unit_Device___high2half_Accuracy_Positive",
|
||||
"Unit_Device___low2half2_Accuracy_Positive",
|
||||
"Unit_Device___high2half2_Accuracy_Positive",
|
||||
"Unit_Device___lowhigh2highlow_Accuracy_Positive",
|
||||
"Unit_Device___lows2half2_Accuracy_Positive",
|
||||
"Unit_Device___highs2half2_Accuracy_Positive",
|
||||
"Unit_Device___float2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___floats2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___float22half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___low2float_Accuracy_Positive",
|
||||
"Unit_Device___high2float_Accuracy_Positive",
|
||||
"Unit_Device___half22float2_Accuracy_Positive",
|
||||
"Unit_Device_hcos_Accuracy_Positive",
|
||||
"Unit_Device_h2cos_Accuracy_Positive",
|
||||
"Unit_Device_hsin_Accuracy_Positive",
|
||||
"Unit_Device_h2sin_Accuracy_Positive",
|
||||
"Unit_Device_hexp_Accuracy_Positive",
|
||||
"Unit_Device_h2exp_Accuracy_Positive",
|
||||
"Unit_Device_hexp10_Accuracy_Positive",
|
||||
"Unit_Device_h2exp10_Accuracy_Positive",
|
||||
"Unit_Device_hexp2_Accuracy_Positive",
|
||||
"Unit_Device_h2exp2_Accuracy_Positive",
|
||||
"Unit_Device_hlog_Accuracy_Positive",
|
||||
"Unit_Device_h2log_Accuracy_Positive",
|
||||
"Unit_Device_hlog10_Accuracy_Positive",
|
||||
"Unit_Device_h2log10_Accuracy_Positive",
|
||||
"Unit_Device_hlog2_Accuracy_Positive",
|
||||
"Unit_Device_h2log2_Accuracy_Positive",
|
||||
"Unit_Device_hsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2sqrt_Accuracy_Positive",
|
||||
"Unit_Device_hceil_Accuracy_Positive",
|
||||
"Unit_Device_h2ceil_Accuracy_Positive",
|
||||
"Unit_Device_hfloor_Accuracy_Positive",
|
||||
"Unit_Device_h2floor_Accuracy_Positive",
|
||||
"Unit_Device_htrunc_Accuracy_Positive",
|
||||
"Unit_Device_h2trunc_Accuracy_Positive",
|
||||
"Unit_Device_hrcp_Accuracy_Positive",
|
||||
"Unit_Device_h2rcp_Accuracy_Positive",
|
||||
"Unit_Device_hrsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2rsqrt_Accuracy_Positive",
|
||||
"Unit_Device_hrint_Accuracy_Positive",
|
||||
"Unit_Device_h2rint_Accuracy_Positive",
|
||||
"Unit_Device___habs_Accuracy_Positive",
|
||||
"Unit_Device___habs2_Accuracy_Positive",
|
||||
"Unit_Device___hneg_Accuracy_Positive",
|
||||
"Unit_Device___hneg2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_wrapper_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_sat_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_Accuracy_Positive",
|
||||
"Unit_Device___hsub_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_Accuracy_Positive",
|
||||
"Unit_Device___hmul_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hdiv_Accuracy_Positive",
|
||||
"Unit_Device___h2div_Accuracy_Positive",
|
||||
"Unit_Device___hfma_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_Accuracy_Positive",
|
||||
"Unit_Device___hfma_sat_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hisinf_Accuracy_Positive",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___heq_Accuracy_Positive",
|
||||
"Unit_Device___hbeq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___heq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu_Accuracy_Positive",
|
||||
"Unit_Device___hbneu2_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu2_Accuracy_Positive",
|
||||
"Unit_Device___hge_Accuracy_Positive",
|
||||
"Unit_Device___hbge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt_Accuracy_Positive",
|
||||
"Unit_Device___hbgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hle_Accuracy_Positive",
|
||||
"Unit_Device___hble2_Accuracy_Positive",
|
||||
"Unit_Device___hleu_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hle2_Accuracy_Positive",
|
||||
"Unit_Device___hleu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt_Accuracy_Positive",
|
||||
"Unit_Device___hblt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu2_Accuracy_Positive",
|
||||
"Unit_Device___hmax_Accuracy_Positive",
|
||||
"Unit_Device___hmin_Accuracy_Positive",
|
||||
"Unit_Device___hmax_nan_Accuracy_Positive",
|
||||
"Unit_Device___hmin_nan_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2int_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2short_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_ru_Accuracy_Positive",
|
||||
"Unit_Device___half_as_short_Accuracy_Positive",
|
||||
"Unit_Device___half_as_ushort_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___int2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___short2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short_as_half_Accuracy_Positive",
|
||||
"Unit_Device___ushort_as_half_Accuracy_Positive",
|
||||
"Unit_Device___float2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___float2half_Accuracy_Positive",
|
||||
"Unit_Device___half2float_Accuracy_Positive",
|
||||
"Unit_Device___frcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___frsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___expf_Accuracy_Positive",
|
||||
"Unit_Device___exp10f_Accuracy_Positive",
|
||||
"Unit_Device___logf_Accuracy_Positive",
|
||||
"Unit_Device___log2f_Accuracy_Positive",
|
||||
"Unit_Device___log10f_Accuracy_Positive",
|
||||
"Unit_Device___sinf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_sin_Accuracy_Positive",
|
||||
"Unit_Device___cosf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_cos_Accuracy_Positive",
|
||||
"Unit_Device___fadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___fmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdividef_Accuracy_Positive",
|
||||
"Unit_Device___fmaf_rn_Accuracy_Positive",
|
||||
"Unit_Device___drcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___dadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___dmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___ddiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fma_rn_Accuracy_Positive",
|
||||
"Unit___hip_atomic_load_store_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_exchange_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Acquire_Release",
|
||||
"Unit___hip_atomic_load_store_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_exchange_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Wavefront - int",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Wavefront - float",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Wavefront - double",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Workgroup - int",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Workgroup - float",
|
||||
"Unit___hip_atomic_fetch_add_Positive_Workgroup - double",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float",
|
||||
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double",
|
||||
"Unit_atomicAdd_Positive - int",
|
||||
"Unit_atomicAdd_Positive - unsigned int",
|
||||
"Unit_atomicAdd_Positive - unsigned long",
|
||||
"Unit_atomicAdd_Positive - unsigned long long",
|
||||
"Unit_atomicAdd_Positive - float",
|
||||
"Unit_atomicAdd_Positive - double",
|
||||
"Unit_atomicAdd_Positive_Multi_Kernel - int",
|
||||
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned int",
|
||||
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long",
|
||||
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long",
|
||||
"Unit_atomicAdd_Positive_Multi_Kernel - float",
|
||||
"Unit_atomicAdd_Positive_Multi_Kernel - double",
|
||||
"Unit_atomicAdd_Negative_Parameters_RTC",
|
||||
"Unit_atomicAdd_system_Positive_Peer_GPUs - int",
|
||||
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicAdd_system_Positive_Peer_GPUs - float",
|
||||
"Unit_atomicAdd_system_Positive_Peer_GPUs - double",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_GPU - int",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_GPU - float",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_GPU - double",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float",
|
||||
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double",
|
||||
"Unit_unsafeAtomicAdd_Positive - float",
|
||||
"Unit_unsafeAtomicAdd_Positive - double",
|
||||
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float",
|
||||
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double",
|
||||
"Unit_safeAtomicAdd_Positive - float",
|
||||
"Unit_safeAtomicAdd_Positive - double",
|
||||
"Unit_safeAtomicAdd_Positive_Multi_Kernel - float",
|
||||
"Unit_safeAtomicAdd_Positive_Multi_Kernel - double",
|
||||
"Unit_atomicSub_Positive - int",
|
||||
"Unit_atomicSub_Positive - unsigned int",
|
||||
"Unit_atomicSub_Positive - unsigned long",
|
||||
"Unit_atomicSub_Positive - unsigned long long",
|
||||
"Unit_atomicSub_Positive - float",
|
||||
"Unit_atomicSub_Positive - double",
|
||||
"Unit_atomicSub_Positive_Multi_Kernel - int",
|
||||
"Unit_atomicSub_Positive_Multi_Kernel - unsigned int",
|
||||
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long",
|
||||
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long long",
|
||||
"Unit_atomicSub_Positive_Multi_Kernel - float",
|
||||
"Unit_atomicSub_Positive_Multi_Kernel - double",
|
||||
"Unit_atomicSub_Negative_Parameters_RTC",
|
||||
"Unit_atomicSub_system_Positive_Peer_GPUs - int",
|
||||
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicSub_system_Positive_Peer_GPUs - float",
|
||||
"Unit_atomicSub_system_Positive_Peer_GPUs - double",
|
||||
"Unit_atomicSub_system_Positive_Host_And_GPU - int",
|
||||
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int",
|
||||
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long",
|
||||
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long",
|
||||
"Unit_atomicSub_system_Positive_Host_And_GPU - float",
|
||||
"Unit_atomicSub_system_Positive_Host_And_GPU - double",
|
||||
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int",
|
||||
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long",
|
||||
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float",
|
||||
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double",
|
||||
"Unit_atomicInc_Positive - unsigned int",
|
||||
"Unit_atomicInc_Positive_Multi_Kernel - unsigned int",
|
||||
"Unit_atomicInc_Negative_Parameters_RTC",
|
||||
"Unit_atomicDec_Positive - unsigned int",
|
||||
"Unit_atomicDec_Positive_Multi_Kernel - unsigned int",
|
||||
"Unit_atomicDec_Negative_Parameters_RTC",
|
||||
"Unit_atomicCAS_Positive - int",
|
||||
"Unit_atomicCAS_Positive - unsigned int",
|
||||
"Unit_atomicCAS_Positive - unsigned long long",
|
||||
"Unit_atomicCAS_Positive_Multi_Kernel - int",
|
||||
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned int",
|
||||
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long",
|
||||
"Unit_atomicCAS_Negative_Parameters_RTC",
|
||||
"Unit_atomicCAS_system_Positive_Peer_GPUs - int",
|
||||
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long",
|
||||
"Unit_atomicCAS_system_Positive_Host_And_GPU - int",
|
||||
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int",
|
||||
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long",
|
||||
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int",
|
||||
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int",
|
||||
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long",
|
||||
#endif
|
||||
"End of json"
|
||||
]
|
||||
|
||||
@@ -89,6 +89,149 @@
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeElementType - float",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex1DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex3DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemap_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemap_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - unsigned int",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeElementType - float",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2D_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayered_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned char",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - short",
|
||||
"Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat - unsigned short",
|
||||
"Unit_hipDrvGetErrorString_Positive_Basic",
|
||||
"Unit_hipLaunchKernel_Negative_Parameters",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_hipMemImportFromShareableHandle_Positive_MultiProc",
|
||||
"Unit_hipMemMapArrayAsync_Positive_Basic"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemMapArrayAsync_Positive_Basic"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -36,6 +36,12 @@ int main(int argc, char** argv) {
|
||||
| Opt(cmd_options.cg_iterations, "cg_iterations")
|
||||
["-C"]["--cg-iterations"]
|
||||
("Number of iterations used for cooperative groups sync tests (default: 5)")
|
||||
| Opt(cmd_options.accuracy_iterations, "accuracy_iterations")
|
||||
["-A"]["--accuracy-iterations"]
|
||||
("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)")
|
||||
| Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory")
|
||||
["-M"]["--accuracy-max-memory"]
|
||||
("Percentage of global device memory allowed for math accuracy tests (default: 80%)")
|
||||
;
|
||||
// clang-format on
|
||||
|
||||
|
||||
@@ -22,6 +22,9 @@ THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
struct CmdOptions {
|
||||
int iterations = 10;
|
||||
int warmups = 100;
|
||||
@@ -29,6 +32,8 @@ struct CmdOptions {
|
||||
int cg_iterations = 5;
|
||||
bool no_display = false;
|
||||
bool progress = false;
|
||||
uint64_t accuracy_iterations = std::numeric_limits<uint32_t>::max() + 1ull;
|
||||
int accuracy_max_memory = 80;
|
||||
};
|
||||
|
||||
extern CmdOptions cmd_options;
|
||||
|
||||
@@ -129,6 +129,19 @@ THE SOFTWARE.
|
||||
} \
|
||||
}
|
||||
|
||||
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
//
// errorExpr is evaluated exactly once and its result is stored in a local, so it is safe to pass
// a call with side effects. Before the comparison, an INFO message is registered containing the
// expected and actual error strings (via hiprtcGetErrorString) and codes, the stringified
// expression, and the file/line of the macro expansion. The check itself is a REQUIRE on
// equality of the two codes.
//
// NOTE: the expansion is a bare brace block rather than do { ... } while (0), so a trailing ';'
// at the call site forms an extra empty statement.
|
||||
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
|
||||
{ \
|
||||
auto localError = errorExpr; \
|
||||
INFO("Matching Errors: " \
|
||||
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
|
||||
<< "\n Expected Code: " << expectedError << '\n' \
|
||||
<< " Actual Error: " << hiprtcGetErrorString(localError) \
|
||||
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
|
||||
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
|
||||
REQUIRE(localError == expectedError); \
|
||||
}
|
||||
|
||||
#define HIPASSERT(condition) \
|
||||
if (!(condition)) { \
|
||||
printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \
|
||||
@@ -165,7 +178,7 @@ static inline bool IsGfx11() {
|
||||
hipDeviceProp_t props{};
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, device));
|
||||
// Get GCN Arch Name and compare to check if it is gfx11
|
||||
// Get GCN Arch Name and compare to check if it is gfx11
|
||||
std::string arch = std::string(props.gcnArchName);
|
||||
auto pos = arch.find("gfx11");
|
||||
if (pos != std::string::npos)
|
||||
@@ -173,7 +186,7 @@ static inline bool IsGfx11() {
|
||||
else
|
||||
return false;
|
||||
#else
|
||||
std::cout<<"Have to be either Nvidia or AMD platform, asserting"<<std::endl;
|
||||
std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
@@ -308,7 +321,7 @@ void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerB
|
||||
launchRTCKernel<Typenames...>(kernel, numBlocks, numThreads, memPerBlock, stream,
|
||||
std::forward<Args>(packedArgs)...);
|
||||
#endif
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
//---
|
||||
|
||||
@@ -39,6 +39,13 @@ THE SOFTWARE.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup AtomicsTest Device Atomics
|
||||
* @{
|
||||
* This section describes tests for the Device Atomic APIs.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup DeviceLanguageTest Device Language
|
||||
* @{
|
||||
@@ -96,16 +103,23 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup KernelTest Kernel Functions Management
|
||||
* @{
|
||||
* This section describes the various kernel functions invocation.
|
||||
* @}
|
||||
*/
|
||||
* @defgroup KernelTest Kernel Functions Management
|
||||
* @{
|
||||
* This section describes the various kernel functions invocation.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup AtomicsTest Device Atomics
|
||||
* @defgroup SyncthreadsTest Synchronization Functions
|
||||
* @{
|
||||
* This section describes tests for the Device Atomic APIs.
|
||||
* This section describes tests for Synchronization Functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ThreadfenceTest Memory Fence Functions
|
||||
* @{
|
||||
* This section describes tests for Memory Fence Functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -119,7 +133,8 @@ THE SOFTWARE.
|
||||
/**
|
||||
* @defgroup PeerToPeerTest PeerToPeer Device Memory Access
|
||||
* @{
|
||||
* This section describes tests for the PeerToPeer device memory access functions of HIP runtime API.
|
||||
* This section describes tests for the PeerToPeer device memory access functions of HIP runtime
|
||||
* API.
|
||||
* @warning PeerToPeer support is experimental.
|
||||
* @}
|
||||
*/
|
||||
@@ -135,6 +150,7 @@ THE SOFTWARE.
|
||||
* @defgroup ShflTest warp shuffle function Management
|
||||
* @{
|
||||
* This section describes the warp shuffle types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -158,6 +174,13 @@ THE SOFTWARE.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup ModuleTest Module Management
|
||||
* @{
|
||||
* This section describes the module management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup TextureTest Texture Management
|
||||
* @{
|
||||
@@ -172,6 +195,13 @@ THE SOFTWARE.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup MathTest Math Device Functions
|
||||
* @{
|
||||
* This section describes tests for device math functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup PrintfTest Printf API Management
|
||||
* @{
|
||||
@@ -192,3 +222,10 @@ THE SOFTWARE.
|
||||
* This section describes tests for the Complex type functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup VirtualMemoryManagementTest Virtual Memory Management APIs
|
||||
* @{
|
||||
* This section describes the virtual memory management types & functions of HIP runtime API.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -23,7 +23,7 @@ THE SOFTWARE.
|
||||
#pragma once
|
||||
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
|
||||
#pragma clang diagnostic ignored "-Wunused-lambda-capture"
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
#include <variant>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
@@ -44,8 +44,9 @@ static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction
|
||||
}
|
||||
};
|
||||
|
||||
static hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
|
||||
static inline hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos,
|
||||
PtrVariant src_ptr, hipPos src_pos,
|
||||
hipExtent extent, hipMemcpyKind kind) {
|
||||
hipMemcpy3DParms parms = {0};
|
||||
if (std::holds_alternative<hipArray_t>(dst_ptr)) {
|
||||
parms.dstArray = std::get<hipArray_t>(dst_ptr);
|
||||
@@ -185,7 +186,7 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
|
||||
if (!can_access_peer) {
|
||||
std::string msg = "Skipped as peer access cannot be enabled between devices " +
|
||||
std::to_string(src_device) + " " + std::to_string(dst_device);
|
||||
std::to_string(src_device) + " " + std::to_string(dst_device);
|
||||
HipTest::HIP_SKIP_TEST(msg.c_str());
|
||||
return;
|
||||
}
|
||||
@@ -205,7 +206,8 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
|
||||
// Using dst_alloc width and height to set only the elements that will be copied over to
|
||||
// dst_alloc
|
||||
Iota<<<blocks, threads_per_block, 0, kernel_stream>>>(src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width_logical(),dst_alloc.height(), dst_alloc.depth());
|
||||
dst_alloc.width_logical(),
|
||||
dst_alloc.height(), dst_alloc.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
@@ -626,15 +628,14 @@ constexpr auto MemTypeUnified() {
|
||||
|
||||
using DrvPtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
|
||||
|
||||
template <bool async = false>
|
||||
hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
|
||||
hipStream_t stream = nullptr) {
|
||||
static inline HIP_MEMCPY3D GetDrvMemcpy3DParms(DrvPtrVariant dst_ptr, hipPos dst_pos,
|
||||
DrvPtrVariant src_ptr, hipPos src_pos,
|
||||
hipExtent extent, hipMemcpyKind kind) {
|
||||
HIP_MEMCPY3D parms = {0};
|
||||
|
||||
if (std::holds_alternative<hipArray_t>(dst_ptr)) {
|
||||
parms.dstMemoryType = hipMemoryTypeArray;
|
||||
parms.dstArray = std::get<hipArray_t>(dst_ptr);
|
||||
parms.dstArray = std::get<hipArray_t>(dst_ptr);
|
||||
} else {
|
||||
auto ptr = std::get<hipPitchedPtr>(dst_ptr);
|
||||
parms.dstPitch = ptr.pitch;
|
||||
@@ -694,6 +695,84 @@ hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVaria
|
||||
parms.dstY = dst_pos.y;
|
||||
parms.dstZ = dst_pos.z;
|
||||
|
||||
return parms;
|
||||
}
|
||||
|
||||
/**
 * Field-wise equality for driver-API 3D memcpy descriptors.
 *
 * Two descriptors compare equal when all of the following match:
 *  - source and destination offsets (XInBytes, Y, Z),
 *  - the copy extent (WidthInBytes, Height, Depth),
 *  - for each side, the memory type and either the array handle (when lhs names an array) or the
 *    pitch (otherwise),
 *  - every device/host pointer that is set (non-null) on lhs.
 *
 * Pointers that are null on lhs are intentionally not compared, so the relation is driven by
 * lhs; pass the descriptor you built yourself as the left operand.
 *
 * @param lhs Reference descriptor; decides which optional fields take part in the comparison.
 * @param rhs Descriptor to compare against lhs.
 * @return true when every compared field is equal, false otherwise.
 */
static inline bool operator==(const HIP_MEMCPY3D& lhs, const HIP_MEMCPY3D& rhs) {
  const bool pos_eq = lhs.dstXInBytes == rhs.dstXInBytes && lhs.dstY == rhs.dstY &&
      lhs.dstZ == rhs.dstZ && lhs.srcXInBytes == rhs.srcXInBytes && lhs.srcY == rhs.srcY &&
      lhs.srcZ == rhs.srcZ;

  const bool extent_eq =
      lhs.WidthInBytes == rhs.WidthInBytes && lhs.Height == rhs.Height && lhs.Depth == rhs.Depth;

  // Destination side: array handle if lhs uses an array, pitch otherwise.
  const bool dst_eq = lhs.dstMemoryType == rhs.dstMemoryType &&
      (lhs.dstArray ? lhs.dstArray == rhs.dstArray : lhs.dstPitch == rhs.dstPitch);

  // Source side, same rule. Both sides are kept and AND-ed together below; assigning the source
  // result over the destination result would let a destination mismatch go undetected.
  const bool src_eq = lhs.srcMemoryType == rhs.srcMemoryType &&
      (lhs.srcArray ? lhs.srcArray == rhs.srcArray : lhs.srcPitch == rhs.srcPitch);

  bool mem_eq = dst_eq && src_eq;

  // Optional pointers: each is compared against its own counterpart, only when set on lhs.
  if (lhs.dstDevice) {
    mem_eq = mem_eq && (lhs.dstDevice == rhs.dstDevice);
  }
  if (lhs.dstHost) {
    mem_eq = mem_eq && (lhs.dstHost == rhs.dstHost);
  }
  if (lhs.srcDevice) {
    mem_eq = mem_eq && (lhs.srcDevice == rhs.srcDevice);
  }
  if (lhs.srcHost) {
    mem_eq = mem_eq && (lhs.srcHost == rhs.srcHost);
  }

  return pos_eq && extent_eq && mem_eq;
}
|
||||
|
||||
// APIs hipDrvGraphMemcpyNodeGetParams, hipDrvGraphMemcpyNodeSetParams are yet to be implemented in HIP runtime.
|
||||
#if 0
|
||||
// Performs a driver-API 3D memcpy through a single-node HIP graph and returns hipSuccess once the
// graph has been launched and synchronized (every intermediate failure is reported via
// HIP_CHECK / REQUIRE rather than through the return value).
//
// Template parameter:
//   set_params - when true, the node is first created with the reversed copy (src/dst swapped,
//                direction reversed) and then corrected with hipDrvGraphMemcpyNodeSetParams, so
//                that the set-params path is what actually produces the requested copy.
//
// In both modes the node parameters are read back with hipDrvGraphMemcpyNodeGetParams and
// required to compare equal to the requested ones.
//
// NOTE(review): `stream` is accepted but not used in this body; the graph is launched on
// hipStreamPerThread. Confirm whether that is intended before enabling this code.
template <bool set_params = false>
|
||||
hipError_t DrvMemcpy3DGraphWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
|
||||
hipCtx_t context, hipStream_t stream = nullptr) {
|
||||
// The parameters describing the copy the caller actually asked for.
auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
|
||||
hipGraph_t g = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&g, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
if constexpr (set_params) {
|
||||
// Seed the node with the opposite copy so that SetParams must take effect for the test to pass.
auto reversed_parms = GetDrvMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
|
||||
ReverseMemcpyDirection(kind));
|
||||
HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms, context));
|
||||
HIP_CHECK(hipDrvGraphMemcpyNodeSetParams(node, &parms));
|
||||
} else {
|
||||
HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &parms, context));
|
||||
}
|
||||
|
||||
// Round-trip check: what the node reports must match what was requested.
HIP_MEMCPY3D retrieved_params = {0};
|
||||
HIP_CHECK(hipDrvGraphMemcpyNodeGetParams(node, &retrieved_params));
|
||||
REQUIRE(parms == retrieved_params);
|
||||
|
||||
// Instantiate, run on the per-thread stream, and wait for completion.
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(g));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
#endif //if 0
|
||||
|
||||
template <bool async = false>
|
||||
hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
|
||||
hipStream_t stream = nullptr) {
|
||||
auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
|
||||
if constexpr (async) {
|
||||
return hipDrvMemcpy3DAsync(&parms, stream);
|
||||
} else {
|
||||
@@ -805,4 +884,4 @@ void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
}
|
||||
@@ -35,15 +35,15 @@ enum class LinearAllocs {
|
||||
inline std::string to_string(const LinearAllocs allocation_type) {
|
||||
switch (allocation_type) {
|
||||
case LinearAllocs::malloc:
|
||||
return "host pageable";
|
||||
return "malloc";
|
||||
case LinearAllocs::mallocAndRegister:
|
||||
return "registered";
|
||||
return "malloc + hipHostRegister";
|
||||
case LinearAllocs::hipHostMalloc:
|
||||
return "host pinned";
|
||||
return "hipHostMalloc";
|
||||
case LinearAllocs::hipMalloc:
|
||||
return "device malloc";
|
||||
return "hipMalloc";
|
||||
case LinearAllocs::hipMallocManaged:
|
||||
return "managed";
|
||||
return "hipMallocManaged";
|
||||
default:
|
||||
return "unknown alloc type";
|
||||
}
|
||||
@@ -83,24 +83,38 @@ template <typename T> class LinearAllocGuard {
|
||||
|
||||
LinearAllocGuard(const LinearAllocGuard&) = delete;
|
||||
|
||||
LinearAllocGuard(LinearAllocGuard&& o)
|
||||
: allocation_type_{o.allocation_type_}, ptr_{o.ptr_}, host_ptr_{o.host_ptr_} {
|
||||
o.allocation_type_ = LinearAllocs::noAlloc;
|
||||
o.ptr_ = nullptr;
|
||||
o.host_ptr_ = nullptr;
|
||||
}
|
||||
LinearAllocGuard(LinearAllocGuard&& o) { *this = std::move(o); }
|
||||
|
||||
LinearAllocGuard& operator=(LinearAllocGuard&& o) {
|
||||
allocation_type_ = o.allocation_type_;
|
||||
ptr_ = o.ptr_;
|
||||
host_ptr_ = o.host_ptr_;
|
||||
if (this != &o) {
|
||||
dealloc();
|
||||
|
||||
o.allocation_type_ = LinearAllocs::noAlloc;
|
||||
o.ptr_ = nullptr;
|
||||
o.host_ptr_ = nullptr;
|
||||
allocation_type_ = o.allocation_type_;
|
||||
ptr_ = o.ptr_;
|
||||
host_ptr_ = o.host_ptr_;
|
||||
|
||||
o.allocation_type_ = LinearAllocs::noAlloc;
|
||||
o.ptr_ = nullptr;
|
||||
o.host_ptr_ = nullptr;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~LinearAllocGuard() {
|
||||
~LinearAllocGuard() { dealloc(); }
|
||||
|
||||
T* ptr() const { return ptr_; };
|
||||
T* host_ptr() const { return host_ptr_; }
|
||||
|
||||
private:
|
||||
LinearAllocs allocation_type_ = LinearAllocs::noAlloc;
|
||||
T* ptr_ = nullptr;
|
||||
T* host_ptr_ = nullptr;
|
||||
|
||||
void dealloc() {
|
||||
if (ptr_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
// No Catch macros, don't want to possibly throw in the destructor
|
||||
if (ptr_ != nullptr) {
|
||||
switch (allocation_type_) {
|
||||
@@ -123,14 +137,6 @@ template <typename T> class LinearAllocGuard {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
T* ptr() const { return ptr_; };
|
||||
T* host_ptr() const { return host_ptr_; }
|
||||
|
||||
private:
|
||||
LinearAllocs allocation_type_ = LinearAllocs::noAlloc;
|
||||
T* ptr_ = nullptr;
|
||||
T* host_ptr_ = nullptr;
|
||||
};
|
||||
|
||||
template <typename T> class LinearAllocGuardMultiDim {
|
||||
@@ -210,6 +216,42 @@ template <typename T> class ArrayAllocGuard {
|
||||
const hipExtent extent_;
|
||||
};
|
||||
|
||||
template <typename T> class MipmappedArrayAllocGuard {
|
||||
public:
|
||||
// extent should contain logical width
|
||||
MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int levels,
|
||||
const unsigned int flags)
|
||||
: extent_{extent}, levels_{levels} {
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
|
||||
HIP_CHECK(hipMallocMipmappedArray(&ptr_, &desc, extent_, levels_, flags));
|
||||
}
|
||||
|
||||
MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u)
|
||||
: MipmappedArrayAllocGuard{extent, 1, flags} {}
|
||||
|
||||
~MipmappedArrayAllocGuard() { static_cast<void>(hipFreeMipmappedArray(ptr_)); }
|
||||
|
||||
MipmappedArrayAllocGuard(const MipmappedArrayAllocGuard&) = delete;
|
||||
MipmappedArrayAllocGuard(MipmappedArrayAllocGuard&&) = delete;
|
||||
|
||||
hipMipmappedArray_t ptr() const { return ptr_; }
|
||||
|
||||
hipArray_t GetLevel(unsigned int level) {
|
||||
hipArray_t ret;
|
||||
HIP_CHECK(hipGetMipmappedArrayLevel(&ret, ptr_, level));
|
||||
return ret;
|
||||
}
|
||||
|
||||
hipExtent extent() const { return extent_; }
|
||||
|
||||
unsigned int levels() const { return levels_; }
|
||||
|
||||
private:
|
||||
hipMipmappedArray_t ptr_ = nullptr;
|
||||
const hipExtent extent_;
|
||||
const unsigned int levels_;
|
||||
};
|
||||
|
||||
template <typename T> class DrvArrayAllocGuard {
|
||||
public:
|
||||
// extent should contain width in bytes
|
||||
@@ -266,24 +308,24 @@ class StreamGuard {
|
||||
|
||||
StreamGuard(const StreamGuard&) = delete;
|
||||
|
||||
StreamGuard(StreamGuard&& o)
|
||||
: stream_type_{o.stream_type_}, flags_{o.flags_}, priority_{o.priority_}, stream_{o.stream_} {
|
||||
o.stream_type_ = Streams::nullstream;
|
||||
o.flags_ = 0u;
|
||||
o.priority_ = 0;
|
||||
o.stream_ = nullptr;
|
||||
}
|
||||
StreamGuard(StreamGuard&& o) { *this = std::move(o); }
|
||||
|
||||
StreamGuard& operator=(StreamGuard&& o) {
|
||||
stream_type_ = o.stream_type_;
|
||||
flags_ = o.flags_;
|
||||
priority_ = o.priority_;
|
||||
stream_ = o.stream_;
|
||||
if (this != &o) {
|
||||
if (stream_type_ == Streams::created) {
|
||||
static_cast<void>(hipStreamDestroy(stream_));
|
||||
}
|
||||
|
||||
o.stream_type_ = Streams::nullstream;
|
||||
o.flags_ = 0u;
|
||||
o.priority_ = 0;
|
||||
o.stream_ = nullptr;
|
||||
stream_type_ = o.stream_type_;
|
||||
flags_ = o.flags_;
|
||||
priority_ = o.priority_;
|
||||
stream_ = o.stream_;
|
||||
|
||||
o.stream_type_ = Streams::nullstream;
|
||||
o.flags_ = 0u;
|
||||
o.priority_ = 0;
|
||||
o.stream_ = nullptr;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -170,7 +170,7 @@ inline bool DeviceAttributesSupport(const int device, Attributes... attributes)
|
||||
return (... && DeviceAttributeSupport(device, attributes));
|
||||
}
|
||||
|
||||
inline int GetDeviceAttribute(int device, const hipDeviceAttribute_t attr) {
|
||||
inline int GetDeviceAttribute(const hipDeviceAttribute_t attr, int device) {
|
||||
int value = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&value, attr, device));
|
||||
return value;
|
||||
|
||||
@@ -22,6 +22,7 @@ add_subdirectory(rtc)
|
||||
add_subdirectory(deviceLib)
|
||||
add_subdirectory(graph)
|
||||
add_subdirectory(memory)
|
||||
add_subdirectory(stream_ordered)
|
||||
add_subdirectory(stream)
|
||||
add_subdirectory(event)
|
||||
add_subdirectory(occupancy)
|
||||
@@ -43,11 +44,15 @@ add_subdirectory(g++)
|
||||
add_subdirectory(module)
|
||||
add_subdirectory(channelDescriptor)
|
||||
add_subdirectory(executionControl)
|
||||
add_subdirectory(math)
|
||||
add_subdirectory(vector_types)
|
||||
add_subdirectory(atomics)
|
||||
add_subdirectory(complex)
|
||||
add_subdirectory(p2p)
|
||||
add_subdirectory(gcc)
|
||||
add_subdirectory(syncthreads)
|
||||
add_subdirectory(threadfence)
|
||||
add_subdirectory(virtualMemoryManagement)
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "amd")
|
||||
add_subdirectory(callback)
|
||||
@@ -58,3 +63,5 @@ add_subdirectory(vulkan_interop)
|
||||
add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246
|
||||
endif()
|
||||
add_subdirectory(synchronization)
|
||||
add_subdirectory(launchBounds)
|
||||
add_subdirectory(assertion)
|
||||
@@ -0,0 +1,49 @@
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
# Build the AssertionTest executable. The source list and the runtime-compilation
# library that is linked both depend on the HIP platform.
if(HIP_PLATFORM MATCHES "nvidia")
  # NVIDIA: only the device-side assert() tests are built; link against nvrtc.
  set(TEST_SRC assert.cc)
  hip_add_exe_to_target(
    NAME AssertionTest
    TEST_SRC ${TEST_SRC}
    TEST_TARGET_NAME build_tests
    LINKER_LIBS nvrtc
  )
elseif(HIP_PLATFORM MATCHES "amd")
  # AMD: additionally build the RTC-based static_assert tests; link against hiprtc.
  set(TEST_SRC static_assert.cc assert.cc)
  hip_add_exe_to_target(
    NAME AssertionTest
    TEST_SRC ${TEST_SRC}
    TEST_TARGET_NAME build_tests
    LINKER_LIBS hiprtc
  )
endif()
|
||||
|
||||
# Below tests fail in PSDB
|
||||
#add_test(NAME Unit_StaticAssert_Positive_Basic
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# static_assert_kernels_positive.cc 2)
|
||||
#
|
||||
#add_test(NAME Unit_StaticAssert_Negative_Basic
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# static_assert_kernels_negative.cc 2)
|
||||
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <csetjmp>
|
||||
#include <csignal>
|
||||
|
||||
/**
|
||||
* @addtogroup assert assert
|
||||
* @{
|
||||
* @ingroup DeviceLanguageTest
|
||||
* `void assert(int expression)` -
|
||||
* Stops the kernel execution if expression is equal to zero.
|
||||
*/
|
||||
|
||||
// Jump target that on_sigabrt() uses to unwind back into try_and_catch_abort().
jmp_buf env_ignore_abort;

// 1 when the most recent try_and_catch_abort() call intercepted a SIGABRT, 0 otherwise.
// The flag is written from a signal handler, so it uses volatile std::sig_atomic_t, the
// integer type the standard permits a handler to modify.
volatile std::sig_atomic_t abort_raised_flag = 0;

/**
 * SIGABRT handler installed by try_and_catch_abort().
 *
 * Restores the default disposition for the signal, records that an abort was
 * raised, and jumps back to the setjmp() point so the process keeps running.
 *
 * @param signum Signal number that was delivered.
 */
void on_sigabrt(int signum) {
  signal(signum, SIG_DFL);
  abort_raised_flag = 1;
  longjmp(env_ignore_abort, 1);
}

/**
 * Runs func and intercepts a SIGABRT raised while it executes.
 *
 * After the call, abort_raised_flag is 1 if func aborted and 0 if it returned
 * normally. When func aborts, the remainder of func is skipped, so any cleanup
 * it would have performed after the abort point does not run.
 *
 * @param func Callable to execute under SIGABRT interception.
 */
void try_and_catch_abort(void (*func)()) {
  // Start from a clean state: without this reset a SIGABRT caught by an earlier
  // call would still be reported here, making the result depend on call order.
  abort_raised_flag = 0;

  if (!setjmp(env_ignore_abort)) {
    signal(SIGABRT, &on_sigabrt);
    (*func)();
    // Normal return: func did not abort, so put the default handler back.
    signal(SIGABRT, SIG_DFL);
  }
}
|
||||
|
||||
/**
 * Kernel whose device-side assert is expected to hold for every thread.
 *
 * Each thread stores its grid-wide linear index into *x and then asserts that
 * the index is non-negative.
 *
 * @param x Device pointer that every thread writes its index to.
 */
__global__ void AssertPassKernel(int* x) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  *x = tid;
  // expected always to be true
  assert(tid >= 0);
}
|
||||
|
||||
/**
 * Kernel whose device-side assert is expected to fire.
 *
 * Each thread stores its grid-wide linear index into *x and then asserts that
 * the index is odd, which does not hold for the even-indexed threads.
 *
 * @param x Device pointer that every thread writes its index to.
 */
__global__ void AssertFailKernel(int* x) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  *x = tid;
  // expected to fail for the even thread indices
  assert(tid % 2 == 1);
}
|
||||
|
||||
template <bool should_abort> void LaunchAssertKernel() {
|
||||
const int num_blocks = 2;
|
||||
const int num_threads = 16;
|
||||
int *d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(int)));
|
||||
|
||||
if constexpr (should_abort) {
|
||||
AssertFailKernel<<<num_blocks, num_threads, 0, 0>>>(d_a);
|
||||
#if HT_AMD
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
#else
|
||||
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert);
|
||||
#endif
|
||||
} else {
|
||||
AssertPassKernel<<<num_blocks, num_threads, 0, 0>>>(d_a);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels with asserts that have an expression equal to 1.
|
||||
* - Expects that SIGABRT is not raised and kernels have executed successfully.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") {
  // Launch the passing-assert kernel with SIGABRT interception in place.
  void (*const launch_passing_kernel)() = &LaunchAssertKernel<false>;
  try_and_catch_abort(launch_passing_kernel);

  // No abort may have been intercepted.
  REQUIRE(abort_raised_flag == 0);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels with asserts that have an expression equal to 0.
|
||||
* - Expects that SIGABRT is raised and kernels have been stopped on AMD.
|
||||
* - The HIP runtime also aborts the host code, so this test case uses signal handlers
|
||||
* to avoid host code abortion.
|
||||
* - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA.
|
||||
* - The host code is not aborted.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") {
  // Launch the failing-assert kernel with SIGABRT interception in place.
  void (*const launch_failing_kernel)() = &LaunchAssertKernel<true>;
  try_and_catch_abort(launch_failing_kernel);

#if HT_AMD
  // AMD build: a SIGABRT must have been intercepted.
  REQUIRE(abort_raised_flag == 1);
#else
  // Other builds: no SIGABRT is expected; the error code is checked inside
  // LaunchAssertKernel instead.
  REQUIRE(abort_raised_flag == 0);
#endif
}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include "static_assert_kernels_rtc.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup static_assert static_assert
|
||||
* @{
|
||||
* @ingroup DeviceLanguageTest
|
||||
* `void static_assert(constexpr expression, const char* message)` -
|
||||
* Stops the compilation if expression is equal to zero, and displays the specified message.
|
||||
*/
|
||||
|
||||
void StaticAssertWrapper(const char* program_source) {
|
||||
hiprtcProgram program{};
|
||||
|
||||
HIPRTC_CHECK(
|
||||
hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr));
|
||||
hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};
|
||||
|
||||
// Get the compile log and count compiler error messages
|
||||
size_t log_size{};
|
||||
HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
|
||||
std::string log(log_size, ' ');
|
||||
HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));
|
||||
int error_count{0};
|
||||
|
||||
int expected_error_count{2};
|
||||
std::string error_message{"error:"};
|
||||
|
||||
size_t n_pos = log.find(error_message, 0);
|
||||
while (n_pos != std::string::npos) {
|
||||
++error_count;
|
||||
n_pos = log.find(error_message, n_pos + 1);
|
||||
}
|
||||
|
||||
HIPRTC_CHECK(hiprtcDestroyProgram(&program));
|
||||
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
|
||||
REQUIRE(error_count == expected_error_count);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles kernels with static_assert calls:
|
||||
* -# Expected that static_assert passes and compilation is successful.
|
||||
* -# Expected that static_assert fails and compilation has errors.
|
||||
* - Uses RTC to perform compilation.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/static_assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") {
  // Compile the kStaticAssert_Positive source; StaticAssertWrapper performs all checks.
  StaticAssertWrapper(kStaticAssert_Positive);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Passes invalidly formed expressions to static_assert calls.
|
||||
* - Uses expressions that are not constexpr and values that are not known during compilation.
|
||||
* - Uses RTC to perform compilation.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/static_assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") {
  // Compile the kStaticAssert_Negative source; StaticAssertWrapper performs all checks.
  StaticAssertWrapper(kStaticAssert_Negative);
}
|
||||
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Negative case, intentionally ill-formed: `tid` is derived from threadIdx/blockIdx/blockDim,
// so the static_assert condition is not a constant expression and compilation is expected to
// fail here. Keep the code as is; the compile error is the point of this kernel.
__global__ void StaticAssertErrorKernel1() {
|
||||
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
|
||||
}
|
||||
|
||||
// Negative case, intentionally ill-formed: the static_assert condition increments the
// non-const local `tid`, so it is not a constant expression and compilation is expected to
// fail here. Keep the code as is; the compile error is the point of this kernel.
__global__ void StaticAssertErrorKernel2() {
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Passing case: the static_assert condition compares two compile-time sizes.
// NOTE(review): this holds only where long is wider than int; on a data model with
// sizeof(long) == sizeof(int) the assertion would fail - confirm the supported targets.
__global__ void StaticAssertPassKernel1() {
|
||||
static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
|
||||
}
|
||||
|
||||
// Passing case: 10 > 5 is a true constant expression, so this static_assert never fires.
__global__ void StaticAssertPassKernel2() { static_assert(10 > 5, "[StaticAssertPassKernel2]"); }
|
||||
|
||||
// Failing case, intentional: the static_assert requires int to be larger than long.
// Presumably false on every supported target, so a compile error is expected here.
__global__ void StaticAssertFailKernel1() {
|
||||
static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
|
||||
}
|
||||
|
||||
// Failing case, intentional: 10 < 5 is a false constant expression, so this static_assert
// always fires and produces a compile error.
__global__ void StaticAssertFailKernel2() { static_assert(10 < 5, "[StaticAssertFailKernel2]"); }
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Positive and negative kernels used for the static_assert Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// Program source handed to the runtime compiler by the RTC static_assert test.
// It holds four kernels whose static_assert conditions are constant expressions:
// StaticAssertPassKernel1/2 are written to pass, StaticAssertFailKernel1/2 are written to fail.
// The text between R"( and )" is compiled verbatim, so it must not be edited for style.
static constexpr auto kStaticAssert_Positive{
|
||||
R"(
|
||||
__global__ void StaticAssertPassKernel1() {
|
||||
static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
|
||||
}
|
||||
|
||||
__global__ void StaticAssertPassKernel2() {
|
||||
static_assert(10 > 5, "[StaticAssertPassKernel2]");
|
||||
}
|
||||
|
||||
__global__ void StaticAssertFailKernel1() {
|
||||
static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
|
||||
}
|
||||
|
||||
__global__ void StaticAssertFailKernel2() {
|
||||
static_assert(10 < 5, "[StaticAssertFailKernel2]");
|
||||
}
|
||||
)"};
|
||||
|
||||
// Program source handed to the runtime compiler by the RTC static_assert test.
// It holds two kernels whose static_assert conditions depend on values computed from
// threadIdx/blockIdx/blockDim, i.e. they are not constant expressions.
// The text between R"( and )" is compiled verbatim, so it must not be edited for style.
static constexpr auto kStaticAssert_Negative{
|
||||
R"(
|
||||
__global__ void StaticAssertErrorKernel1() {
|
||||
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
|
||||
}
|
||||
|
||||
__global__ void StaticAssertErrorKernel2() {
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
|
||||
}
|
||||
)"};
|
||||
@@ -18,31 +18,145 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
set(TEST_SRC
|
||||
atomicExch.cc
|
||||
atomicExch_system.cc
|
||||
)
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC
|
||||
atomicAnd.cc
|
||||
atomicAnd_system.cc
|
||||
atomicOr.cc
|
||||
atomicOr_system.cc
|
||||
atomicXor.cc
|
||||
atomicXor_system.cc
|
||||
atomicMin.cc
|
||||
atomicMin_system.cc
|
||||
atomicMax.cc
|
||||
atomicMax_system.cc
|
||||
safeAtomicMin.cc
|
||||
unsafeAtomicMin.cc
|
||||
safeAtomicMax.cc
|
||||
unsafeAtomicMax.cc
|
||||
__hip_atomic_fetch_min.cc
|
||||
__hip_atomic_fetch_max.cc
|
||||
atomic_builtins.cc
|
||||
acquire_release.cc
|
||||
sequential_consistency.cc
|
||||
atomicAdd.cc
|
||||
atomicAdd_system.cc
|
||||
unsafeAtomicAdd.cc
|
||||
safeAtomicAdd.cc
|
||||
atomicSub.cc
|
||||
atomicSub_system.cc
|
||||
atomicCAS.cc
|
||||
atomicCAS_system.cc
|
||||
__hip_atomic_fetch_add.cc
|
||||
__hip_atomic_compare_exchange_strong.cc
|
||||
atomicExch.cc
|
||||
atomicExch_system.cc
|
||||
__hip_atomic_fetch_and.cc
|
||||
__hip_atomic_fetch_or.cc
|
||||
__hip_atomic_fetch_xor.cc
|
||||
__hip_atomic_exchange.cc
|
||||
)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "nvidia")
|
||||
set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
hip_add_exe_to_target(NAME AtomicsTest
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
LINKER_LIBS "nvrtc -rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
elseif(HIP_PLATFORM MATCHES "amd")
|
||||
hip_add_exe_to_target(NAME AtomicsTest
|
||||
#atomicInc & atomicDec tests are disabled on MI300X due to SWDEV-440688
|
||||
set(NOT_FOR_MI300X_TEST
|
||||
atomicInc.cc
|
||||
atomicDec.cc
|
||||
)
|
||||
set(MI300X_TARGET gfx941)
|
||||
# CheckRejectedArchs(<offload-arch-string>)
#
# Scans the argument for `--offload-arch=gfx...` entries and sets ARCH_CHECK in
# the caller's scope:
#    1  - at least one entry names an architecture listed in MI300X_TARGET
#   -1  - no entry does (also the result for an empty argument)
function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL)
  # Default result; overwritten below when a rejected architecture is found.
  set(ARCH_CHECK -1 PARENT_SCOPE)

  # The input is quoted so an empty value is a valid (match-less) input instead
  # of a "too few arguments" error, and so a value containing ';' stays intact.
  string(REGEX MATCHALL "--offload-arch=gfx[0-9a-z]+" OFFLOAD_ARCH_LIST
         "${OFFLOAD_ARCH_STR_LOCAL}")

  foreach(OFFLOAD_ARCH IN LISTS OFFLOAD_ARCH_LIST)
    # Re-match the single entry to capture the bare architecture name in CMAKE_MATCH_1.
    string(REGEX MATCHALL "--offload-arch=(gfx[0-9a-z]+)" matches "${OFFLOAD_ARCH}")
    if(CMAKE_MATCH_COUNT EQUAL 1)
      if(CMAKE_MATCH_1 IN_LIST MI300X_TARGET)
        set(ARCH_CHECK 1 PARENT_SCOPE)
      endif() # CMAKE_MATCH_1
    endif() # CMAKE_MATCH_COUNT
  endforeach() # OFFLOAD_ARCH_LIST
endfunction() # CheckRejectedArchs
|
||||
|
||||
# Decide whether the build targets an architecture rejected by CheckRejectedArchs.
# The explicit OFFLOAD_ARCH_STR variable takes precedence over the
# HCC_AMDGPU_TARGET environment variable; with neither set, nothing is rejected.
if(DEFINED OFFLOAD_ARCH_STR)
  # Quoted so a value containing ';' is passed as one argument rather than
  # being split, in which case only its first element would be inspected.
  CheckRejectedArchs("${OFFLOAD_ARCH_STR}")
elseif(NOT "$ENV{HCC_AMDGPU_TARGET}" STREQUAL "")
  # The previous `DEFINED $ENV{HCC_AMDGPU_TARGET}` expanded the environment
  # variable first and then asked whether a CMake variable named after its
  # *value* exists, so this branch was effectively never taken. An unset or
  # empty environment variable is treated as "not provided".
  CheckRejectedArchs("$ENV{HCC_AMDGPU_TARGET}")
else()
  set(ARCH_CHECK -1)
endif()
|
||||
# Append the NOT_FOR_MI300X_TEST sources to TEST_SRC unless the architecture
# check flagged a rejected target (ARCH_CHECK is -1 when nothing was rejected).
if(NOT ARCH_CHECK EQUAL -1)
  message(STATUS "Removing test: ${NOT_FOR_MI300X_TEST}")
else()
  message(STATUS "Adding test: ${NOT_FOR_MI300X_TEST}")
  list(APPEND TEST_SRC ${NOT_FOR_MI300X_TEST})
endif()
|
||||
|
||||
|
||||
hip_add_exe_to_target(NAME AtomicsTest
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
LINKER_LIBS hiprtc)
|
||||
endif()
|
||||
set(EXPECTED_ERRORS 48)
|
||||
|
||||
# SWDEV-435667: Below 2 tests failed in stress test on 01/12/23
|
||||
#add_test(NAME Unit_atomicExch_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicExch_negative_kernels.cc 40)
|
||||
#
|
||||
#add_test(NAME Unit_atomicExch_system_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicExch_system_negative_kernels.cc 40)
|
||||
# Below tests fail in PSDB
|
||||
#add_test(NAME Unit_atomicAnd_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicAnd_negative_kernels.cc ${EXPECTED_ERRORS})
|
||||
#
|
||||
#add_test(NAME Unit_atomicOr_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicOr_negative_kernels.cc ${EXPECTED_ERRORS})
|
||||
#
|
||||
#add_test(NAME Unit_atomicXor_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicXor_negative_kernels.cc ${EXPECTED_ERRORS})
|
||||
#
|
||||
#add_test(NAME Unit_atomicMin_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicMin_negative_kernels.cc ${EXPECTED_ERRORS})
|
||||
#
|
||||
#add_test(NAME Unit_atomicMax_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicMax_negative_kernels.cc ${EXPECTED_ERRORS})
|
||||
#add_test(NAME Unit_AtomicBuiltins_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomic_builtins_kernels.cc 60 27) # Should be 35 warnings, see EXSWHTEC-309
|
||||
#add_test(NAME Unit_atomicAdd_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicAdd_negative_kernels.cc 48)
|
||||
#add_test(NAME Unit_atomicSub_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicSub_negative_kernels.cc 48)
|
||||
#add_test(NAME Unit_atomicInc_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicInc_negative_kernels.cc 8)
|
||||
#
|
||||
#add_test(NAME Unit_atomicDec_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicDec_negative_kernels.cc 8)
|
||||
#
|
||||
#add_test(NAME Unit_atomicCAS_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicCAS_negative_kernels.cc 48)
|
||||
#
|
||||
# SWDEV-435667: Below 2 tests failed in stress test on 01/12/23
|
||||
#add_test(NAME Unit_atomicExch_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicExch_negative_kernels.cc 40)
|
||||
#
|
||||
#add_test(NAME Unit_atomicExch_system_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
# atomicExch_system_negative_kernels.cc 40)
|
||||
endif()
|
||||
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_compare_exchange_strong __hip_atomic_compare_exchange_strong
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition on a target memory location. Each thread will add the same value to the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of __hip_atomic_compare_exchange_strong
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - WAVEFRONT memory scope.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_compare_exchange_strong.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Warp size of device 0; used as the first argument of the multi-address sections.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Second argument used by the "Scattered addresses" section.
  constexpr unsigned int kCacheLineSize = 128u;

  // Repeat all three sections for the number of iterations given in cmd_options.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kBuiltinCAS,
                                   __HIP_MEMORY_SCOPE_WAVEFRONT>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kBuiltinCAS,
                                   __HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kBuiltinCAS,
                                   __HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size, kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition on a target memory location. Each thread will add the same value to the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of __hip_atomic_compare_exchange_strong
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - WORKGROUP memory scope.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_compare_exchange_strong.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Warp size of device 0; used as the first argument of the multi-address sections.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Second argument used by the "Scattered addresses" section.
  constexpr unsigned int kCacheLineSize = 128u;

  // Repeat all three sections for the number of iterations given in cmd_options.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kBuiltinCAS,
                                   __HIP_MEMORY_SCOPE_WORKGROUP>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kBuiltinCAS,
                                   __HIP_MEMORY_SCOPE_WORKGROUP>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kBuiltinCAS,
                                   __HIP_MEMORY_SCOPE_WORKGROUP>(warp_size, kCacheLineSize);
    }
  }
}
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "atomicExch_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_exchange __hip_atomic_exchange
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* ________________________
|
||||
* Test cases from other modules:
|
||||
* - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* exchange into a runtime determined memory location. Each thread will exchange its own grid wide
|
||||
* linear index + offset into the memory location, storing the return value into a separate output
|
||||
* array slot corresponding to it. Once complete, the union of output array and exchange memory is
|
||||
* validated to contain all values in the range [0, number_of_threads +
|
||||
* number_of_exchange_memory_slots). Several memory access patterns are tested:
|
||||
* -# All threads exchange to a single memory location
|
||||
* -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of __hip_atomic_exchange
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory
|
||||
* - Exchange memory located in shared memory
|
||||
* - WAVEFRONT memory scope
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_exchange.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Wavefront", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Second argument of the scattered-address run; the test description refers to this value as
  // the L1 cache line size in bytes.
  const unsigned int scatter_stride = 128u;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // Each pass registers three dynamic sections; the pass index keeps their names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Same address " << pass) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, AtomicScopes::builtin,
                                             __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, AtomicScopes::builtin,
                                             __HIP_MEMORY_SCOPE_WAVEFRONT>(
          lanes_per_warp, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, AtomicScopes::builtin,
                                             __HIP_MEMORY_SCOPE_WAVEFRONT>(
          lanes_per_warp, scatter_stride);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* exchange into a runtime determined memory location. Each thread will exchange its own grid wide
|
||||
* linear index + offset into the memory location, storing the return value into a separate output
|
||||
* array slot corresponding to it. Once complete, the union of output array and exchange memory is
|
||||
* validated to contain all values in the range [0, number_of_threads +
|
||||
* number_of_exchange_memory_slots). Several memory access patterns are tested:
|
||||
* -# All threads exchange to a single memory location
|
||||
* -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of __hip_atomic_exchange
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory
|
||||
* - Exchange memory located in shared memory
|
||||
* - WORKGROUP memory scope
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_exchange.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Workgroup", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Second argument of the scattered-address run; the test description refers to this value as
  // the L1 cache line size in bytes.
  const unsigned int scatter_stride = 128u;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // Each pass registers three dynamic sections; the pass index keeps their names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Same address " << pass) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, AtomicScopes::builtin,
                                             __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, AtomicScopes::builtin,
                                             __HIP_MEMORY_SCOPE_WORKGROUP>(
          lanes_per_warp, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      AtomicExchSingleDeviceSingleKernelTest<TestType, AtomicScopes::builtin,
                                             __HIP_MEMORY_SCOPE_WORKGROUP>(
          lanes_per_warp, scatter_stride);
    }
  }
}
|
||||
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* ________________________
|
||||
* Test cases from other modules:
|
||||
* - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition on a target memory location. Each thread will add the same value to the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of __hip_atomic_fetch_add
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - WAVEFRONT memory scope.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_add.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Wavefront", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Atomic operation selected for every run in this test case.
  constexpr auto kOperation = AtomicOperation::kBuiltinAdd;

  // Second argument of the scattered-address run; the test description refers to this value as
  // the L1 cache line size in bytes.
  const unsigned int scatter_stride = 128u;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // Each pass registers three dynamic sections; the pass index keeps their names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Same address " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          lanes_per_warp, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>(
          lanes_per_warp, scatter_stride);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition on a target memory location. Each thread will add the same value to the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of __hip_atomic_fetch_add
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - WORKGROUP memory scope.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_add.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Workgroup", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Atomic operation selected for every run in this test case.
  constexpr auto kOperation = AtomicOperation::kBuiltinAdd;

  // Second argument of the scattered-address run; the test description refers to this value as
  // the L1 cache line size in bytes.
  const unsigned int scatter_stride = 128u;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // Each pass registers three dynamic sections; the pass index keeps their names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Same address " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          lanes_per_warp, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << pass) {
      SingleDeviceSingleKernelTest<TestType, kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>(
          lanes_per_warp, scatter_stride);
    }
  }
}
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_and __hip_atomic_fetch_and
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;

  // One dynamic section per pass; the pass index keeps the section names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Same address " << pass) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per pass; the pass index keeps the section names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Adjacent address " << pass) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(lanes_per_warp,
                                                                          sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;

  // Second argument of the run; presumably the byte distance between target elements
  // (the original names it cache_line_size) - confirm against bitwise_common.hh.
  const unsigned int scatter_stride = 128u;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per pass; the pass index keeps the section names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Scattered address " << pass) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(lanes_per_warp,
                                                                          scatter_stride);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WORKGROUP;

  // One dynamic section per pass; the pass index keeps the section names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Same address " << pass) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WORKGROUP;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per pass; the pass index keeps the section names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Adjacent address " << pass) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(lanes_per_warp,
                                                                          sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = Bitwise::AtomicOperation::kBuiltinAnd;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WORKGROUP;

  // Second argument of the run; presumably the byte distance between target elements
  // (the original names it cache_line_size) - confirm against bitwise_common.hh.
  const unsigned int scatter_stride = 128u;

  // Warp size reported by the runtime for device 0.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per pass; the pass index keeps the section names unique.
  for (int pass = 0; pass < cmd_options.iterations; ++pass) {
    DYNAMIC_SECTION("Scattered address " << pass) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(lanes_per_warp,
                                                                          scatter_stride);
    }
  }
}
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_max __hip_atomic_fetch_max
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_max.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Repeat the run cmd_options.iterations times, one uniquely named section per round.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << round) {
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMax,
                                           __HIP_MEMORY_SCOPE_WAVEFRONT>(
          1, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_max.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Warp size reported by the runtime for device 0.
  int warp_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // Repeat the run cmd_options.iterations times, one uniquely named section per round.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Adjacent address " << round) {
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMax,
                                           __HIP_MEMORY_SCOPE_WAVEFRONT>(
          warp_width, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_max.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Second argument of the run; presumably the byte distance between target elements
  // (the original names it cache_line_size) - confirm against min_max_common.hh.
  const unsigned int stride_bytes = 128u;

  // Warp size reported by the runtime for device 0.
  int warp_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // Repeat the run cmd_options.iterations times, one uniquely named section per round.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Scattered address " << round) {
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMax,
                                           __HIP_MEMORY_SCOPE_WAVEFRONT>(
          warp_width, stride_bytes);
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_max.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Repeat the run cmd_options.iterations times, one uniquely named section per round.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << round) {
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMax,
                                           __HIP_MEMORY_SCOPE_WORKGROUP>(
          1, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_max.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Warp size reported by the runtime for device 0.
  int warp_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // Repeat the run cmd_options.iterations times, one uniquely named section per round.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Adjacent address " << round) {
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMax,
                                           __HIP_MEMORY_SCOPE_WORKGROUP>(
          warp_width, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_max.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Second argument of the run; presumably the byte distance between target elements
  // (the original names it cache_line_size) - confirm against min_max_common.hh.
  const unsigned int stride_bytes = 128u;

  // Warp size reported by the runtime for device 0.
  int warp_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // Repeat the run cmd_options.iterations times, one uniquely named section per round.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Scattered address " << round) {
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMax,
                                           __HIP_MEMORY_SCOPE_WORKGROUP>(
          warp_width, stride_bytes);
    }
    ++round;
  }
}
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_min __hip_atomic_fetch_min
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_min.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;

  // One dynamic section per repetition; the index keeps the section names unique.
  for (int rep = 0; rep < cmd_options.iterations; ++rep) {
    DYNAMIC_SECTION("Same address " << rep) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_min.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;

  // Warp size reported by the runtime for device 0.
  int warp_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per repetition; the index keeps the section names unique.
  for (int rep = 0; rep < cmd_options.iterations; ++rep) {
    DYNAMIC_SECTION("Adjacent address " << rep) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_width,
                                                                         sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_min.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WAVEFRONT;

  // Second argument of the run; presumably the byte distance between target elements
  // (the original names it cache_line_size) - confirm against min_max_common.hh.
  const unsigned int stride_bytes = 128u;

  // Warp size reported by the runtime for device 0.
  int warp_width = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_width, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per repetition; the index keeps the section names unique.
  for (int rep = 0; rep < cmd_options.iterations; ++rep) {
    DYNAMIC_SECTION("Scattered address " << rep) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(warp_width,
                                                                         stride_bytes);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_min.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Operation and memory scope under test, named once so the call below stays short.
  constexpr auto kOperation = MinMax::AtomicOperation::kBuiltinMin;
  constexpr auto kScope = __HIP_MEMORY_SCOPE_WORKGROUP;

  // One dynamic section per repetition; the index keeps the section names unique.
  for (int rep = 0; rep < cmd_options.iterations; ++rep) {
    DYNAMIC_SECTION("Same address " << rep) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation, kScope>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_min.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long, float, double) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMin,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_min.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long, float, double) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kBuiltinMin,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_or __hip_atomic_fetch_or
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_xor __hip_atomic_fetch_xor
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,551 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "memory_order_common.hh"
|
||||
|
||||
/**
 * Runs the acquire/release helpers for the load/store builtin operation.
 *
 * For each memory order covered (__ATOMIC_ACQUIRE under the "ACQUIRE/RELEASE" section and
 * __ATOMIC_SEQ_CST) the test calls AcquireRelease::Test at WAVEFRONT, WORKGROUP and AGENT memory
 * scope, and AcquireRelease::SystemTest in the "SYSTEM" section.
 */
TEST_CASE("Unit___hip_atomic_load_store_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kLoadStore;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
/**
 * Runs the acquire/release helpers for the exchange builtin operation.
 *
 * For each memory order covered (__ATOMIC_ACQUIRE under the "ACQUIRE/RELEASE" section,
 * __ATOMIC_ACQ_REL and __ATOMIC_SEQ_CST) the test calls AcquireRelease::Test at WAVEFRONT,
 * WORKGROUP and AGENT memory scope, and AcquireRelease::SystemTest in the "SYSTEM" section.
 */
TEST_CASE("Unit___hip_atomic_exchange_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kExchange;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
/**
 * Runs the acquire/release helpers for the strong compare-exchange builtin operation.
 *
 * For each memory order covered (__ATOMIC_ACQUIRE under the "ACQUIRE/RELEASE" section,
 * __ATOMIC_ACQ_REL and __ATOMIC_SEQ_CST) the test calls AcquireRelease::Test at WAVEFRONT,
 * WORKGROUP and AGENT memory scope, and AcquireRelease::SystemTest in the "SYSTEM" section.
 */
TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kCompareExchangeStrong;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
/**
 * Runs the acquire/release helpers for the weak compare-exchange builtin operation.
 *
 * For each memory order covered (__ATOMIC_ACQUIRE under the "ACQUIRE/RELEASE" section,
 * __ATOMIC_ACQ_REL and __ATOMIC_SEQ_CST) the test calls AcquireRelease::Test at WAVEFRONT,
 * WORKGROUP and AGENT memory scope, and AcquireRelease::SystemTest in the "SYSTEM" section.
 */
TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kCompareExchangeWeak;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
/**
 * Runs the acquire/release helpers for the fetch-add builtin operation.
 *
 * For each memory order covered (__ATOMIC_ACQUIRE under the "ACQUIRE/RELEASE" section,
 * __ATOMIC_ACQ_REL and __ATOMIC_SEQ_CST) the test calls AcquireRelease::Test at WAVEFRONT,
 * WORKGROUP and AGENT memory scope, and AcquireRelease::SystemTest in the "SYSTEM" section.
 */
TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kAdd;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
/**
 * Runs the acquire/release helpers for the fetch-and builtin operation.
 *
 * For each memory order covered (__ATOMIC_ACQUIRE under the "ACQUIRE/RELEASE" section,
 * __ATOMIC_ACQ_REL and __ATOMIC_SEQ_CST) the test calls AcquireRelease::Test at WAVEFRONT,
 * WORKGROUP and AGENT memory scope, and AcquireRelease::SystemTest in the "SYSTEM" section.
 */
TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kAnd;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
/**
 * Runs the acquire/release helpers for the fetch-or builtin operation.
 *
 * For each memory order covered (__ATOMIC_ACQUIRE under the "ACQUIRE/RELEASE" section,
 * __ATOMIC_ACQ_REL and __ATOMIC_SEQ_CST) the test calls AcquireRelease::Test at WAVEFRONT,
 * WORKGROUP and AGENT memory scope, and AcquireRelease::SystemTest in the "SYSTEM" section.
 */
TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kOr;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;

    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
// Runs the AcquireRelease helpers with BuiltinAtomicOperation::kXor for every combination of
// memory ordering (__ATOMIC_ACQUIRE, __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST) and memory scope
// (wavefront, workgroup and agent through Test; system through SystemTest).
TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kXor;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
// Runs the AcquireRelease helpers with BuiltinAtomicOperation::kMin for every combination of
// memory ordering (__ATOMIC_ACQUIRE, __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST) and memory scope
// (wavefront, workgroup and agent through Test; system through SystemTest).
TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kMin;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
|
||||
// Runs the AcquireRelease helpers with BuiltinAtomicOperation::kMax for every combination of
// memory ordering (__ATOMIC_ACQUIRE, __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST) and memory scope
// (wavefront, workgroup and agent through Test; system through SystemTest).
TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Acquire_Release") {
  constexpr auto kOperation = BuiltinAtomicOperation::kMax;

  SECTION("ACQUIRE/RELEASE") {
    constexpr auto kOrder = __ATOMIC_ACQUIRE;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("ACQ_REL") {
    constexpr auto kOrder = __ATOMIC_ACQ_REL;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }

  SECTION("SEQ_CST") {
    constexpr auto kOrder = __ATOMIC_SEQ_CST;
    SECTION("WAVEFRONT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WAVEFRONT>();
    }
    SECTION("WORKGROUP") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_WORKGROUP>();
    }
    SECTION("AGENT") {
      AcquireRelease::Test<kOperation, kOrder, __HIP_MEMORY_SCOPE_AGENT>();
    }
    SECTION("SYSTEM") {
      AcquireRelease::SystemTest<kOperation, kOrder>();
    }
  }
}
|
||||
@@ -0,0 +1,577 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <cmd_options.hh>
|
||||
|
||||
namespace cg = cooperative_groups;
|
||||
|
||||
// Selects which atomic operation a test in this file exercises. Enumerator values are
// consecutive and start at 0.
enum class AtomicOperation {
  kAdd = 0,            // atomicAdd
  kAddSystem = 1,      // atomicAdd_system
  kSub = 2,            // atomicSub
  kSubSystem = 3,      // atomicSub_system
  kInc = 4,            // atomicInc
  kDec = 5,            // atomicDec
  kUnsafeAdd = 6,      // unsafeAtomicAdd
  kSafeAdd = 7,        // safeAtomicAdd
  kCASAdd = 8,         // addition built on atomicCAS
  kCASAddSystem = 9,   // addition built on atomicCAS_system
  kBuiltinAdd = 10,    // __hip_atomic_fetch_add
  kBuiltinCAS = 11     // addition built on __hip_atomic_compare_exchange_strong
};
|
||||
|
||||
// Operands handed to the atomic operations under test.
constexpr int kIntegerTestValue = 7;              // operand for integer test types
constexpr double kFloatingPointTestValue = 3.125; // operand for floating point test types
constexpr int kIncDecWraparoundValue = 1023;      // operand for increment/decrement operations
|
||||
|
||||
// Retrieves test value constant based on the atomic operation and test type:
|
||||
// - kIncDecWraparoundValue for increment and decrement operations
|
||||
// - kFloatingPointTestValue for floating point test type
|
||||
// - kIntegerTestValue for integer test type
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
__host__ __device__ TestType GetTestValue() {
|
||||
if constexpr (operation == AtomicOperation::kInc || operation == AtomicOperation::kDec) {
|
||||
return kIncDecWraparoundValue;
|
||||
}
|
||||
|
||||
return std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;
|
||||
}
|
||||
|
||||
// Implements an atomic addition via atomicCAS
|
||||
template <typename TestType> __device__ TestType CASAtomicAdd(TestType* address, TestType val) {
|
||||
TestType old = *address, assumed;
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
old = atomicCAS(address, assumed, val + assumed);
|
||||
} while (assumed != old);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
// Implements an atomic addition via atomicCAS_system
|
||||
template <typename TestType>
|
||||
__device__ TestType CASAtomicAddSystem(TestType* address, TestType val) {
|
||||
TestType old = *address, assumed;
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
old = atomicCAS_system(address, assumed, val + assumed);
|
||||
} while (assumed != old);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
// Implements an atomic addition via __hip_atomic_compare_exchange_strong
|
||||
template <typename TestType, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__device__ TestType BuiltinCASAtomicAdd(TestType* address, TestType val) {
|
||||
TestType old = *address, assumed;
|
||||
|
||||
const auto builtin_cas = [](TestType* address, TestType assumed, TestType val) {
|
||||
__hip_atomic_compare_exchange_strong(address, &assumed, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
|
||||
memory_scope);
|
||||
return assumed;
|
||||
};
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
old = builtin_cas(address, assumed, val + assumed);
|
||||
} while (assumed != old);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
// Performs an atomic operation on parameter `mem` based on the `operation` enumerator.
|
||||
// `memory_scope` is forwarded to the builtin operations and is by default device-wide.
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__device__ TestType PerformAtomicOperation(TestType* const mem) {
|
||||
const auto val = GetTestValue<TestType, operation>();
|
||||
|
||||
if constexpr (operation == AtomicOperation::kAdd) {
|
||||
return atomicAdd(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kAddSystem) {
|
||||
return atomicAdd_system(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kSub) {
|
||||
return atomicSub(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kSubSystem) {
|
||||
return atomicSub_system(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kInc) {
|
||||
return atomicInc(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kDec) {
|
||||
return atomicDec(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kUnsafeAdd) {
|
||||
return unsafeAtomicAdd(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kSafeAdd) {
|
||||
return safeAtomicAdd(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kCASAdd) {
|
||||
return CASAtomicAdd(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kCASAddSystem) {
|
||||
return CASAtomicAddSystem(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinAdd) {
|
||||
return __hip_atomic_fetch_add(mem, val, __ATOMIC_RELAXED, memory_scope);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinCAS) {
|
||||
return BuiltinCASAtomicAdd<TestType, memory_scope>(mem, val);
|
||||
}
|
||||
}
|
||||
|
||||
// This kernel executes the atomic operation specified by the enumerator `operation`. Results of
|
||||
// the atomic operations are stored in `old_vals`. Each thread executes the atomic operation on the
|
||||
// same memory location `global_mem`.
|
||||
// If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic
|
||||
// operations are executed on shared memory, and the result is copied back to `global_mem`.
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) {
|
||||
__shared__ TestType shared_mem;
|
||||
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
|
||||
TestType* const mem = use_shared_mem ? &shared_mem : global_mem;
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
if (tid == 0) mem[0] = global_mem[0];
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(mem);
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
if (tid == 0) global_mem[0] = mem[0];
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the address of element `idx` of the array starting at `ptr`, where consecutive
// elements are `pitch` bytes apart. The byte offset is computed in 64 bits so that
// `idx * pitch` cannot wrap around the 32-bit range of its operands.
template <typename TestType>
__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch,
                                            const unsigned int idx) {
  const auto byte_offset = static_cast<unsigned long long>(idx) * pitch;
  return reinterpret_cast<TestType*>(reinterpret_cast<uint8_t*>(ptr) + byte_offset);
}
|
||||
|
||||
// Performs a load, an XOR with 0xAB and a store on every byte in [begin_addr, end_addr).
// The accesses go through a volatile pointer so each load and store is actually issued.
// Declared inline because this non-template function is defined in a header.
__device__ inline void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  volatile uint8_t* cursor = begin_addr;
  while (cursor != end_addr) {
    uint8_t byte = *cursor;
    byte ^= 0xAB;
    *cursor = byte;
    ++cursor;
  }
}
|
||||
|
||||
// This kernel executes the atomic operation specified by the enumerator `operation`. Results of the
|
||||
// atomic operations are stored in `old_vals`. `global_mem` is an array with `width` number of
|
||||
// elements. Each thread performs the atomic operation on the element that corresponds to its thread
|
||||
// id (tid % width).
|
||||
// The elements of `global_mem` can be larger than sizeof(TestType) with the actual size in bytes
|
||||
// specified by `pitch`. This is done so we can test scenarios where threads target memory locations
|
||||
// that are scattered over different cache lines.
|
||||
// If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic operations
|
||||
// are executed on shared memory, and the result is copied back to `global_mem`.
|
||||
// If `pitch` is greater than sizeof(TestType), arbitrary memory operations are performed in the
// empty space between consecutive atomic operations so that we can test that the atomic
// operations behave correctly even with some interference.
|
||||
//
|
||||
// For example, given that sizeof(TestType) is 1, `width` is 3, and `pitch` is 4:
|
||||
//
|
||||
// 0 1 2 3 4 5 6 7 8 9 10 11
|
||||
// global_mem -> | x | | | | x | | | | x | | | |
|
||||
// | pitch | pitch | pitch |
|
||||
//
|
||||
// In this scenario, the atomic operations will target the elements denoted with `x` (addresses 0,
|
||||
// 4, 8). Random memory traffic will be generated on the addresses in between (1, 2, 3, 5, 6, 7, 9,
|
||||
// 10, 11)
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
|
||||
const unsigned int width, const unsigned int pitch) {
|
||||
extern __shared__ uint8_t shared_mem[];
|
||||
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
|
||||
TestType* const mem = use_shared_mem ? reinterpret_cast<TestType*>(shared_mem) : global_mem;
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
if (tid < width) {
|
||||
const auto target = PitchedOffset(mem, pitch, tid);
|
||||
*target = *PitchedOffset(global_mem, pitch, tid);
|
||||
};
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
const auto n = cooperative_groups::this_grid().size() - width;
|
||||
|
||||
TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width);
|
||||
|
||||
if (tid < n) {
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(
|
||||
PitchedOffset(mem, pitch, tid % width));
|
||||
} else {
|
||||
uint8_t* const begin_addr = reinterpret_cast<uint8_t*>(atomic_addr + 1);
|
||||
uint8_t* const end_addr = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
|
||||
GenerateMemoryTraffic(begin_addr, end_addr);
|
||||
}
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
if (tid < width) {
|
||||
const auto target = PitchedOffset(global_mem, pitch, tid);
|
||||
*target = *PitchedOffset(mem, pitch, tid);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Used to configure test run
|
||||
struct TestParams {
|
||||
auto ThreadCount() const {
|
||||
return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z;
|
||||
}
|
||||
|
||||
auto HostIterationsPerThread() const { // number of iterations per host thread
|
||||
return std::max(num_devices * kernel_count * ThreadCount() / 20, width);
|
||||
}
|
||||
|
||||
dim3 blocks; // number of blocks per kernel launch
|
||||
dim3 threads; // number of threads per kernel launch
|
||||
unsigned int num_devices = 1u; // number of devices used
|
||||
unsigned int kernel_count = 1u; // number of kernels launched per device
|
||||
unsigned int width = 1u; // number of memory locations targeted
|
||||
unsigned int pitch = 0u; // defines spacing between memory locations
|
||||
unsigned int host_thread_count = 0u; // number of host threads launched
|
||||
LinearAllocs alloc_type; // type of allocation used
|
||||
};
|
||||
|
||||
// Reference implementation used to verify results
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const TestParams& p) {
|
||||
const auto val = GetTestValue<TestType, operation>();
|
||||
|
||||
const auto total_thread_count = p.num_devices * p.kernel_count * p.ThreadCount() +
|
||||
p.host_thread_count * p.HostIterationsPerThread();
|
||||
|
||||
std::vector<TestType> res_vals(p.width);
|
||||
std::vector<TestType> old_vals;
|
||||
old_vals.reserve(total_thread_count);
|
||||
|
||||
auto perform_op = [&](unsigned id) {
|
||||
auto& res = res_vals[id % p.width];
|
||||
old_vals.push_back(res);
|
||||
|
||||
if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem ||
|
||||
operation == AtomicOperation::kUnsafeAdd ||
|
||||
operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd ||
|
||||
operation == AtomicOperation::kCASAddSystem ||
|
||||
operation == AtomicOperation::kBuiltinAdd ||
|
||||
operation == AtomicOperation::kBuiltinCAS) {
|
||||
res = res + val;
|
||||
} else if constexpr (operation == AtomicOperation::kSub ||
|
||||
operation == AtomicOperation::kSubSystem) {
|
||||
res = res - val;
|
||||
} else if constexpr (operation == AtomicOperation::kInc) {
|
||||
res = (res >= val) ? 0 : res + 1;
|
||||
} else if constexpr (operation == AtomicOperation::kDec) {
|
||||
res = ((res == 0) || (res > val)) ? val : res - 1;
|
||||
}
|
||||
};
|
||||
|
||||
for (auto i = 0u; i < p.num_devices; ++i) {
|
||||
for (auto j = 0u; j < p.kernel_count; ++j) {
|
||||
for (auto tid = 0u; tid < p.ThreadCount() - p.width; ++tid) {
|
||||
perform_op(tid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto i = 0u; i < p.host_thread_count; ++i) {
|
||||
for (auto j = 0u; j < p.HostIterationsPerThread(); ++j) {
|
||||
perform_op(j);
|
||||
}
|
||||
}
|
||||
|
||||
return {res_vals, old_vals};
|
||||
}
|
||||
|
||||
// Compares the results of the test kernel stored in `res_vals` with results generated by the
|
||||
// reference implementation
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void Verify(const TestParams& p, std::vector<TestType>& res_vals, std::vector<TestType>& old_vals) {
|
||||
auto [expected_res_vals, expected_old_vals] = TestKernelHostRef<TestType, operation>(p);
|
||||
|
||||
for (auto i = 0u; i < res_vals.size(); ++i) {
|
||||
INFO("Results index: " << i);
|
||||
REQUIRE(expected_res_vals[i] == res_vals[i]);
|
||||
}
|
||||
|
||||
std::sort(begin(old_vals), end(old_vals));
|
||||
std::sort(begin(expected_old_vals), end(expected_old_vals));
|
||||
for (auto i = 0u; i < old_vals.size(); ++i) {
|
||||
INFO("Old values index: " << i);
|
||||
REQUIRE(expected_old_vals[i] == old_vals[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Launches the test kernel
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr,
|
||||
TestType* const old_vals) {
|
||||
const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u;
|
||||
if (p.width == 1 && p.pitch == sizeof(TestType))
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals);
|
||||
else
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals, p.width, p.pitch);
|
||||
}
|
||||
|
||||
// Performs a host atomic operation on parameter `mem` based on the `operation` enumerator.
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* const old_vals,
|
||||
const unsigned int width, const unsigned pitch, TestType /*base_val*/) {
|
||||
const auto val = GetTestValue<TestType, operation>();
|
||||
|
||||
for (auto i = 0u; i < iterations; ++i) {
|
||||
if constexpr (operation == AtomicOperation::kAddSystem ||
|
||||
operation == AtomicOperation::kCASAddSystem ||
|
||||
operation == AtomicOperation::kBuiltinAdd ||
|
||||
operation == AtomicOperation::kBuiltinCAS) {
|
||||
old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED);
|
||||
} else if constexpr (operation == AtomicOperation::kSubSystem) {
|
||||
old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Launches host threads based on TestParams::host_thread_count that compete with the test kernel
|
||||
// for the same resources
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void PerformHostAtomicOperation(const TestParams& p, TestType* mem, TestType* const old_vals) {
|
||||
if (p.host_thread_count == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto host_base_val = p.num_devices * p.kernel_count * p.ThreadCount();
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
for (auto i = 0u; i < p.host_thread_count; ++i) {
|
||||
const auto iterations = p.HostIterationsPerThread();
|
||||
const auto thread_base_val = host_base_val + i * iterations;
|
||||
threads.push_back(std::thread(HostAtomicOperation<TestType, operation>, iterations, mem,
|
||||
old_vals + thread_base_val, p.width, p.pitch, thread_base_val));
|
||||
}
|
||||
|
||||
for (auto& th : threads) {
|
||||
th.join();
|
||||
}
|
||||
}
|
||||
|
||||
// This is the main body of the test:
|
||||
// 1. Allocate memory based on TestParams::alloc_type
|
||||
// 2. Launch kernels based on TestParams::num_devices and TestParams::kernel_count
|
||||
// 3. Launch host threads based on TestParams::host_thread_count
|
||||
// 4. Verify the results
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void TestCore(const TestParams& p) {
|
||||
const unsigned int flags =
|
||||
p.alloc_type == LinearAllocs::mallocAndRegister ? hipHostRegisterMapped : 0u;
|
||||
|
||||
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
|
||||
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
|
||||
std::vector<StreamGuard> streams;
|
||||
for (auto i = 0; i < p.num_devices; ++i) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size);
|
||||
for (auto j = 0; j < p.kernel_count; ++j) {
|
||||
streams.emplace_back(Streams::created);
|
||||
}
|
||||
}
|
||||
|
||||
const auto mem_alloc_size = p.width * p.pitch;
|
||||
LinearAllocGuard<TestType> mem_dev(p.alloc_type, mem_alloc_size, flags);
|
||||
|
||||
std::vector<TestType> old_vals(p.num_devices * p.kernel_count * p.ThreadCount() +
|
||||
p.host_thread_count * p.HostIterationsPerThread());
|
||||
std::vector<TestType> res_vals(p.width);
|
||||
|
||||
TestType* const mem_ptr =
|
||||
p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr();
|
||||
|
||||
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
|
||||
|
||||
for (auto i = 0u; i < p.num_devices; ++i) {
|
||||
for (auto j = 0u; j < p.kernel_count; ++j) {
|
||||
const auto& stream = streams[i * p.kernel_count + j].stream();
|
||||
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
|
||||
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
|
||||
old_vals);
|
||||
}
|
||||
}
|
||||
|
||||
PerformHostAtomicOperation<TestType, operation>(p, mem_dev.host_ptr(), old_vals.data());
|
||||
|
||||
for (auto i = 0u; i < p.num_devices; ++i) {
|
||||
const auto device_offset = i * p.kernel_count * p.ThreadCount();
|
||||
HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(),
|
||||
old_vals_alloc_size, hipMemcpyDeviceToHost));
|
||||
}
|
||||
HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType),
|
||||
p.width, hipMemcpyDeviceToHost));
|
||||
|
||||
Verify<TestType, operation>(p, res_vals, old_vals);
|
||||
}
|
||||
|
||||
// Catch2 generator for the block dimensions under test: 16 and 1024 threads per block.
inline dim3 GenerateThreadDimensions() {
  const dim3 threads_per_block = GENERATE(dim3(16), dim3(1024));
  return threads_per_block;
}
|
||||
|
||||
// Catch2 generator for the grid dimensions under test, derived from the multiprocessor count
// of device 0: exactly that count, and one and a half times that count.
inline dim3 GenerateBlockDimensions() {
  int multiprocessor_count = 0;
  HIP_CHECK(
      hipDeviceGetAttribute(&multiprocessor_count, hipDeviceAttributeMultiprocessorCount, 0));
  return GENERATE_COPY(dim3(multiprocessor_count),
                       dim3(multiprocessor_count + multiprocessor_count / 2));
}
|
||||
|
||||
// Configures and creates the TestCore for a single device, and a single kernel launch
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = 1;
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinAdd ||
|
||||
operation == AtomicOperation::kBuiltinCAS) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
|
||||
params.threads = 1;
|
||||
} else if constexpr ((operation == AtomicOperation::kBuiltinAdd ||
|
||||
operation == AtomicOperation::kBuiltinCAS) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
params.threads = dim3(warp_size);
|
||||
} else {
|
||||
params.threads = GenerateThreadDimensions();
|
||||
}
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
SECTION("Global memory") {
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinAdd ||
|
||||
operation == AtomicOperation::kBuiltinCAS) &&
|
||||
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
|
||||
params.blocks = dim3(1);
|
||||
} else {
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
}
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false, memory_scope>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Shared memory") {
|
||||
params.blocks = dim3(1);
|
||||
params.alloc_type = LinearAllocs::hipMalloc;
|
||||
TestCore<TestType, operation, true, memory_scope>(params);
|
||||
}
|
||||
}
|
||||
|
||||
// Configures and creates the TestCore for a single device, and multiple kernel launches
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width,
|
||||
const unsigned int pitch) {
|
||||
int concurrent_kernels = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0));
|
||||
if (!concurrent_kernels) {
|
||||
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
|
||||
return;
|
||||
}
|
||||
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
params.threads = GenerateThreadDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Configures and creates the TestCore for a multiple devices (and host), and multiple kernel
|
||||
// launches
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void MultipleDeviceMultipleKernelAndHostTest(const unsigned int num_devices,
|
||||
const unsigned int kernel_count,
|
||||
const unsigned int width, const unsigned int pitch,
|
||||
const unsigned int host_thread_count = 0u) {
|
||||
if (num_devices > 1) {
|
||||
if (HipTest::getDeviceCount() < num_devices) {
|
||||
std::string msg = std::to_string(num_devices) + " devices are required";
|
||||
HipTest::HIP_SKIP_TEST(msg.c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (kernel_count > 1) {
|
||||
for (auto i = 0u; i < num_devices; ++i) {
|
||||
int concurrent_kernels = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i));
|
||||
if (!concurrent_kernels) {
|
||||
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TestParams params;
|
||||
params.num_devices = num_devices;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
params.threads = GenerateThreadDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
params.host_thread_count = host_thread_count;
|
||||
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
#include "atomicAdd_negative_kernels_rtc.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicAdd atomicAdd
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition on a target memory location. Each thread will add the same value to the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicAdd
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAdd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kAdd;
  // Hard-coded (not queried) value passed as the second argument of the scattered pattern; the
  // test description refers to it as the L1 cache line size in bytes.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0 sizes the adjacent and scattered patterns.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times, each repetition in its own
  // uniquely named section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineBytes);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
|
||||
* an atomic addition on a target memory location. Each thread will add the same value to the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. Once
|
||||
* complete, the output array and target memory is validated to contain all the expected values.
|
||||
* Several memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicAdd
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAdd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive_Multi_Kernel", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kAdd;
  // First argument handed to SingleDeviceMultipleKernelTest for every pattern.
  constexpr int kKernelCount = 2;
  // Hard-coded (not queried) value passed as the last argument of the scattered pattern; the
  // test description refers to it as the L1 cache line size in bytes.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0 sizes the adjacent and scattered patterns.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times, each repetition in its own
  // uniquely named section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                           sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                           kCacheLineBytes);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
|
||||
* atomicAdd.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAdd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicAdd_Negative_Parameters_RTC") {
  // One RTC source string per atomicAdd overload; the body below runs once for each of them.
  const auto program_source = GENERATE(kAtomicAdd_int, kAtomicAdd_uint, kAtomicAdd_ulong,
                                       kAtomicAdd_ulonglong, kAtomicAdd_float, kAtomicAdd_double);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicAdd_negative.cc", 0, nullptr, nullptr));

  // The compile result is not checked here; it is verified at the end, after the log has been
  // collected and the program destroyed.
  hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  // Fetch the compile log, then release the program.
  size_t log_size = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));
  HIPRTC_CHECK(hiprtcDestroyProgram(&program));

  // Count every occurrence of the compiler's error marker in the log.
  const std::string error_message{"error:"};
  int error_count = 0;
  for (size_t n_pos = log.find(error_message); n_pos != std::string::npos;
       n_pos = log.find(error_message, n_pos + 1)) {
    ++error_count;
  }

  // Each source string defines eight kernels; presumably one error is reported per kernel.
  const int expected_error_count = 8;

  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* int atomicAdd(int* address, int val) */
|
||||
__global__ void atomicAdd_int_v1(int* address, int* result) { *result = atomicAdd(&address, 1234); }
|
||||
|
||||
__global__ void atomicAdd_int_v2(int* address, int* result) {
|
||||
*result = atomicAdd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_int_v3(int* address, int* result) { *result = atomicAdd(1234, 1234); }
|
||||
|
||||
__global__ void atomicAdd_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_int_v5(char* address, int* result) { *result = atomicAdd(address, 1234); }
|
||||
|
||||
__global__ void atomicAdd_int_v6(short* address, int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_int_v7(long* address, int* result) { *result = atomicAdd(address, 1234); }
|
||||
|
||||
__global__ void atomicAdd_int_v8(long long* address, int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
/* unsigned int atomicAdd(unsigned int* address, unsigned int val) */
|
||||
__global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAdd(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAdd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAdd(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicAdd(unsigned long* address, unsigned long val) */
|
||||
__global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAdd(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAdd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAdd(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicAdd(unsigned long long* address, unsigned long long val) */
|
||||
__global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAdd(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAdd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAdd(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicAdd(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicAdd(float* address, float val) */
|
||||
__global__ void atomicAdd_float_v1(float* address, float* result) {
|
||||
*result = atomicAdd(&address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_float_v2(float* address, float* result) {
|
||||
*result = atomicAdd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_float_v3(float* address, float* result) {
|
||||
*result = atomicAdd(1234.f, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_float_v4(Dummy* address, float* result) {
|
||||
*result = atomicAdd(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_float_v5(char* address, float* result) {
|
||||
*result = atomicAdd(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_float_v6(short* address, float* result) {
|
||||
*result = atomicAdd(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_float_v7(long* address, float* result) {
|
||||
*result = atomicAdd(address, 1234.f);
|
||||
}
|
||||
|
||||
// Address argument is a long long pointer. The value is a float literal, like in the other
// float kernels of this file, so the address type is the only mismatch against
// atomicAdd(float*, float).
__global__ void atomicAdd_float_v8(long long* address, float* result) {
  *result = atomicAdd(address, 1234.f);
}
|
||||
|
||||
/* atomicAdd(double* address, double val) */
|
||||
__global__ void atomicAdd_double_v1(double* address, double* result) {
|
||||
*result = atomicAdd(&address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_double_v2(double* address, double* result) {
|
||||
*result = atomicAdd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_double_v3(double* address, double* result) {
|
||||
*result = atomicAdd(1234.0, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_double_v4(Dummy* address, double* result) {
|
||||
*result = atomicAdd(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_double_v5(char* address, double* result) {
|
||||
*result = atomicAdd(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_double_v6(short* address, double* result) {
|
||||
*result = atomicAdd(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_double_v7(long* address, double* result) {
|
||||
*result = atomicAdd(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicAdd_double_v8(long long* address, double* result) {
|
||||
*result = atomicAdd(address, 1234.0);
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// RTC source text defining eight kernels (atomicAdd_int_v1 .. atomicAdd_int_v8) and the helper
// class Dummy. Every kernel calls atomicAdd with arguments that do not match
// atomicAdd(int*, int).
static constexpr const char* kAtomicAdd_int = R"(
__global__ void atomicAdd_int_v1(int* address, int* result) {
*result = atomicAdd(&address, 1234);
}

__global__ void atomicAdd_int_v2(int* address, int* result) {
*result = atomicAdd(address, address);
}

__global__ void atomicAdd_int_v3(int* address, int* result) {
*result = atomicAdd(1234, 1234);
}

class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicAdd_int_v4(Dummy* address, int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_int_v5(char* address, int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_int_v6(short* address, int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_int_v7(long* address, int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_int_v8(long long* address, int* result) {
*result = atomicAdd(address, 1234);
}
)";
|
||||
|
||||
// RTC source text defining eight kernels (atomicAdd_uint_v1 .. atomicAdd_uint_v8) and the helper
// class Dummy. Every kernel calls atomicAdd with arguments that do not match
// atomicAdd(unsigned int*, unsigned int).
static constexpr const char* kAtomicAdd_uint = R"(
__global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicAdd(&address, 1234);
}

__global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicAdd(address, address);
}

__global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicAdd(1234, 1234);
}

class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_uint_v5(char* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_uint_v6(short* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_uint_v7(long* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) {
*result = atomicAdd(address, 1234);
}
)";
|
||||
|
||||
// RTC source text defining eight kernels (atomicAdd_ulong_v1 .. atomicAdd_ulong_v8) and the
// helper class Dummy. Every kernel calls atomicAdd with arguments that do not match
// atomicAdd(unsigned long*, unsigned long).
static constexpr const char* kAtomicAdd_ulong = R"(
__global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicAdd(&address, 1234);
}

__global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicAdd(address, address);
}

__global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicAdd(1234, 1234);
}

class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) {
*result = atomicAdd(address, 1234);
}
)";
|
||||
|
||||
// RTC source text defining eight kernels (atomicAdd_ulonglong_v1 .. atomicAdd_ulonglong_v8) and
// the helper class Dummy. Every kernel calls atomicAdd with arguments that do not match
// atomicAdd(unsigned long long*, unsigned long long).
static constexpr const char* kAtomicAdd_ulonglong = R"(
__global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(&address, 1234);
}

__global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(address, address);
}

__global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicAdd(1234, 1234);
}

class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}

__global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicAdd(address, 1234);
}
)";
|
||||
|
||||
// RTC source text defining eight kernels (atomicAdd_float_v1 .. atomicAdd_float_v8) and the
// helper class Dummy. Every kernel calls atomicAdd with arguments that do not match
// atomicAdd(float*, float).
//
// atomicAdd_float_v8 passes a float literal like the other kernels in this string, so its only
// mismatch is the long long address type.
static constexpr auto kAtomicAdd_float{
    R"(
__global__ void atomicAdd_float_v1(float* address, float* result) {
*result = atomicAdd(&address, 1234.f);
}

__global__ void atomicAdd_float_v2(float* address, float* result) {
*result = atomicAdd(address, address);
}

__global__ void atomicAdd_float_v3(float* address, float* result) {
*result = atomicAdd(1234.f, 1234.f);
}

class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicAdd_float_v4(Dummy* address, float* result) {
*result = atomicAdd(address, 1234.f);
}

__global__ void atomicAdd_float_v5(char* address, float* result) {
*result = atomicAdd(address, 1234.f);
}

__global__ void atomicAdd_float_v6(short* address, float* result) {
*result = atomicAdd(address, 1234.f);
}

__global__ void atomicAdd_float_v7(long* address, float* result) {
*result = atomicAdd(address, 1234.f);
}

__global__ void atomicAdd_float_v8(long long* address, float* result) {
*result = atomicAdd(address, 1234.f);
}
)"};
|
||||
|
||||
// RTC source text defining eight kernels (atomicAdd_double_v1 .. atomicAdd_double_v8) and the
// helper class Dummy. Every kernel calls atomicAdd with arguments that do not match
// atomicAdd(double*, double).
static constexpr const char* kAtomicAdd_double = R"(
__global__ void atomicAdd_double_v1(double* address, double* result) {
*result = atomicAdd(&address, 1234.0);
}

__global__ void atomicAdd_double_v2(double* address, double* result) {
*result = atomicAdd(address, address);
}

__global__ void atomicAdd_double_v3(double* address, double* result) {
*result = atomicAdd(1234.0, 1234.0);
}

class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicAdd_double_v4(Dummy* address, double* result) {
*result = atomicAdd(address, 1234.0);
}

__global__ void atomicAdd_double_v5(char* address, double* result) {
*result = atomicAdd(address, 1234.0);
}

__global__ void atomicAdd_double_v6(short* address, double* result) {
*result = atomicAdd(address, 1234.0);
}

__global__ void atomicAdd_double_v7(long* address, double* result) {
*result = atomicAdd(address, 1234.0);
}

__global__ void atomicAdd_double_v8(long long* address, double* result) {
*result = atomicAdd(address, 1234.0);
}
)";
|
||||
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicAdd_system atomicAdd_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on two devices wherein all threads will perform
|
||||
* an atomic addition on a target memory location. Each thread will add the same value to the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. Once
|
||||
* complete, the output array and target memory is validated to contain all the expected values.
|
||||
* Several memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicAdd_system
|
||||
* - hipHostMalloc, hipMallocManaged and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAdd_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kAddSystem;
  // num_devices and kernel_count arguments of the helper; no host thread count is passed.
  constexpr int kNumDevices = 2;
  constexpr int kKernelCount = 2;
  // Hard-coded (not queried) pitch of the scattered pattern; the test description refers to it
  // as the L1 cache line size in bytes.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Only the warp size of device 0 is queried.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times, each repetition in its own
  // uniquely named section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(kNumDevices, kKernelCount, 1,
                                                                    sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(kNumDevices, kKernelCount,
                                                                    warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(kNumDevices, kKernelCount,
                                                                    warp_size, kCacheLineBytes);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel on a single device wherein all threads will perform
|
||||
* an atomic addition on a target memory location. Each thread will add the same value to the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. While
|
||||
* the kernel is running, the host performs atomic additions, in 4 threads, on the same memory
|
||||
* location(s). Once complete, the output array and target memory is validated to contain all the
|
||||
* expected values. Several memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicAdd_system
|
||||
* - hipHostMalloc, hipMallocManaged and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAdd_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kAddSystem;
  // num_devices, kernel_count and host_thread_count arguments of the helper.
  constexpr int kNumDevices = 1;
  constexpr int kKernelCount = 1;
  constexpr int kHostThreadCount = 4;
  // Hard-coded (not queried) pitch of the scattered pattern; the test description refers to it
  // as the L1 cache line size in bytes.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Warp size of device 0 sizes the adjacent and scattered patterns.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times, each repetition in its own
  // uniquely named section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(
          kNumDevices, kKernelCount, 1, sizeof(TestType), kHostThreadCount);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(
          kNumDevices, kKernelCount, warp_size, sizeof(TestType), kHostThreadCount);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(
          kNumDevices, kKernelCount, warp_size, kCacheLineBytes, kHostThreadCount);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times on two devices wherein all threads will perform
|
||||
* an atomic addition on a target memory location. Each thread will add the same value to the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. While
|
||||
* the kernel is running, the host performs atomic additions, in 4 threads, on the same memory
|
||||
* location(s). Once complete, the output array and target memory is validated to contain all the
|
||||
* expected values. Several memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicAdd_system
|
||||
* - hipHostMalloc, hipMallocManaged and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAdd_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kAddSystem;
  // num_devices, kernel_count and host_thread_count arguments of the helper.
  constexpr int kNumDevices = 2;
  constexpr int kKernelCount = 2;
  constexpr int kHostThreadCount = 4;
  // Hard-coded (not queried) pitch of the scattered pattern; the test description refers to it
  // as the L1 cache line size in bytes.
  constexpr unsigned int kCacheLineBytes = 128u;

  // Only the warp size of device 0 is queried.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every access pattern is repeated cmd_options.iterations times, each repetition in its own
  // uniquely named section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(
          kNumDevices, kKernelCount, 1, sizeof(TestType), kHostThreadCount);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(
          kNumDevices, kKernelCount, warp_size, sizeof(TestType), kHostThreadCount);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(
          kNumDevices, kKernelCount, warp_size, kCacheLineBytes, kHostThreadCount);
    }
  }
}
|
||||
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "atomicAnd_negative_kernels_rtc.hh"
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicAnd atomicAnd
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicAnd(TestType* address, TestType val)` -
|
||||
* performs atomic bitwise AND between address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long) {
  // One dynamic section per requested iteration; each runs the single-kernel helper with
  // arguments (1, sizeof(TestType)).
  // NOTE(review): presumably (slot count, pitch in bytes) - confirm against bitwise_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size reported for device 0 becomes the helper's first argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration, each with arguments
  // (warp_size, sizeof(TestType)).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size reported for device 0 becomes the helper's first argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Second helper argument for the scattered layout.
  const unsigned int cache_line_size{128u};

  // One dynamic section per requested iteration, each with arguments
  // (warp_size, cache_line_size).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          warp_size, cache_line_size);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // One dynamic section per requested iteration; each runs the multi-kernel helper with
  // arguments (2, 1, sizeof(TestType)).
  // NOTE(review): presumably (kernel count, slot count, pitch in bytes) - confirm against
  // bitwise_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size reported for device 0 becomes the helper's second argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration, each with arguments
  // (2, warp_size, sizeof(TestType)).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          2, warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // The warp size reported for device 0 becomes the helper's second argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Third helper argument for the scattered layout.
  const unsigned int cache_line_size{128u};

  // One dynamic section per requested iteration, each with arguments
  // (2, warp_size, cache_line_size).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAnd>(
          2, warp_size, cache_line_size);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles atomicAnd with invalid parameters.
|
||||
* - Compiles the source with RTC.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicAnd_Negative_Parameters_RTC") {
  // Each generated value is one of the kernel source strings declared in
  // atomicAnd_negative_kernels_rtc.hh.
  const auto program_source =
      GENERATE(kAtomicAnd_int, kAtomicAnd_uint, kAtomicAnd_ulong, kAtomicAnd_ulonglong);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicAnd_negative.cc", 0, nullptr, nullptr));

  // The compile status is kept and only verified after the log has been read and the program
  // destroyed.
  hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  // Retrieve the compile log into a buffer of the size reported by the runtime.
  size_t log_size = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Tally every occurrence of the "error:" marker in the log.
  std::string error_message{"error:"};
  int error_count = 0;
  for (size_t marker_pos = log.find(error_message, 0); marker_pos != std::string::npos;
       marker_pos = log.find(error_message, marker_pos + 1)) {
    ++error_count;
  }

  // Number of "error:" markers expected per source string.
  // NOTE(review): each source string in atomicAnd_negative_kernels_rtc.hh defines ten kernels
  // (v1..v10) while nine errors are expected - confirm which kernel is not expected to
  // contribute an "error:" line.
  int expected_error_count = 9;

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* int atomicAnd(int* address, int val) */
|
||||
// Passes &address (an int**) as the address argument rather than the int* itself.
__global__ void atomicAnd_int_v1(int* address, int* result) { *result = atomicAnd(&address, 1234); }
|
||||
|
||||
// Passes the pointer `address` as the value operand as well as the address.
__global__ void atomicAnd_int_v2(int* address, int* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
// Passes the integer literal 1234 in place of the address pointer.
__global__ void atomicAnd_int_v3(int* address, int* result) { *result = atomicAnd(1234, 1234); }
|
||||
|
||||
// Passes a Dummy* (user-defined class type) as the address argument.
__global__ void atomicAnd_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a char* as the address argument.
__global__ void atomicAnd_int_v5(char* address, int* result) { *result = atomicAnd(address, 1234); }
|
||||
|
||||
// Passes a short* as the address argument.
__global__ void atomicAnd_int_v6(short* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a long* as the address argument.
__global__ void atomicAnd_int_v7(long* address, int* result) { *result = atomicAnd(address, 1234); }
|
||||
|
||||
// Passes a long long* as the address argument.
__global__ void atomicAnd_int_v8(long long* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a float* as the address argument.
__global__ void atomicAnd_int_v9(float* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a double* as the address argument.
__global__ void atomicAnd_int_v10(double* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
/* unsigned int atomicAnd(unsigned int* address, unsigned int val) */
|
||||
// Passes &address (an unsigned int**) as the address argument rather than the pointer itself.
__global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAnd(&address, 1234);
|
||||
}
|
||||
|
||||
// Passes the pointer `address` as the value operand as well as the address.
__global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
// Passes the integer literal 1234 in place of the address pointer.
__global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAnd(1234, 1234);
|
||||
}
|
||||
|
||||
// Passes a Dummy* (user-defined class type) as the address argument.
__global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a char* as the address argument.
__global__ void atomicAnd_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a short* as the address argument.
__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a long* as the address argument.
__global__ void atomicAnd_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a long long* as the address argument.
__global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a float* as the address argument.
__global__ void atomicAnd_uint_v9(float* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a double* as the address argument.
__global__ void atomicAnd_uint_v10(double* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
/* unsigned long atomicAnd(unsigned long* address, unsigned long val) */
|
||||
// Passes &address (an unsigned long**) as the address argument rather than the pointer itself.
__global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAnd(&address, 1234);
|
||||
}
|
||||
|
||||
// Passes the pointer `address` as the value operand as well as the address.
__global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
// Passes the integer literal 1234 in place of the address pointer.
__global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAnd(1234, 1234);
|
||||
}
|
||||
|
||||
// Passes a Dummy* (user-defined class type) as the address argument.
__global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a char* as the address argument.
__global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a short* as the address argument.
__global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a (signed) long* as the address argument.
__global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a long long* as the address argument.
__global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a float* as the address argument.
__global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a double* as the address argument.
__global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
/* unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) */
|
||||
// Passes &address (an unsigned long long**) as the address argument rather than the pointer
// itself.
__global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(&address, 1234);
|
||||
}
|
||||
|
||||
// Passes the pointer `address` as the value operand as well as the address.
__global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
// Passes the integer literal 1234 in place of the address pointer.
__global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(1234, 1234);
|
||||
}
|
||||
|
||||
// Passes a Dummy* (user-defined class type) as the address argument.
__global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a char* as the address argument.
__global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a short* as the address argument.
__global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a long* as the address argument.
__global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a (signed) long long* as the address argument.
__global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a float* as the address argument.
__global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
// Passes a double* as the address argument.
__global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// Source text for the `int` negative cases: the Dummy class plus kernels atomicAnd_int_v1 through
// atomicAnd_int_v10, each calling atomicAnd with a different argument combination.
// Unit_atomicAnd_Negative_Parameters_RTC passes this string to hiprtcCreateProgram and counts
// the "error:" markers in the resulting compile log.
static constexpr auto kAtomicAnd_int{
|
||||
R"(
|
||||
__global__ void atomicAnd_int_v1(int* address, int* result) {
|
||||
*result = atomicAnd(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v2(int* address, int* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v3(int* address, int* result) {
|
||||
*result = atomicAnd(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicAnd_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v5(char* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v6(short* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v7(long* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v8(long long* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v9(float* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_int_v10(double* address, int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Source text for the `unsigned int` negative cases: the Dummy class plus kernels
// atomicAnd_uint_v1 through atomicAnd_uint_v10, each calling atomicAnd with a different argument
// combination. Unit_atomicAnd_Negative_Parameters_RTC passes this string to hiprtcCreateProgram
// and counts the "error:" markers in the resulting compile log.
static constexpr auto kAtomicAnd_uint{
|
||||
R"(
|
||||
__global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAnd(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicAnd(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v9(float* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_uint_v10(double* address, unsigned int* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Source text for the `unsigned long` negative cases: the Dummy class plus kernels
// atomicAnd_ulong_v1 through atomicAnd_ulong_v10, each calling atomicAnd with a different
// argument combination. Unit_atomicAnd_Negative_Parameters_RTC passes this string to
// hiprtcCreateProgram and counts the "error:" markers in the resulting compile log.
static constexpr auto kAtomicAnd_ulong{
|
||||
R"(
|
||||
__global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAnd(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicAnd(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Source text for the `unsigned long long` negative cases: the Dummy class plus kernels
// atomicAnd_ulonglong_v1 through atomicAnd_ulonglong_v10, each calling atomicAnd with a different
// argument combination. Unit_atomicAnd_Negative_Parameters_RTC passes this string to
// hiprtcCreateProgram and counts the "error:" markers in the resulting compile log.
static constexpr auto kAtomicAnd_ulonglong{
|
||||
R"(
|
||||
__global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) {
|
||||
*result = atomicAnd(address, 1234);
|
||||
}
|
||||
)"};
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicAnd_system atomicAnd_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicAnd_system(TestType* address, TestType val)` -
|
||||
* performs system-wide atomic bitwise AND between address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd_system from multiple threads on the same address.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // One dynamic section per requested iteration; each runs the multi-device helper with
  // arguments (2, 2, 1, sizeof(TestType)).
  // NOTE(review): presumably (devices, kernels per device, slot count, pitch in bytes) - confirm
  // against bitwise_common.hh.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
          2, 2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd_system from multiple threads on adjacent addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // The warp size reported for device 0 becomes the helper's third argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration, each with arguments
  // (2, 2, warp_size, sizeof(TestType)).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
          2, 2, warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicAnd_system from multiple threads on scattered addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicAnd_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // The warp size reported for device 0 becomes the helper's third argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Fourth helper argument for the scattered layout.
  const unsigned int cache_line_size{128u};

  // One dynamic section per requested iteration, each with arguments
  // (2, 2, warp_size, cache_line_size).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
          2, 2, warp_size, cache_line_size);
    }
  }
}
|
||||
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
#include "atomicCAS_negative_kernels_rtc.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicCAS atomicCAS
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
// TYPES is appended to the template type list of Unit_atomicCAS_Positive: it expands to nothing
// when HT_NVIDIA is defined and to `, float, double` otherwise, so the floating-point
// instantiations are only generated when HT_NVIDIA is not defined.
#ifdef HT_NVIDIA
|
||||
#define TYPES
|
||||
#else
|
||||
#define TYPES , float, double
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition, implemented using an atomic CAS operation, on a target memory location. Each thread
|
||||
* will add the same value to the memory location, storing the return value into a separate output
|
||||
* array slot corresponding to it. Once complete, the output array and target memory are validated to
|
||||
* contain all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads target a single, compile-time deducible memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive", "", int, unsigned int, unsigned long long TYPES) {
  // Pitch handed to the scattered-address variant; the test description treats it as the
  // L1 cache line size in bytes.
  constexpr unsigned int kScatterPitch = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Run every access pattern cmd_options.iterations times; the iteration index is streamed
  // into the section name so each dynamic section is distinct.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(warp_size, kScatterPitch);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each
|
||||
* thread will add the same value to the memory location, storing the return value into a separate
|
||||
* output array slot corresponding to it. Once complete, the output array and target memory are
|
||||
* validated to contain all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads target a single, compile-time deducible memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive_Multi_Kernel", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Pitch handed to the scattered-address variant; the test description treats it as the
  // L1 cache line size in bytes.
  constexpr unsigned int kScatterPitch = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every variant passes 2 as the first argument, matching the "two times concurrently"
  // wording of the test description.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, warp_size,
                                                                         sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, warp_size,
                                                                         kScatterPitch);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
|
||||
* atomicCAS.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicCAS_Negative_Parameters_RTC") {
  hiprtcProgram program{};

  // One RTC source per atomicCAS overload; the test body runs once for each generated value.
  const auto program_source = GENERATE(kAtomicCAS_int, kAtomicCAS_uint, kAtomicCAS_ulong,
                                       kAtomicCAS_ulonglong, kAtomicCAS_float, kAtomicCAS_double);
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicCAS_negative.cc", 0, nullptr, nullptr));

  // The compile result is only checked after the log has been read and the program destroyed.
  hiprtcResult result = hiprtcCompileProgram(program, 0, nullptr);

  // Retrieve the compile log into a string sized from the reported log length.
  size_t log_size = 0;
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count how many times the compiler reported "error:" in the log.
  const std::string error_message{"error:"};
  const int expected_error_count = 8;
  int error_count = 0;
  for (size_t n_pos = log.find(error_message, 0); n_pos != std::string::npos;
       n_pos = log.find(error_message, n_pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
#define ATOMIC_CAS_NEGATIVE_KERNEL(type_name) \
|
||||
__global__ void atomicCAS_v1(type_name* address, type_name* result) { \
|
||||
*result = atomicCAS(&address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v2(type_name* address, type_name* result) { \
|
||||
*result = atomicCAS(address, address, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v3(type_name* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, address); \
|
||||
} \
|
||||
__global__ void atomicCAS_v4(Dummy* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v5(char* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v6(short* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v7(long* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v8(long long* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
}
|
||||
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(int)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned int)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long long)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(float)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(double)
|
||||
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// RTC program source for the `int` result type: a Dummy class plus eight kernels
// (atomicCAS_int_v1 .. v8), each containing exactly one atomicCAS call.
static constexpr auto kAtomicCAS_int = R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicCAS_int_v1(int* address, int* result) {
*result = atomicCAS(&address, 12, 13);
}

__global__ void atomicCAS_int_v2(int* address, int* result) {
*result = atomicCAS(address, address, 13);
}

__global__ void atomicCAS_int_v3(int* address, int* result) {
*result = atomicCAS(address, 12, address);
}

__global__ void atomicCAS_int_v4(Dummy* address, int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_int_v5(char* address, int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_int_v6(short* address, int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_int_v7(long* address, int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_int_v8(long long* address, int* result) {
*result = atomicCAS(address, 12, 13);
}
)";
|
||||
|
||||
// RTC program source for the `unsigned int` result type: a Dummy class plus eight kernels
// (atomicCAS_uint_v1 .. v8), each containing exactly one atomicCAS call.
static constexpr auto kAtomicCAS_uint = R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicCAS_uint_v1(unsigned int* address, unsigned int* result) {
*result = atomicCAS(&address, 12, 13);
}

__global__ void atomicCAS_uint_v2(unsigned int* address, unsigned int* result) {
*result = atomicCAS(address, address, 13);
}

__global__ void atomicCAS_uint_v3(unsigned int* address, unsigned int* result) {
*result = atomicCAS(address, 12, address);
}

__global__ void atomicCAS_uint_v4(Dummy* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_uint_v5(char* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_uint_v6(short* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_uint_v7(long* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_uint_v8(long long* address, unsigned int* result) {
*result = atomicCAS(address, 12, 13);
}
)";
|
||||
|
||||
// RTC program source for the `unsigned long` result type: a Dummy class plus eight kernels
// (atomicCAS_ulong_v1 .. v8), each containing exactly one atomicCAS call.
static constexpr auto kAtomicCAS_ulong = R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicCAS_ulong_v1(unsigned long* address, unsigned long* result) {
*result = atomicCAS(&address, 12, 13);
}

__global__ void atomicCAS_ulong_v2(unsigned long* address, unsigned long* result) {
*result = atomicCAS(address, address, 13);
}

__global__ void atomicCAS_ulong_v3(unsigned long* address, unsigned long* result) {
*result = atomicCAS(address, 12, address);
}

__global__ void atomicCAS_ulong_v4(Dummy* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulong_v5(char* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulong_v6(short* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulong_v7(long* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulong_v8(long long* address, unsigned long* result) {
*result = atomicCAS(address, 12, 13);
}
)";
|
||||
|
||||
// RTC program source for the `unsigned long long` result type: a Dummy class plus eight
// kernels (atomicCAS_ulonglong_v1 .. v8), each containing exactly one atomicCAS call.
static constexpr auto kAtomicCAS_ulonglong = R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicCAS_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
*result = atomicCAS(&address, 12, 13);
}

__global__ void atomicCAS_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
*result = atomicCAS(address, address, 13);
}

__global__ void atomicCAS_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
*result = atomicCAS(address, 12, address);
}

__global__ void atomicCAS_ulonglong_v4(Dummy* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulonglong_v5(char* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulonglong_v6(short* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulonglong_v7(long* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_ulonglong_v8(long long* address, unsigned long long* result) {
*result = atomicCAS(address, 12, 13);
}
)";
|
||||
|
||||
// RTC program source for the `float` result type: a Dummy class plus eight kernels
// (atomicCAS_float_v1 .. v8), each containing exactly one atomicCAS call.
static constexpr auto kAtomicCAS_float = R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicCAS_float_v1(float* address, float* result) {
*result = atomicCAS(&address, 12, 13);
}

__global__ void atomicCAS_float_v2(float* address, float* result) {
*result = atomicCAS(address, address, 13);
}

__global__ void atomicCAS_float_v3(float* address, float* result) {
*result = atomicCAS(address, 12, address);
}

__global__ void atomicCAS_float_v4(Dummy* address, float* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_float_v5(char* address, float* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_float_v6(short* address, float* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_float_v7(long* address, float* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_float_v8(long long* address, float* result) {
*result = atomicCAS(address, 12, 13);
}
)";
|
||||
|
||||
// RTC program source for the `double` result type: a Dummy class plus eight kernels
// (atomicCAS_double_v1 .. v8), each containing exactly one atomicCAS call.
static constexpr auto kAtomicCAS_double = R"(
class Dummy {
public:
__device__ Dummy() {}
__device__ ~Dummy() {}
};

__global__ void atomicCAS_double_v1(double* address, double* result) {
*result = atomicCAS(&address, 12, 13);
}

__global__ void atomicCAS_double_v2(double* address, double* result) {
*result = atomicCAS(address, address, 13);
}

__global__ void atomicCAS_double_v3(double* address, double* result) {
*result = atomicCAS(address, 12, address);
}

__global__ void atomicCAS_double_v4(Dummy* address, double* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_double_v5(char* address, double* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_double_v6(short* address, double* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_double_v7(long* address, double* result) {
*result = atomicCAS(address, 12, 13);
}

__global__ void atomicCAS_double_v8(long long* address, double* result) {
*result = atomicCAS(address, 12, 13);
}
)";
|
||||
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicCAS_system atomicCAS_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
// TYPES is appended to the integral type lists of the TEMPLATE_TEST_CASEs in this file.
// It expands to nothing when HT_NVIDIA is defined and to ", float, double" otherwise.
#if !defined(HT_NVIDIA)
#define TYPES , float, double
#else
#define TYPES
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on two devices wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each
|
||||
* thread will add the same value to the memory location, storing the return value into a separate
|
||||
* output array slot corresponding to it. Once complete, the output array and target memory are
|
||||
* validated to contain all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads target a single, compile-time deducible memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Pitch handed to the scattered-address variant; the test description treats it as the
  // L1 cache line size in bytes.
  constexpr unsigned int kScatterPitch = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // No trailing host-thread argument is passed here, unlike the Host_And_* test cases.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          2, 2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          2, 2, warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          2, 2, warp_size, kScatterPitch);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel on a single device wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location.
|
||||
* Each thread will add the same value to the memory location, storing the return value into a
|
||||
* separate output array slot corresponding to it. While the kernel is running, the host
|
||||
* performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the
|
||||
* output array and target memory are validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads target a single, compile-time deducible memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Pitch handed to the scattered-address variant; the test description treats it as the
  // L1 cache line size in bytes.
  constexpr unsigned int kScatterPitch = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The trailing 4 matches the "in 4 threads" host-side work mentioned in the test description.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          1, 1, 1, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          1, 1, warp_size, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          1, 1, warp_size, kScatterPitch, 4);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times on two devices wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location.
|
||||
* Each thread will add the same value to the memory location, storing the return value into a
|
||||
* separate output array slot corresponding to it. While the kernel is running, the host
|
||||
* performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the
|
||||
* output array and target memory are validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads target a single, compile-time deducible memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
                   unsigned long long TYPES) {
  // Pitch handed to the scattered-address variant; the test description treats it as the
  // L1 cache line size in bytes.
  constexpr unsigned int kScatterPitch = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The trailing 4 matches the "in 4 threads" host-side work mentioned in the test description.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          2, 2, 1, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          2, 2, warp_size, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
          2, 2, warp_size, kScatterPitch, 4);
    }
  }
}
|
||||
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
#include "atomicDec_negative_kernels_rtc.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicDec atomicDec
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* decrement on a target memory location. Each thread will decrement the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory are validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads decrement a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicDec
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicDec.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicDec_Positive", "", unsigned int) {
  // Pitch handed to the scattered-address variant; the test description treats it as the
  // L1 cache line size in bytes.
  constexpr unsigned int kScatterPitch = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Run every access pattern cmd_options.iterations times; the iteration index is streamed
  // into the section name so each dynamic section is distinct.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kDec>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kDec>(warp_size, sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kDec>(warp_size, kScatterPitch);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
|
||||
* an atomic decrement on a target memory location. Each thread will decrement the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. Once
|
||||
* complete, the output array and target memory are validated to contain all the expected values.
|
||||
* Several memory access patterns are tested:
|
||||
* -# All threads decrement a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicDec
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicDec.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicDec_Positive_Multi_Kernel", "", unsigned int) {
  // Pitch handed to the scattered-address variant; the test description treats it as the
  // L1 cache line size in bytes.
  constexpr unsigned int kScatterPitch = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Every variant passes 2 as the first argument, matching the "two times concurrently"
  // wording of the test description.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kDec>(2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kDec>(2, warp_size,
                                                                      sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kDec>(2, warp_size,
                                                                      kScatterPitch);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Runtime-compiles kernels passing combinations of invalid-typed arguments to all overloads of
|
||||
* atomicDec.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicDec.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicDec_Negative_Parameters_RTC") {
  const auto program_source = GENERATE(kAtomicDec_uint);

  // Build the program. The compile status is stored here and only inspected after the log has
  // been read and the program destroyed.
  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicDec_negative.cc", 0, nullptr, nullptr));
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a buffer of the size reported by hipRTC.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of "error:" in the log.
  // NOTE(review): 8 presumably corresponds to one diagnostic per kernel in kAtomicDec_uint.
  const std::string error_message{"error:"};
  const int expected_error_count{8};
  int error_count{0};
  for (size_t n_pos = log.find(error_message); n_pos != std::string::npos;
       n_pos = log.find(error_message, n_pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Empty class with device-side constructor and destructor. In this file it is only used as the
// pointee type of the `address` parameter of atomicDec_uint_v4.
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* unsigned int atomicDec(unsigned int* address, unsigned int val) */
|
||||
// Negative case: passes `&address` (an unsigned int**) as the first atomicDec argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicDec(&address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: passes the pointer `address` itself as the second (value) atomicDec argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicDec(address, address);
|
||||
}
|
||||
|
||||
// Negative case: passes the integer literal 1234 as the first (address) atomicDec argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicDec(1234, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicDec with a Dummy* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicDec with a char* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicDec with a short* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicDec with a long* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicDec with a long long* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicDec_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// Kernel source text for runtime compilation. It holds eight kernels (atomicDec_uint_v1 to
// atomicDec_uint_v8), each calling atomicDec with a different argument of an unsuitable type,
// plus the Dummy class used by the v4 kernel. The text is part of the program's runtime
// behaviour: it must not be reformatted or annotated from within the literal.
static constexpr auto kAtomicDec_uint{
|
||||
R"(
|
||||
__global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicDec(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicDec(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicDec(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicDec_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicDec_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicDec_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicDec_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicDec(address, 1234);
|
||||
}
|
||||
)"};
|
||||
@@ -24,22 +24,26 @@ THE SOFTWARE.
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include <cmd_options.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <cmd_options.hh>
|
||||
|
||||
enum class AtomicScopes { device, system };
|
||||
enum class AtomicScopes { device, system, builtin };
|
||||
|
||||
template <typename T, AtomicScopes scope> __device__ T perform_atomic_exch(T* address, T val) {
|
||||
template <typename T, AtomicScopes scope, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__device__ T perform_atomic_exch(T* address, T val) {
|
||||
if constexpr (scope == AtomicScopes::device) {
|
||||
return atomicExch(address, val);
|
||||
} else if (scope == AtomicScopes::system) {
|
||||
return atomicExch_system(address, val);
|
||||
} else if (scope == AtomicScopes::builtin) {
|
||||
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, memory_scope);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, bool use_shared_mem, AtomicScopes scope>
|
||||
template <typename T, bool use_shared_mem, AtomicScopes scope,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const old_vals) {
|
||||
__shared__ T shared_mem;
|
||||
|
||||
@@ -52,7 +56,7 @@ __global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const ol
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
old_vals[tid] = perform_atomic_exch<T, scope>(mem, static_cast<T>(tid + 1));
|
||||
old_vals[tid] = perform_atomic_exch<T, scope, memory_scope>(mem, static_cast<T>(tid + 1));
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
@@ -67,7 +71,16 @@ __host__ __device__ T* pitched_offset(T* const ptr, const unsigned int pitch,
|
||||
return reinterpret_cast<T*>(byte_ptr + idx * pitch);
|
||||
}
|
||||
|
||||
template <typename T, bool use_shared_mem, AtomicScopes scope>
|
||||
// Walks the byte range [begin_addr, end_addr) and XORs every byte with 0xAB in place. The bytes
// are accessed through a volatile pointer, so each byte is read once and written once.
__device__ void generate_memory_traffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  volatile uint8_t* cursor = begin_addr;
  while (cursor != end_addr) {
    const uint8_t flipped = static_cast<uint8_t>(*cursor ^ 0xAB);
    *cursor = flipped;
    ++cursor;
  }
}
|
||||
|
||||
template <typename T, bool use_shared_mem, AtomicScopes scope,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const unsigned int width,
|
||||
const unsigned pitch, const T base_val = 0) {
|
||||
extern __shared__ uint8_t shared_mem[];
|
||||
@@ -84,8 +97,18 @@ __global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
old_vals[tid] = perform_atomic_exch<T, scope>(pitched_offset(mem, pitch, tid % width),
|
||||
base_val + static_cast<T>(tid + width));
|
||||
const auto n = cooperative_groups::this_grid().size() - width;
|
||||
|
||||
T* atomic_addr = pitched_offset(mem, pitch, tid % width);
|
||||
|
||||
if (tid < n) {
|
||||
old_vals[tid] = perform_atomic_exch<T, scope, memory_scope>(
|
||||
pitched_offset(mem, pitch, tid % width), base_val + static_cast<T>(tid + width));
|
||||
} else {
|
||||
uint8_t* const begin_addr = reinterpret_cast<uint8_t*>(atomic_addr + 1);
|
||||
uint8_t* const end_addr = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
|
||||
generate_memory_traffic(begin_addr, end_addr);
|
||||
}
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
@@ -255,14 +278,16 @@ class AtomicExchCRTP {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, bool use_shared_mem, AtomicScopes scope>
|
||||
template <typename T, bool use_shared_mem, AtomicScopes scope,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
class AtomicExch
|
||||
: public AtomicExchCRTP<AtomicExch<T, use_shared_mem, scope>, T, use_shared_mem, scope> {
|
||||
public:
|
||||
void LaunchKernel(const unsigned int shared_mem_size, const hipStream_t stream, T* const mem,
|
||||
T* const old_vals, const T base_val, const AtomicExchParams& p) const {
|
||||
atomic_exch_kernel<T, use_shared_mem, scope><<<p.blocks, p.threads, shared_mem_size, stream>>>(
|
||||
mem, old_vals, p.width, p.pitch, base_val);
|
||||
atomic_exch_kernel<T, use_shared_mem, scope, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem, old_vals, p.width, p.pitch,
|
||||
base_val);
|
||||
}
|
||||
|
||||
void ValidateResults(std::vector<T>& old_vals) const {
|
||||
@@ -281,23 +306,39 @@ inline dim3 GenerateAtomicExchBlockDimensions() {
|
||||
return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2));
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicScopes scope>
|
||||
template <typename TestType, AtomicScopes scope, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
|
||||
AtomicExchParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = 1;
|
||||
params.threads = GenerateAtomicExchThreadDimensions();
|
||||
if constexpr (scope == AtomicScopes::builtin && memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
|
||||
params.threads = 1;
|
||||
} else if constexpr (scope == AtomicScopes::builtin &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
params.threads = dim3(warp_size);
|
||||
} else {
|
||||
params.threads = GenerateAtomicExchThreadDimensions();
|
||||
}
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
SECTION("Global memory") {
|
||||
params.blocks = GenerateAtomicExchBlockDimensions();
|
||||
if constexpr (scope == AtomicScopes::builtin &&
|
||||
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
|
||||
params.blocks = dim3(1);
|
||||
} else {
|
||||
params.blocks = GenerateAtomicExchBlockDimensions();
|
||||
}
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
AtomicExch<TestType, false, scope>().run(params);
|
||||
AtomicExch<TestType, false, scope, memory_scope>().run(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -305,7 +346,7 @@ void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsi
|
||||
SECTION("Shared memory") {
|
||||
params.blocks = dim3(1);
|
||||
params.alloc_type = LinearAllocs::hipMalloc;
|
||||
AtomicExch<TestType, true, scope>().run(params);
|
||||
AtomicExch<TestType, true, scope, memory_scope>().run(params);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
#include "atomicInc_negative_kernels_rtc.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicInc atomicInc
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* increment on a target memory location. Each thread will increment the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads increment a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicInc
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicInc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicInc_Positive", "", unsigned int) {
  // Byte distance between elements in the scattered pattern (described as the L1 cache line size).
  constexpr unsigned int kCacheLineSize{128u};
  // Byte distance between elements when they are tightly packed.
  constexpr auto kElementSize = sizeof(TestType);

  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Each access pattern is repeated cmd_options.iterations times. The iteration index is streamed
  // into the section name, so every repetition is registered as a distinct section.
  for (int current = 0; current < cmd_options.iterations; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kInc>(1, kElementSize);
    }

    DYNAMIC_SECTION("Adjacent addresses " << current) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kInc>(warp_size, kElementSize);
    }

    DYNAMIC_SECTION("Scattered addresses " << current) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kInc>(warp_size, kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will
|
||||
* perform an atomic increment on a target memory location. Each thread will increment the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. Once
|
||||
* complete, the output array and target memory is validated to contain all the expected values.
|
||||
* Several memory access patterns are tested:
|
||||
* -# All threads increment a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicInc
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicInc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicInc_Positive_Multi_Kernel", "", unsigned int) {
  // The test description states that the kernel is executed two times concurrently.
  constexpr int kKernelCount{2};
  // Byte distance between elements in the scattered pattern (described as the L1 cache line size).
  constexpr unsigned int kCacheLineSize{128u};
  // Byte distance between elements when they are tightly packed.
  constexpr auto kElementSize = sizeof(TestType);

  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Each access pattern is repeated cmd_options.iterations times. The iteration index is streamed
  // into the section name, so every repetition is registered as a distinct section.
  for (int current = 0; current < cmd_options.iterations; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kInc>(kKernelCount, 1,
                                                                      kElementSize);
    }

    DYNAMIC_SECTION("Adjacent addresses " << current) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kInc>(kKernelCount, warp_size,
                                                                      kElementSize);
    }

    DYNAMIC_SECTION("Scattered addresses " << current) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kInc>(kKernelCount, warp_size,
                                                                      kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Runtime-compiles kernels passing combinations of invalid-typed arguments to all overloads of
|
||||
* atomicInc.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicInc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicInc_Negative_Parameters_RTC") {
  const auto program_source = GENERATE(kAtomicInc_uint);

  // Build the program. The compile status is stored here and only inspected after the log has
  // been read and the program destroyed.
  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicInc_negative.cc", 0, nullptr, nullptr));
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a buffer of the size reported by hipRTC.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of "error:" in the log.
  // NOTE(review): 8 presumably corresponds to one diagnostic per kernel in kAtomicInc_uint.
  const std::string error_message{"error:"};
  const int expected_error_count{8};
  int error_count{0};
  for (size_t n_pos = log.find(error_message); n_pos != std::string::npos;
       n_pos = log.find(error_message, n_pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Empty class with device-side constructor and destructor. In this file it is only used as the
// pointee type of the `address` parameter of atomicInc_uint_v4.
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* unsigned int atomicInc(unsigned int* address, unsigned int val) */
|
||||
// Negative case: passes `&address` (an unsigned int**) as the first atomicInc argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicInc(&address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: passes the pointer `address` itself as the second (value) atomicInc argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicInc(address, address);
|
||||
}
|
||||
|
||||
// Negative case: passes the integer literal 1234 as the first (address) atomicInc argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicInc(1234, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicInc with a Dummy* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicInc with a char* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicInc with a short* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicInc with a long* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
// Negative case: calls atomicInc with a long long* as the address argument.
// NOTE(review): presumably expected to be rejected at compile time - confirm with the consumer.
__global__ void atomicInc_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// Kernel source text for runtime compilation. It holds eight kernels (atomicInc_uint_v1 to
// atomicInc_uint_v8), each calling atomicInc with a different argument of an unsuitable type,
// plus the Dummy class used by the v4 kernel. The text is part of the program's runtime
// behaviour: it must not be reformatted or annotated from within the literal.
static constexpr auto kAtomicInc_uint{
|
||||
R"(
|
||||
__global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicInc(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicInc(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicInc(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicInc_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicInc_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicInc_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicInc_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicInc(address, 1234);
|
||||
}
|
||||
)"};
|
||||
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "atomicMax_negative_kernels_rtc.hh"
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicMax atomicMax
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicMax(TestType* address, TestType val)` -
|
||||
* stores the maximum of the value at address and val to address, returns the old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  using Op = MinMax::AtomicOperation;
  // A single element of TestType is targeted by every thread.
  constexpr auto kElementSize = sizeof(TestType);

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int current = 0; current < cmd_options.iterations; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
      MinMax::SingleDeviceSingleKernelTest<TestType, Op::kMax>(1, kElementSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  using Op = MinMax::AtomicOperation;
  // Elements are tightly packed: consecutive elements are sizeof(TestType) bytes apart.
  constexpr auto kElementSize = sizeof(TestType);

  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int current = 0; current < cmd_options.iterations; ++current) {
    DYNAMIC_SECTION("Adjacent address " << current) {
      MinMax::SingleDeviceSingleKernelTest<TestType, Op::kMax>(warp_size, kElementSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax from multiple threads on scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  using Op = MinMax::AtomicOperation;
  // Byte distance between consecutive elements in the scattered layout.
  constexpr unsigned int kCacheLineSize{128u};

  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int current = 0; current < cmd_options.iterations; ++current) {
    DYNAMIC_SECTION("Scattered address " << current) {
      MinMax::SingleDeviceSingleKernelTest<TestType, Op::kMax>(warp_size, kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  using Op = MinMax::AtomicOperation;
  // First argument of the multi-kernel helper; the test description says multiple kernels are
  // launched on one device.
  constexpr int kKernelCount{2};
  // A single element of TestType is targeted by every thread.
  constexpr auto kElementSize = sizeof(TestType);

  // The iteration index is part of the section name, so each repetition is its own section.
  for (int current = 0; current < cmd_options.iterations; ++current) {
    DYNAMIC_SECTION("Same address " << current) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, Op::kMax>(kKernelCount, 1, kElementSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Query the warp size of device 0; it is forwarded to the test helper below.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration so every repetition is reported on its own.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Adjacent address " << round) {
      // Presumably the helper takes (kernel count, width, pitch) - confirm against
      // min_max_common.hh.
      MinMax::SingleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMax>(
          2, lanes_per_warp, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax from multiple threads on scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Fixed 128-byte spacing passed as the last helper argument.
  const unsigned int line_bytes = 128u;

  // Query the warp size of device 0; it is forwarded to the test helper below.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration so every repetition is reported on its own.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Scattered address " << round) {
      // Presumably the helper takes (kernel count, width, pitch) - confirm against
      // min_max_common.hh.
      MinMax::SingleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMax>(
          2, lanes_per_warp, line_bytes);
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles atomicMax with invalid parameters.
|
||||
* - Compiles the source with RTC.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicMax_Negative_Parameters_RTC") {
  // The test body runs once per source string; each string holds the negative kernels
  // for one atomicMax value type.
  const auto program_source = GENERATE(kAtomicMax_int, kAtomicMax_uint, kAtomicMax_ulong,
                                       kAtomicMax_ulonglong, kAtomicMax_float, kAtomicMax_double);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicMax_negative.cc", 0, nullptr, nullptr));
  // The compile status is only verified further down, after the log has been read and
  // the program has been destroyed.
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of "error:" in the log.
  const std::string error_message{"error:"};
  int error_count{0};
  for (size_t n_pos = log.find(error_message, 0); n_pos != std::string::npos;
       n_pos = log.find(error_message, n_pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);

  // Each RTC source string defines eight invalid kernels, so eight errors are expected.
  const int expected_error_count{8};
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* int atomicMax(int* address, int val) */
|
||||
// Negative-test kernel: the address argument is &address, a pointer to the pointer parameter.
__global__ void atomicMax_int_v1(int* address, int* result) { *result = atomicMax(&address, 1234); }
|
||||
|
||||
// Negative-test kernel: the pointer `address` is passed again as the value argument.
__global__ void atomicMax_int_v2(int* address, int* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
// Negative-test kernel: a literal (not a pointer) is passed as the address argument.
__global__ void atomicMax_int_v3(int* address, int* result) { *result = atomicMax(1234, 1234); }
|
||||
|
||||
// Negative-test kernel: the address argument is a pointer to the class type Dummy.
__global__ void atomicMax_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a char*.
__global__ void atomicMax_int_v5(char* address, int* result) { *result = atomicMax(address, 1234); }
|
||||
|
||||
// Negative-test kernel: the address argument is a short*.
__global__ void atomicMax_int_v6(short* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long*.
__global__ void atomicMax_int_v7(long* address, int* result) { *result = atomicMax(address, 1234); }
|
||||
|
||||
// Negative-test kernel: the address argument is a long long*.
__global__ void atomicMax_int_v8(long long* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
/* unsigned int atomicMax(unsigned int* address, unsigned int val) */
|
||||
// Negative-test kernel: the address argument is &address, a pointer to the pointer parameter.
__global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMax(&address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the pointer `address` is passed again as the value argument.
__global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
// Negative-test kernel: a literal (not a pointer) is passed as the address argument.
__global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMax(1234, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a pointer to the class type Dummy.
__global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a char*.
__global__ void atomicMax_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a short*.
__global__ void atomicMax_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long*.
__global__ void atomicMax_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long long*.
__global__ void atomicMax_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicMax(unsigned long* address, unsigned long val) */
|
||||
// Negative-test kernel: the address argument is &address, a pointer to the pointer parameter.
__global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMax(&address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the pointer `address` is passed again as the value argument.
__global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
// Negative-test kernel: a literal (not a pointer) is passed as the address argument.
__global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMax(1234, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a pointer to the class type Dummy.
__global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a char*.
__global__ void atomicMax_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a short*.
__global__ void atomicMax_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long*.
__global__ void atomicMax_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long long*.
__global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicMax(unsigned long long* address, unsigned long long val) */
|
||||
// Negative-test kernel: the address argument is &address, a pointer to the pointer parameter.
__global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(&address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the pointer `address` is passed again as the value argument.
__global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
// Negative-test kernel: a literal (not a pointer) is passed as the address argument.
__global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(1234, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a pointer to the class type Dummy.
__global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a char*.
__global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a short*.
__global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long*.
__global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long long*.
__global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicMax(float* address, float val) */
|
||||
// Negative-test kernel: the address argument is &address, a pointer to the pointer parameter.
__global__ void atomicMax_float_v1(float* address, float* result) {
|
||||
*result = atomicMax(&address, 1234.f);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the pointer `address` is passed again as the value argument.
__global__ void atomicMax_float_v2(float* address, float* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
// Negative-test kernel: a literal (not a pointer) is passed as the address argument.
__global__ void atomicMax_float_v3(float* address, float* result) {
|
||||
*result = atomicMax(1234.f, 1234.f);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a pointer to the class type Dummy.
__global__ void atomicMax_float_v4(Dummy* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a char*.
__global__ void atomicMax_float_v5(char* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a short*.
__global__ void atomicMax_float_v6(short* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long*.
__global__ void atomicMax_float_v7(long* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long long*.
// The value is the float literal 1234.f, matching atomicMax_float_v4..v7, so that the
// address type is the only thing that differs from a valid float call.
__global__ void atomicMax_float_v8(long long* address, float* result) {
  *result = atomicMax(address, 1234.f);
}
|
||||
|
||||
/* atomicMax(double* address, double val) */
|
||||
// Negative-test kernel: the address argument is &address, a pointer to the pointer parameter.
__global__ void atomicMax_double_v1(double* address, double* result) {
|
||||
*result = atomicMax(&address, 1234.0);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the pointer `address` is passed again as the value argument.
__global__ void atomicMax_double_v2(double* address, double* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
// Negative-test kernel: a literal (not a pointer) is passed as the address argument.
__global__ void atomicMax_double_v3(double* address, double* result) {
|
||||
*result = atomicMax(1234.0, 1234.0);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a pointer to the class type Dummy.
__global__ void atomicMax_double_v4(Dummy* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a char*.
__global__ void atomicMax_double_v5(char* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a short*.
__global__ void atomicMax_double_v6(short* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long*.
__global__ void atomicMax_double_v7(long* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
// Negative-test kernel: the address argument is a long long*.
__global__ void atomicMax_double_v8(long long* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// RTC source text for the int negative test: eight kernels (v1..v8) plus the Dummy class,
// each kernel calling atomicMax with an invalid address or value argument.
// Unit_atomicMax_Negative_Parameters_RTC compiles this string and expects eight "error:"
// entries in the compile log.
static constexpr auto kAtomicMax_int{
|
||||
R"(
|
||||
__global__ void atomicMax_int_v1(int* address, int* result) {
|
||||
*result = atomicMax(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_int_v2(int* address, int* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_int_v3(int* address, int* result) {
|
||||
*result = atomicMax(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMax_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_int_v5(char* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_int_v6(short* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_int_v7(long* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_int_v8(long long* address, int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the unsigned int negative test: eight kernels (v1..v8) plus the
// Dummy class, each kernel calling atomicMax with an invalid address or value argument.
// Unit_atomicMax_Negative_Parameters_RTC compiles this string and expects eight "error:"
// entries in the compile log.
static constexpr auto kAtomicMax_uint{
|
||||
R"(
|
||||
__global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMax(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMax(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the unsigned long negative test: eight kernels (v1..v8) plus the
// Dummy class, each kernel calling atomicMax with an invalid address or value argument.
// Unit_atomicMax_Negative_Parameters_RTC compiles this string and expects eight "error:"
// entries in the compile log.
static constexpr auto kAtomicMax_ulong{
|
||||
R"(
|
||||
__global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMax(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMax(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the unsigned long long negative test: eight kernels (v1..v8) plus
// the Dummy class, each kernel calling atomicMax with an invalid address or value argument.
// Unit_atomicMax_Negative_Parameters_RTC compiles this string and expects eight "error:"
// entries in the compile log.
static constexpr auto kAtomicMax_ulonglong{
|
||||
R"(
|
||||
__global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicMax(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the float negative test: eight kernels (v1..v8) plus the Dummy
// class, each kernel calling atomicMax with an invalid address or value argument.
// Unit_atomicMax_Negative_Parameters_RTC compiles this string and expects eight "error:"
// entries in the compile log.
// atomicMax_float_v8 passes the float literal 1234.f like v4..v7, so that the address
// type is the only thing that differs from a valid float call.
static constexpr auto kAtomicMax_float{
|
||||
R"(
|
||||
__global__ void atomicMax_float_v1(float* address, float* result) {
|
||||
*result = atomicMax(&address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_float_v2(float* address, float* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_float_v3(float* address, float* result) {
|
||||
*result = atomicMax(1234.f, 1234.f);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMax_float_v4(Dummy* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_float_v5(char* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_float_v6(short* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_float_v7(long* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_float_v8(long long* address, float* result) {
|
||||
*result = atomicMax(address, 1234.f);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the double negative test: eight kernels (v1..v8) plus the Dummy
// class, each kernel calling atomicMax with an invalid address or value argument.
// Unit_atomicMax_Negative_Parameters_RTC compiles this string and expects eight "error:"
// entries in the compile log.
static constexpr auto kAtomicMax_double{
|
||||
R"(
|
||||
__global__ void atomicMax_double_v1(double* address, double* result) {
|
||||
*result = atomicMax(&address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_double_v2(double* address, double* result) {
|
||||
*result = atomicMax(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_double_v3(double* address, double* result) {
|
||||
*result = atomicMax(1234.0, 1234.0);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMax_double_v4(Dummy* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_double_v5(char* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_double_v6(short* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_double_v7(long* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMax_double_v8(long long* address, double* result) {
|
||||
*result = atomicMax(address, 1234.0);
|
||||
}
|
||||
)"};
|
||||
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicMax_system atomicMax_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicMax_system(TestType* address, TestType val)` -
|
||||
* performs a system-wide atomic maximum between the value stored at address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax_system from multiple threads on the same address.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
#if HT_AMD
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
|
||||
unsigned long, unsigned long long, float, double) {
|
||||
#else
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
|
||||
unsigned long, unsigned long long) {
|
||||
#endif
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
MinMax::MultipleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMaxSystem>(
|
||||
2, 2, 1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax_system from multiple threads on adjacent addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
#if HT_AMD
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long, float, double) {
|
||||
#else
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
#endif
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
MinMax::MultipleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMaxSystem>(
|
||||
2, 2, warp_size, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMax_system from multiple threads on scattered addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMax_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
#if HT_AMD
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long, float, double) {
|
||||
#else
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
#endif
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
MinMax::MultipleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMaxSystem>(
|
||||
2, 2, warp_size, cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "atomicMin_negative_kernels_rtc.hh"
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicMin atomicMin
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicMin(TestType* address, TestType val)` -
|
||||
* calculates the minimum between the value stored at address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // One dynamic section per requested iteration so every repetition is reported on its own.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << round) {
      // Presumably the helper takes (width, pitch) - confirm against min_max_common.hh.
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kMin>(
          1, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Query the warp size of device 0; it is forwarded to the test helper below.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration so every repetition is reported on its own.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Adjacent address " << round) {
      // Presumably the helper takes (width, pitch) - confirm against min_max_common.hh.
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kMin>(
          lanes_per_warp, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin from multiple threads on scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Fixed 128-byte spacing passed as the last helper argument.
  const unsigned int line_bytes = 128u;

  // Query the warp size of device 0; it is forwarded to the test helper below.
  int lanes_per_warp = 0;
  HIP_CHECK(hipDeviceGetAttribute(&lanes_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration so every repetition is reported on its own.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Scattered address " << round) {
      // Presumably the helper takes (width, pitch) - confirm against min_max_common.hh.
      MinMax::SingleDeviceSingleKernelTest<TestType, MinMax::AtomicOperation::kMin>(
          lanes_per_warp, line_bytes);
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // One dynamic section per requested iteration so every repetition is reported on its own.
  int round = 0;
  while (round < cmd_options.iterations) {
    DYNAMIC_SECTION("Same address " << round) {
      // Presumably the helper takes (kernel count, width, pitch) - confirm against
      // min_max_common.hh.
      MinMax::SingleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMin>(
          2, 1, sizeof(TestType));
    }
    ++round;
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  // Warp size of device 0, forwarded to the helper after the leading literal 2.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMin>(
          2, warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin from multiple threads on scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long, float, double) {
  // Warp size of device 0, forwarded to the helper after the leading literal 2.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Fixed 128 is passed instead of sizeof(TestType); presumably the spacing between the
  // addresses being updated -- confirm against the helper's signature.
  constexpr unsigned int kCacheLineSize{128u};

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMin>(
          2, warp_size, kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles atomicMin with invalid parameters.
|
||||
* - Compiles the source with RTC.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicMin_Negative_Parameters_RTC") {
  // Each generated value is one RTC source string; the test body runs once per string.
  const auto program_source = GENERATE(kAtomicMin_int, kAtomicMin_uint, kAtomicMin_ulong,
                                       kAtomicMin_ulonglong, kAtomicMin_float, kAtomicMin_double);

  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicMin_negative.cc", 0, nullptr, nullptr));
  // The compile status is only verified after the program has been destroyed (see below).
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Retrieve the compile log into a buffer of the reported size.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count how many times the compiler reported "error:".
  // Please check the content of negative_kernels_rtc.hh for the expected number.
  const std::string error_message{"error:"};
  const int expected_error_count{8};
  int error_count{0};
  for (size_t n_pos = log.find(error_message, 0); n_pos != std::string::npos;
       n_pos = log.find(error_message, n_pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* int atomicMin(int* address, int val) */
|
||||
// Negative-test kernels for the int flavour: each atomicMin call below passes a mismatched
// argument and is presumably expected to be rejected at compile time.

// v1: address argument is the address of the pointer parameter (int**).
__global__ void atomicMin_int_v1(int* address, int* result) {
  *result = atomicMin(&address, 1234);
}

// v2: the pointer itself is passed as the value operand.
__global__ void atomicMin_int_v2(int* address, int* result) {
  *result = atomicMin(address, address);
}

// v3: an integer literal is passed where the address is expected.
__global__ void atomicMin_int_v3(int* address, int* result) {
  *result = atomicMin(1234, 1234);
}

// v4: address is a pointer to the user-defined Dummy type.
__global__ void atomicMin_int_v4(Dummy* address, int* result) {
  *result = atomicMin(address, 1234);
}

// v5: address is a char*.
__global__ void atomicMin_int_v5(char* address, int* result) {
  *result = atomicMin(address, 1234);
}

// v6: address is a short*.
__global__ void atomicMin_int_v6(short* address, int* result) {
  *result = atomicMin(address, 1234);
}

// v7: address is a long*.
__global__ void atomicMin_int_v7(long* address, int* result) {
  *result = atomicMin(address, 1234);
}

// v8: address is a long long*.
__global__ void atomicMin_int_v8(long long* address, int* result) {
  *result = atomicMin(address, 1234);
}
|
||||
|
||||
/* unsigned int atomicMin(unsigned int* address, unsigned int val) */
|
||||
// Negative-test kernels for the unsigned int flavour: each atomicMin call below passes a
// mismatched argument and is presumably expected to be rejected at compile time.

// v1: address argument is the address of the pointer parameter (unsigned int**).
__global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) {
  *result = atomicMin(&address, 1234);
}

// v2: the pointer itself is passed as the value operand.
__global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) {
  *result = atomicMin(address, address);
}

// v3: an integer literal is passed where the address is expected.
__global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) {
  *result = atomicMin(1234, 1234);
}

// v4: address is a pointer to the user-defined Dummy type.
__global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) {
  *result = atomicMin(address, 1234);
}

// v5: address is a char*.
__global__ void atomicMin_uint_v5(char* address, unsigned int* result) {
  *result = atomicMin(address, 1234);
}

// v6: address is a short*.
__global__ void atomicMin_uint_v6(short* address, unsigned int* result) {
  *result = atomicMin(address, 1234);
}

// v7: address is a long*.
__global__ void atomicMin_uint_v7(long* address, unsigned int* result) {
  *result = atomicMin(address, 1234);
}

// v8: address is a long long*.
__global__ void atomicMin_uint_v8(long long* address, unsigned int* result) {
  *result = atomicMin(address, 1234);
}
|
||||
|
||||
/* atomicMin(unsigned long* address, unsigned long val) */
|
||||
// Negative-test kernels for the unsigned long flavour: each atomicMin call below passes a
// mismatched argument and is presumably expected to be rejected at compile time.

// v1: address argument is the address of the pointer parameter (unsigned long**).
__global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) {
  *result = atomicMin(&address, 1234);
}

// v2: the pointer itself is passed as the value operand.
__global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) {
  *result = atomicMin(address, address);
}

// v3: an integer literal is passed where the address is expected.
__global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) {
  *result = atomicMin(1234, 1234);
}

// v4: address is a pointer to the user-defined Dummy type.
__global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) {
  *result = atomicMin(address, 1234);
}

// v5: address is a char*.
__global__ void atomicMin_ulong_v5(char* address, unsigned long* result) {
  *result = atomicMin(address, 1234);
}

// v6: address is a short*.
__global__ void atomicMin_ulong_v6(short* address, unsigned long* result) {
  *result = atomicMin(address, 1234);
}

// v7: address is a (signed) long*.
__global__ void atomicMin_ulong_v7(long* address, unsigned long* result) {
  *result = atomicMin(address, 1234);
}

// v8: address is a long long*.
__global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) {
  *result = atomicMin(address, 1234);
}
|
||||
|
||||
/* atomicMin(unsigned long long* address, unsigned long long val) */
|
||||
// Negative-test kernels for the unsigned long long flavour: each atomicMin call below passes a
// mismatched argument and is presumably expected to be rejected at compile time.

// v1: address argument is the address of the pointer parameter (unsigned long long**).
__global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
  *result = atomicMin(&address, 1234);
}

// v2: the pointer itself is passed as the value operand.
__global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
  *result = atomicMin(address, address);
}

// v3: an integer literal is passed where the address is expected.
__global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
  *result = atomicMin(1234, 1234);
}

// v4: address is a pointer to the user-defined Dummy type.
__global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) {
  *result = atomicMin(address, 1234);
}

// v5: address is a char*.
__global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) {
  *result = atomicMin(address, 1234);
}

// v6: address is a short*.
__global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) {
  *result = atomicMin(address, 1234);
}

// v7: address is a long*.
__global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) {
  *result = atomicMin(address, 1234);
}

// v8: address is a (signed) long long*.
__global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) {
  *result = atomicMin(address, 1234);
}
|
||||
|
||||
/* atomicMin(float* address, float val) */
|
||||
// Negative-test kernels for the float flavour: each atomicMin call below passes a mismatched
// argument and is presumably expected to be rejected at compile time.

// v1: address argument is the address of the pointer parameter (float**).
__global__ void atomicMin_float_v1(float* address, float* result) {
  *result = atomicMin(&address, 1234.f);
}

// v2: the pointer itself is passed as the value operand.
__global__ void atomicMin_float_v2(float* address, float* result) {
  *result = atomicMin(address, address);
}

// v3: a float literal is passed where the address is expected.
__global__ void atomicMin_float_v3(float* address, float* result) {
  *result = atomicMin(1234.f, 1234.f);
}

// v4: address is a pointer to the user-defined Dummy type.
__global__ void atomicMin_float_v4(Dummy* address, float* result) {
  *result = atomicMin(address, 1234.f);
}

// v5: address is a char*.
__global__ void atomicMin_float_v5(char* address, float* result) {
  *result = atomicMin(address, 1234.f);
}

// v6: address is a short*.
__global__ void atomicMin_float_v6(short* address, float* result) {
  *result = atomicMin(address, 1234.f);
}

// v7: address is a long*.
__global__ void atomicMin_float_v7(long* address, float* result) {
  *result = atomicMin(address, 1234.f);
}
|
||||
|
||||
// v8 (float flavour): address is a long long* while the value operand is a float.
// The value is written as a float literal, like every other atomicMin_float_* kernel in this
// file; the original used the int literal 1234, so the call did not exercise a float value.
__global__ void atomicMin_float_v8(long long* address, float* result) {
  *result = atomicMin(address, 1234.f);
}
|
||||
|
||||
/* atomicMin(double* address, double val) */
|
||||
// Negative-test kernels for the double flavour: each atomicMin call below passes a mismatched
// argument and is presumably expected to be rejected at compile time.

// v1: address argument is the address of the pointer parameter (double**).
__global__ void atomicMin_double_v1(double* address, double* result) {
  *result = atomicMin(&address, 1234.0);
}

// v2: the pointer itself is passed as the value operand.
__global__ void atomicMin_double_v2(double* address, double* result) {
  *result = atomicMin(address, address);
}

// v3: a double literal is passed where the address is expected.
__global__ void atomicMin_double_v3(double* address, double* result) {
  *result = atomicMin(1234.0, 1234.0);
}

// v4: address is a pointer to the user-defined Dummy type.
__global__ void atomicMin_double_v4(Dummy* address, double* result) {
  *result = atomicMin(address, 1234.0);
}

// v5: address is a char*.
__global__ void atomicMin_double_v5(char* address, double* result) {
  *result = atomicMin(address, 1234.0);
}

// v6: address is a short*.
__global__ void atomicMin_double_v6(short* address, double* result) {
  *result = atomicMin(address, 1234.0);
}

// v7: address is a long*.
__global__ void atomicMin_double_v7(long* address, double* result) {
  *result = atomicMin(address, 1234.0);
}

// v8: address is a long long*.
__global__ void atomicMin_double_v8(long long* address, double* result) {
  *result = atomicMin(address, 1234.0);
}
|
||||
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// RTC source text for the int flavour of the atomicMin negative test. It declares a Dummy class
// and eight kernels (atomicMin_int_v1 .. v8); each kernel calls atomicMin with a mismatched
// argument: pointer-to-pointer address, pointer as value, literal as address, and
// Dummy*/char*/short*/long*/long long* addresses.
static constexpr auto kAtomicMin_int{
|
||||
R"(
|
||||
__global__ void atomicMin_int_v1(int* address, int* result) {
|
||||
*result = atomicMin(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_int_v2(int* address, int* result) {
|
||||
*result = atomicMin(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_int_v3(int* address, int* result) {
|
||||
*result = atomicMin(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMin_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_int_v5(char* address, int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_int_v6(short* address, int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_int_v7(long* address, int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_int_v8(long long* address, int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the unsigned int flavour of the atomicMin negative test. It declares a
// Dummy class and eight kernels (atomicMin_uint_v1 .. v8); each kernel calls atomicMin with a
// mismatched argument: pointer-to-pointer address, pointer as value, literal as address, and
// Dummy*/char*/short*/long*/long long* addresses.
static constexpr auto kAtomicMin_uint{
|
||||
R"(
|
||||
__global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMin(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMin(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicMin(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the unsigned long flavour of the atomicMin negative test. It declares a
// Dummy class and eight kernels (atomicMin_ulong_v1 .. v8); each kernel calls atomicMin with a
// mismatched argument: pointer-to-pointer address, pointer as value, literal as address, and
// Dummy*/char*/short*/long*/long long* addresses.
static constexpr auto kAtomicMin_ulong{
|
||||
R"(
|
||||
__global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMin(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMin(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicMin(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the unsigned long long flavour of the atomicMin negative test. It declares
// a Dummy class and eight kernels (atomicMin_ulonglong_v1 .. v8); each kernel calls atomicMin
// with a mismatched argument: pointer-to-pointer address, pointer as value, literal as address,
// and Dummy*/char*/short*/long*/long long* addresses.
static constexpr auto kAtomicMin_ulonglong{
|
||||
R"(
|
||||
__global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMin(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMin(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicMin(1234, 1234);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the float flavour of the atomicMin negative test. It declares a Dummy
// class and eight kernels (atomicMin_float_v1 .. v8); each kernel calls atomicMin with a
// mismatched argument: pointer-to-pointer address, pointer as value, literal as address, and
// Dummy*/char*/short*/long*/long long* addresses.
// NOTE(review): atomicMin_float_v8 passes the int literal 1234 while the other float kernels
// pass 1234.f -- confirm whether that difference is intentional.
static constexpr auto kAtomicMin_float{
|
||||
R"(
|
||||
__global__ void atomicMin_float_v1(float* address, float* result) {
|
||||
*result = atomicMin(&address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_float_v2(float* address, float* result) {
|
||||
*result = atomicMin(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_float_v3(float* address, float* result) {
|
||||
*result = atomicMin(1234.f, 1234.f);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMin_float_v4(Dummy* address, float* result) {
|
||||
*result = atomicMin(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_float_v5(char* address, float* result) {
|
||||
*result = atomicMin(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_float_v6(short* address, float* result) {
|
||||
*result = atomicMin(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_float_v7(long* address, float* result) {
|
||||
*result = atomicMin(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_float_v8(long long* address, float* result) {
|
||||
*result = atomicMin(address, 1234);
|
||||
}
|
||||
)"};
|
||||
|
||||
// RTC source text for the double flavour of the atomicMin negative test. It declares a Dummy
// class and eight kernels (atomicMin_double_v1 .. v8); each kernel calls atomicMin with a
// mismatched argument: pointer-to-pointer address, pointer as value, literal as address, and
// Dummy*/char*/short*/long*/long long* addresses.
static constexpr auto kAtomicMin_double{
|
||||
R"(
|
||||
__global__ void atomicMin_double_v1(double* address, double* result) {
|
||||
*result = atomicMin(&address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_double_v2(double* address, double* result) {
|
||||
*result = atomicMin(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_double_v3(double* address, double* result) {
|
||||
*result = atomicMin(1234.0, 1234.0);
|
||||
}
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicMin_double_v4(Dummy* address, double* result) {
|
||||
*result = atomicMin(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_double_v5(char* address, double* result) {
|
||||
*result = atomicMin(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_double_v6(short* address, double* result) {
|
||||
*result = atomicMin(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_double_v7(long* address, double* result) {
|
||||
*result = atomicMin(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicMin_double_v8(long long* address, double* result) {
|
||||
*result = atomicMin(address, 1234.0);
|
||||
}
|
||||
)"};
|
||||
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicMin_system atomicMin_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicMin_system(TestType* address, TestType val)` -
|
||||
* performs system-wide atomic minimum between address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin_system from multiple threads on the same address.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
#if HT_AMD
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
|
||||
unsigned long, unsigned long long, float, double) {
|
||||
#else
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
|
||||
unsigned long, unsigned long long) {
|
||||
#endif
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
MinMax::MultipleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMinSystem>(
|
||||
2, 2, 1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin_system from multiple threads on adjacent addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
#if HT_AMD
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long, float, double) {
|
||||
#else
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
#endif
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
MinMax::MultipleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMinSystem>(
|
||||
2, 2, warp_size, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicMin_system from multiple threads on scattered addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicMin_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
#if HT_AMD
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long, float, double) {
|
||||
#else
|
||||
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
#endif
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
MinMax::MultipleDeviceMultipleKernelTest<TestType, MinMax::AtomicOperation::kMinSystem>(
|
||||
2, 2, warp_size, cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "atomicOr_negative_kernels_rtc.hh"
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicOr atomicOr
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicOr(TestType* address, TestType val)` -
|
||||
* performs atomic bitwise OR between address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long) {
  // Repeats the single-kernel helper once per requested iteration with the fixed
  // arguments (1, sizeof(TestType)).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Warp size of device 0, forwarded as the first helper argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Warp size of device 0, forwarded as the first helper argument.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Fixed 128 is passed instead of sizeof(TestType); presumably the spacing between the
  // addresses being updated -- confirm against the helper's signature.
  constexpr unsigned int kCacheLineSize{128u};

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          warp_size, kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Repeats the multi-kernel helper once per requested iteration with the fixed
  // arguments (2, 1, sizeof(TestType)).
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size reported for device 0 is forwarded to the helper.
  int device_warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      // NOTE(review): leading 2 is presumably the number of kernels launched - confirm
      // against bitwise_common.hh.
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          2, device_warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // The warp size reported for device 0 is forwarded to the helper.
  int device_warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // Fixed 128-byte value passed as the last helper argument.
  // NOTE(review): presumably the spacing between targeted elements - confirm in the helper.
  const auto line_bytes = 128u;

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOr>(
          2, device_warp_size, line_bytes);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles atomicOr with invalid parameters.
|
||||
* - Compiles the source with RTC.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicOr_Negative_Parameters_RTC") {
  hiprtcProgram program{};

  // GENERATE re-runs the test body once for each of the listed RTC source strings.
  const auto program_source =
      GENERATE(kAtomicOr_int, kAtomicOr_uint, kAtomicOr_ulong, kAtomicOr_ulonglong);
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicOr_negative.cc", 0, nullptr, nullptr));

  // The compile status is only stored here; it is checked after the program is destroyed.
  hiprtcResult compile_status{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a buffer of exactly the reported size.
  size_t log_length{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_length));
  std::string compile_log(log_length, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, compile_log.data()));

  // The expected number of diagnostics depends on the kernels in the RTC header
  // (negative_kernels_rtc.hh); keep both in sync.
  int expected_error_count{9};
  std::string error_marker{"error:"};

  // Count every occurrence of the marker in the log.
  int error_count{0};
  for (size_t hit = compile_log.find(error_marker, 0); hit != std::string::npos;
       hit = compile_log.find(error_marker, hit + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(compile_status, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* int atomicOr(int* address, int val) */
|
||||
__global__ void atomicOr_int_v1(int* address, int* result) { *result = atomicOr(&address, 1234); }
|
||||
|
||||
__global__ void atomicOr_int_v2(int* address, int* result) { *result = atomicOr(address, address); }
|
||||
|
||||
__global__ void atomicOr_int_v3(int* address, int* result) { *result = atomicOr(1234, 1234); }
|
||||
|
||||
__global__ void atomicOr_int_v4(Dummy* address, int* result) { *result = atomicOr(address, 1234); }
|
||||
|
||||
__global__ void atomicOr_int_v5(char* address, int* result) { *result = atomicOr(address, 1234); }
|
||||
|
||||
__global__ void atomicOr_int_v6(short* address, int* result) { *result = atomicOr(address, 1234); }
|
||||
|
||||
__global__ void atomicOr_int_v7(long* address, int* result) { *result = atomicOr(address, 1234); }
|
||||
|
||||
__global__ void atomicOr_int_v8(long long* address, int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_int_v9(float* address, int* result) { *result = atomicOr(address, 1234); }
|
||||
|
||||
__global__ void atomicOr_int_v10(double* address, int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
/* unsigned int atomicOr(unsigned int* address, unsigned int val) */
|
||||
__global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicOr(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicOr(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicOr(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v9(float* address, unsigned int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_uint_v10(double* address, unsigned int* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicOr(unsigned long* address, unsigned long val) */
|
||||
__global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicOr(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicOr(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicOr(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v9(float* address, unsigned long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulong_v10(double* address, unsigned long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicOr(unsigned long long* address, unsigned long long val) */
|
||||
__global__ void atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicOr(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicOr(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) {
|
||||
*result = atomicOr(address, 1234);
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// Runtime-compiled (hiprtc) source text holding the negative kernels for the `int` form of
// atomicOr. The text is handed to the compiler verbatim, so it must contain nothing but the
// kernels and the Dummy helper type: any stray line adds diagnostics of its own and changes
// the number of "error:" entries found in the compile log.
static constexpr auto kAtomicOr_int{
    R"(
  __global__ void atomicOr_int_v1(int* address, int* result) {
    *result = atomicOr(&address, 1234);
  }

  __global__ void atomicOr_int_v2(int* address, int* result) {
    *result = atomicOr(address, address);
  }

  __global__ void atomicOr_int_v3(int* address, int* result) {
    *result = atomicOr(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicOr_int_v4(Dummy* address, int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_int_v5(char* address, int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_int_v6(short* address, int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_int_v7(long* address, int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_int_v8(long long* address, int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_int_v9(float* address, int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_int_v10(double* address, int* result) {
    *result = atomicOr(address, 1234);
  }
)"};
|
||||
|
||||
// Runtime-compiled (hiprtc) source text holding the negative kernels for the `unsigned int`
// form of atomicOr. The text is handed to the compiler verbatim, so it must contain nothing
// but the kernels and the Dummy helper type: any stray line adds diagnostics of its own and
// changes the number of "error:" entries found in the compile log.
static constexpr auto kAtomicOr_uint{
    R"(
  __global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) {
    *result = atomicOr(&address, 1234);
  }

  __global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) {
    *result = atomicOr(address, address);
  }

  __global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) {
    *result = atomicOr(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_uint_v5(char* address, unsigned int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_uint_v6(short* address, unsigned int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_uint_v7(long* address, unsigned int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_uint_v8(long long* address, unsigned int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_uint_v9(float* address, unsigned int* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_uint_v10(double* address, unsigned int* result) {
    *result = atomicOr(address, 1234);
  }
)"};
|
||||
|
||||
// Runtime-compiled (hiprtc) source text holding the negative kernels for the `unsigned long`
// form of atomicOr. The text is handed to the compiler verbatim, so it must contain nothing
// but the kernels and the Dummy helper type: any stray line adds diagnostics of its own and
// changes the number of "error:" entries found in the compile log.
static constexpr auto kAtomicOr_ulong{
    R"(
  __global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) {
    *result = atomicOr(&address, 1234);
  }

  __global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) {
    *result = atomicOr(address, address);
  }

  __global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) {
    *result = atomicOr(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulong_v5(char* address, unsigned long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulong_v6(short* address, unsigned long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulong_v7(long* address, unsigned long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulong_v9(float* address, unsigned long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulong_v10(double* address, unsigned long* result) {
    *result = atomicOr(address, 1234);
  }
)"};
|
||||
|
||||
// Runtime-compiled (hiprtc) source text holding the negative kernels for the
// `unsigned long long` form of atomicOr. The text is handed to the compiler verbatim, so it
// must contain nothing but the kernels and the Dummy helper type: any stray line adds
// diagnostics of its own and changes the number of "error:" entries found in the compile log.
static constexpr auto kAtomicOr_ulonglong{
    R"(
  __global__ void atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
    *result = atomicOr(&address, 1234);
  }

  __global__ void atomicOr_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
    *result = atomicOr(address, address);
  }

  __global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
    *result = atomicOr(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) {
    *result = atomicOr(address, 1234);
  }

  __global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) {
    *result = atomicOr(address, 1234);
  }
)"};
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicOr_system atomicOr_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicOr_system(TestType* address, TestType val)` -
* performs system-wide atomic bitwise OR between the value at address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr_system from multiple threads on the same address.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // One dynamically named section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      // NOTE(review): the arguments are presumably (device count, kernel count, element
      // count, byte pitch) - confirm against bitwise_common.hh.
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
          2, 2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr_system from multiple threads on adjacent addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Only device 0 is queried for its warp size; the value is forwarded to the helper.
  int device_warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      // NOTE(review): the two leading 2s are presumably device and kernel counts - confirm
      // against bitwise_common.hh.
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
          2, 2, device_warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicOr_system from multiple threads on scattered addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicOr_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Only device 0 is queried for its warp size; the value is forwarded to the helper.
  int device_warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // Fixed 128-byte value passed as the last helper argument.
  // NOTE(review): presumably the spacing between targeted elements - confirm in the helper.
  const auto line_bytes = 128u;

  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
          2, 2, device_warp_size, line_bytes);
    }
  }
}
|
||||
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
#include "atomicSub_negative_kernels_rtc.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicSub atomicSub
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* subtraction on a target memory location. Each thread will subtract the same value from the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. Once
|
||||
* complete, the output array and target memory is validated to contain all the expected values.
|
||||
* Several memory access patterns are tested:
|
||||
* -# All threads subtract from a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicSub
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicSub.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicSub_Positive", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // The warp size reported for device 0 sizes the multi-element access patterns below.
  int device_warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // Byte distance used by the scattered pattern (128, named after the cache line size).
  const auto line_bytes = 128u;

  // Every iteration runs the three access patterns as separately named sections.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kSub>(1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kSub>(device_warp_size,
                                                                    sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, AtomicOperation::kSub>(device_warp_size,
                                                                    line_bytes);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
|
||||
* an atomic subtraction on a target memory location. Each thread will subtract the same value from
|
||||
* the memory location, storing the return value into a separate output array slot corresponding to
|
||||
* it. Once complete, the output array and target memory is validated to contain all the expected
|
||||
* values. Several memory access patterns are tested:
|
||||
* -# All threads subtract from a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicSub
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicSub.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicSub_Positive_Multi_Kernel", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  // The warp size reported for device 0 sizes the multi-element access patterns below.
  int device_warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&device_warp_size, hipDeviceAttributeWarpSize, 0));

  // Byte distance used by the scattered pattern (128, named after the cache line size).
  const auto line_bytes = 128u;

  // Every iteration runs the three access patterns as separately named sections; the
  // leading 2 matches the "two times concurrently" wording of the test description.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kSub>(2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kSub>(2, device_warp_size,
                                                                      sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kSub>(2, device_warp_size,
                                                                      line_bytes);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
|
||||
* atomicSub.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicSub.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicSub_Negative_Parameters_RTC") {
  hiprtcProgram program{};

  // GENERATE re-runs the test body once for each of the listed RTC source strings.
  const auto program_source = GENERATE(kAtomicSub_int, kAtomicSub_uint, kAtomicSub_ulong,
                                       kAtomicSub_ulonglong, kAtomicSub_float, kAtomicSub_double);
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicSub_negative.cc", 0, nullptr, nullptr));

  // The compile status is only stored here; it is checked after the program is destroyed.
  hiprtcResult compile_status{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a buffer of exactly the reported size.
  size_t log_length{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_length));
  std::string compile_log(log_length, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, compile_log.data()));

  // Number of "error:" entries each generated source is expected to produce.
  int expected_error_count{8};
  std::string error_marker{"error:"};

  // Count every occurrence of the marker in the log.
  int error_count{0};
  for (size_t hit = compile_log.find(error_marker, 0); hit != std::string::npos;
       hit = compile_log.find(error_marker, hit + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(compile_status, HIPRTC_ERROR_COMPILATION);
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* int atomicSub(int* address, int val) */
|
||||
__global__ void atomicSub_int_v1(int* address, int* result) { *result = atomicSub(&address, 1234); }
|
||||
|
||||
__global__ void atomicSub_int_v2(int* address, int* result) {
|
||||
*result = atomicSub(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_int_v3(int* address, int* result) { *result = atomicSub(1234, 1234); }
|
||||
|
||||
__global__ void atomicSub_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_int_v5(char* address, int* result) { *result = atomicSub(address, 1234); }
|
||||
|
||||
__global__ void atomicSub_int_v6(short* address, int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_int_v7(long* address, int* result) { *result = atomicSub(address, 1234); }
|
||||
|
||||
__global__ void atomicSub_int_v8(long long* address, int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
/* unsigned int atomicSub(unsigned int* address, unsigned int val) */
|
||||
__global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicSub(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicSub(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicSub(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicSub(unsigned long* address, unsigned long val) */
|
||||
__global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicSub(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicSub(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicSub(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicSub(unsigned long long* address, unsigned long long val) */
|
||||
__global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicSub(&address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicSub(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicSub(1234, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicSub(float* address, float val) */
|
||||
__global__ void atomicSub_float_v1(float* address, float* result) {
|
||||
*result = atomicSub(&address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_float_v2(float* address, float* result) {
|
||||
*result = atomicSub(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_float_v3(float* address, float* result) {
|
||||
*result = atomicSub(1234.f, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_float_v4(Dummy* address, float* result) {
|
||||
*result = atomicSub(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_float_v5(char* address, float* result) {
|
||||
*result = atomicSub(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_float_v6(short* address, float* result) {
|
||||
*result = atomicSub(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_float_v7(long* address, float* result) {
|
||||
*result = atomicSub(address, 1234.f);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_float_v8(long long* address, float* result) {
|
||||
*result = atomicSub(address, 1234);
|
||||
}
|
||||
|
||||
/* atomicSub(double* address, double val) */
|
||||
__global__ void atomicSub_double_v1(double* address, double* result) {
|
||||
*result = atomicSub(&address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_double_v2(double* address, double* result) {
|
||||
*result = atomicSub(address, address);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_double_v3(double* address, double* result) {
|
||||
*result = atomicSub(1234.0, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_double_v4(Dummy* address, double* result) {
|
||||
*result = atomicSub(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_double_v5(char* address, double* result) {
|
||||
*result = atomicSub(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_double_v6(short* address, double* result) {
|
||||
*result = atomicSub(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_double_v7(long* address, double* result) {
|
||||
*result = atomicSub(address, 1234.0);
|
||||
}
|
||||
|
||||
__global__ void atomicSub_double_v8(long long* address, double* result) {
|
||||
*result = atomicSub(address, 1234.0);
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// RTC program text: eight kernels that each pass a mismatched argument to the
// int form of atomicSub (see the parameter types / call arguments of v1..v8).
// Declared `inline constexpr` so every translation unit including this header
// shares one definition.
// NOTE(review): the consuming test presumably expects one compiler error per
// kernel - confirm before adding or removing kernels.
inline constexpr auto kAtomicSub_int = R"(
  __global__ void atomicSub_int_v1(int* address, int* result) {
    *result = atomicSub(&address, 1234);
  }

  __global__ void atomicSub_int_v2(int* address, int* result) {
    *result = atomicSub(address, address);
  }

  __global__ void atomicSub_int_v3(int* address, int* result) {
    *result = atomicSub(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicSub_int_v4(Dummy* address, int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_int_v5(char* address, int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_int_v6(short* address, int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_int_v7(long* address, int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_int_v8(long long* address, int* result) {
    *result = atomicSub(address, 1234);
  }
)";
|
||||
|
||||
// RTC program text: eight kernels that each pass a mismatched argument to the
// unsigned int form of atomicSub (see the parameter types / call arguments of
// v1..v8). Declared `inline constexpr` so every translation unit including
// this header shares one definition.
// NOTE(review): the consuming test presumably expects one compiler error per
// kernel - confirm before adding or removing kernels.
inline constexpr auto kAtomicSub_uint = R"(
  __global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) {
    *result = atomicSub(&address, 1234);
  }

  __global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) {
    *result = atomicSub(address, address);
  }

  __global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) {
    *result = atomicSub(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_uint_v5(char* address, unsigned int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_uint_v6(short* address, unsigned int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_uint_v7(long* address, unsigned int* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_uint_v8(long long* address, unsigned int* result) {
    *result = atomicSub(address, 1234);
  }
)";
|
||||
|
||||
// RTC program text: eight kernels that each pass a mismatched argument to the
// unsigned long form of atomicSub (see the parameter types / call arguments of
// v1..v8). Declared `inline constexpr` so every translation unit including
// this header shares one definition.
// NOTE(review): the consuming test presumably expects one compiler error per
// kernel - confirm before adding or removing kernels.
inline constexpr auto kAtomicSub_ulong = R"(
  __global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) {
    *result = atomicSub(&address, 1234);
  }

  __global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) {
    *result = atomicSub(address, address);
  }

  __global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) {
    *result = atomicSub(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulong_v5(char* address, unsigned long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulong_v6(short* address, unsigned long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulong_v7(long* address, unsigned long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) {
    *result = atomicSub(address, 1234);
  }
)";
|
||||
|
||||
// RTC program text: eight kernels that each pass a mismatched argument to the
// unsigned long long form of atomicSub (see the parameter types / call
// arguments of v1..v8). Declared `inline constexpr` so every translation unit
// including this header shares one definition.
// NOTE(review): the consuming test presumably expects one compiler error per
// kernel - confirm before adding or removing kernels.
inline constexpr auto kAtomicSub_ulonglong = R"(
  __global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
    *result = atomicSub(&address, 1234);
  }

  __global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
    *result = atomicSub(address, address);
  }

  __global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
    *result = atomicSub(1234, 1234);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulonglong_v5(char* address, unsigned long long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulonglong_v7(long* address, unsigned long long* result) {
    *result = atomicSub(address, 1234);
  }

  __global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) {
    *result = atomicSub(address, 1234);
  }
)";
|
||||
|
||||
// RTC program text: eight kernels that each pass a mismatched argument to the
// float form of atomicSub (see the parameter types / call arguments of
// v1..v8). v8 passes a float literal like its siblings, so the pointer type is
// the only mismatch it exercises. Declared `inline constexpr` so every
// translation unit including this header shares one definition.
// NOTE(review): the consuming test presumably expects one compiler error per
// kernel - confirm before adding or removing kernels.
inline constexpr auto kAtomicSub_float = R"(
  __global__ void atomicSub_float_v1(float* address, float* result) {
    *result = atomicSub(&address, 1234.f);
  }

  __global__ void atomicSub_float_v2(float* address, float* result) {
    *result = atomicSub(address, address);
  }

  __global__ void atomicSub_float_v3(float* address, float* result) {
    *result = atomicSub(1234.f, 1234.f);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicSub_float_v4(Dummy* address, float* result) {
    *result = atomicSub(address, 1234.f);
  }

  __global__ void atomicSub_float_v5(char* address, float* result) {
    *result = atomicSub(address, 1234.f);
  }

  __global__ void atomicSub_float_v6(short* address, float* result) {
    *result = atomicSub(address, 1234.f);
  }

  __global__ void atomicSub_float_v7(long* address, float* result) {
    *result = atomicSub(address, 1234.f);
  }

  __global__ void atomicSub_float_v8(long long* address, float* result) {
    *result = atomicSub(address, 1234.f);
  }
)";
|
||||
|
||||
// RTC program text: eight kernels that each pass a mismatched argument to the
// double form of atomicSub (see the parameter types / call arguments of
// v1..v8). Declared `inline constexpr` so every translation unit including
// this header shares one definition.
// NOTE(review): the consuming test presumably expects one compiler error per
// kernel - confirm before adding or removing kernels.
inline constexpr auto kAtomicSub_double = R"(
  __global__ void atomicSub_double_v1(double* address, double* result) {
    *result = atomicSub(&address, 1234.0);
  }

  __global__ void atomicSub_double_v2(double* address, double* result) {
    *result = atomicSub(address, address);
  }

  __global__ void atomicSub_double_v3(double* address, double* result) {
    *result = atomicSub(1234.0, 1234.0);
  }

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void atomicSub_double_v4(Dummy* address, double* result) {
    *result = atomicSub(address, 1234.0);
  }

  __global__ void atomicSub_double_v5(char* address, double* result) {
    *result = atomicSub(address, 1234.0);
  }

  __global__ void atomicSub_double_v6(short* address, double* result) {
    *result = atomicSub(address, 1234.0);
  }

  __global__ void atomicSub_double_v7(long* address, double* result) {
    *result = atomicSub(address, 1234.0);
  }

  __global__ void atomicSub_double_v8(long long* address, double* result) {
    *result = atomicSub(address, 1234.0);
  }
)";
|
||||
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicSub_system atomicSub_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on two devices wherein all threads will perform
|
||||
* an atomic subtraction on a target memory location. Each thread will subtract the same value from the memory
|
||||
* location, storing the return value into a separate output array slot corresponding to it. Once
|
||||
* complete, the output array and target memory is validated to contain all the expected values.
|
||||
* Several memory access patterns are tested:
|
||||
* -# All threads subtract from a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicSub_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicSub_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
                   unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kSubSystem;
  // Byte pitch passed for the "scattered" pattern.
  constexpr auto kCacheLineSize = 128u;

  // The warp size is queried from device 0.
  int threads_per_warp{0};
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  // The three access patterns are repeated once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, 1, sizeof(TestType));
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    sizeof(TestType));
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel on a single device wherein all threads will perform
|
||||
* an atomic subtraction on a target memory location. Each thread will subtract the same value from the
|
||||
* memory location, storing the return value into a separate output array slot corresponding to
|
||||
* it. While the kernel is running, the host performs atomic subtractions, in 4 threads, on the same
|
||||
* memory location(s). Once complete, the output array and target memory is validated to contain
|
||||
* all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads subtract from a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicSub_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicSub_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kSubSystem;
  // Byte pitch passed for the "scattered" pattern.
  constexpr auto kCacheLineSize = 128u;

  // The warp size is queried from device 0.
  int threads_per_warp{0};
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  // The three access patterns are repeated once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, 1, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, threads_per_warp,
                                                                    sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(1, 1, threads_per_warp,
                                                                    kCacheLineSize, 4);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times on two devices wherein all threads will perform
|
||||
* an atomic subtraction on a target memory location. Each thread will subtract the same value from the
|
||||
* memory location, storing the return value into a separate output array slot corresponding to
|
||||
* it. While the kernel is running, the host performs atomic subtractions, in 4 threads, on the same
|
||||
* memory location(s). Once complete, the output array and target memory is validated to contain
|
||||
* all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads subtract from a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicSub_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicSub_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
                   unsigned long, unsigned long long, float, double) {
  constexpr auto kOperation = AtomicOperation::kSubSystem;
  // Byte pitch passed for the "scattered" pattern.
  constexpr auto kCacheLineSize = 128u;

  // The warp size is queried from device 0.
  int threads_per_warp{0};
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  // The three access patterns are repeated once per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, 1, sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    sizeof(TestType), 4);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      MultipleDeviceMultipleKernelAndHostTest<TestType, kOperation>(2, 2, threads_per_warp,
                                                                    kCacheLineSize, 4);
    }
  }
}
|
||||
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "atomicXor_negative_kernels_rtc.hh"
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicXor atomicXor
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicXor(TestType* address, TestType val)` -
|
||||
* performs atomic bitwise XOR between address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_SameAddress", "", int, unsigned int, unsigned long,
                   unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  // One dynamic section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  // The warp size is queried from device 0.
  int threads_per_warp{0};
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation>(threads_per_warp,
                                                                  sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Scattered_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;
  // Byte pitch passed for the "scattered" pattern.
  constexpr auto kCacheLineSize = 128u;

  // The warp size is queried from device 0.
  int threads_per_warp{0};
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceSingleKernelTest<TestType, kOperation>(threads_per_warp,
                                                                  kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  // One dynamic section per requested iteration.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;

  // The warp size is queried from device 0.
  int threads_per_warp{0};
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration.
  // NOTE(review): this passes warp size minus one, whereas the single-kernel
  // variant passes the full warp size - confirm the difference is intended.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, threads_per_warp - 1,
                                                                    sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  constexpr auto kOperation = Bitwise::AtomicOperation::kXor;
  // Byte pitch passed for the "scattered" pattern.
  constexpr auto kCacheLineSize = 128u;

  // The warp size is queried from device 0.
  int threads_per_warp{0};
  HIP_CHECK(hipDeviceGetAttribute(&threads_per_warp, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per requested iteration.
  // NOTE(review): this passes warp size minus one, whereas the single-kernel
  // variant passes the full warp size - confirm the difference is intended.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, threads_per_warp - 1,
                                                                    kCacheLineSize);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles atomicXor with invalid parameters.
|
||||
* - Compiles the source with RTC.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicXor_Negative_Parameters_RTC") {
  // Each generated value is the source text of one family of negative kernels.
  const auto program_source =
      GENERATE(kAtomicXor_int, kAtomicXor_uint, kAtomicXor_ulong, kAtomicXor_ulonglong);

  // Create the program and compile it; the result is only inspected at the end,
  // after the log has been read and the program destroyed.
  hiprtcProgram program{};
  HIPRTC_CHECK(
      hiprtcCreateProgram(&program, program_source, "atomicXor_negative.cc", 0, nullptr, nullptr));
  hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};

  // Fetch the compile log into a string sized from the reported log size.
  size_t log_size{};
  HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
  std::string log(log_size, ' ');
  HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));

  // Count every occurrence of "error:" in the log.
  const std::string error_message{"error:"};
  int error_count{0};
  for (size_t n_pos = log.find(error_message, 0); n_pos != std::string::npos;
       n_pos = log.find(error_message, n_pos + 1)) {
    ++error_count;
  }

  HIPRTC_CHECK(hiprtcDestroyProgram(&program));
  HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);

  // Please check the content of negative_kernels_rtc.hh
  const int expected_error_count{9};
  REQUIRE(error_count == expected_error_count);
}
|
||||
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
/* int atomicXor(int* address, int val) */
|
||||
// Negative-test kernels for the int overload (v1..v10): each one calls
// atomicXor with an argument whose type does not match (int*, int).

// v1: the address argument is `&address`, i.e. an int**.
__global__ void atomicXor_int_v1(int* address, int* result) {
  *result = atomicXor(&address, 1234);
}

// v2: the value argument is the pointer `address` itself.
__global__ void atomicXor_int_v2(int* address, int* result) {
  *result = atomicXor(address, address);
}

// v3: the address argument is an integer literal, not a pointer.
__global__ void atomicXor_int_v3(int* address, int* result) {
  *result = atomicXor(1234, 1234);
}

// v4: the address argument points to the user-defined class Dummy.
__global__ void atomicXor_int_v4(Dummy* address, int* result) {
  *result = atomicXor(address, 1234);
}

// v5: the address argument is a char*.
__global__ void atomicXor_int_v5(char* address, int* result) {
  *result = atomicXor(address, 1234);
}

// v6: the address argument is a short*.
__global__ void atomicXor_int_v6(short* address, int* result) {
  *result = atomicXor(address, 1234);
}

// v7: the address argument is a long*.
__global__ void atomicXor_int_v7(long* address, int* result) {
  *result = atomicXor(address, 1234);
}

// v8: the address argument is a long long*.
__global__ void atomicXor_int_v8(long long* address, int* result) {
  *result = atomicXor(address, 1234);
}

// v9: the address argument is a float*.
__global__ void atomicXor_int_v9(float* address, int* result) {
  *result = atomicXor(address, 1234);
}

// v10: the address argument is a double*.
__global__ void atomicXor_int_v10(double* address, int* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
/* unsigned int atomicXor(unsigned int* address, unsigned int val) */
|
||||
// Negative-test kernels for the unsigned int overload (v1..v8): each one calls
// atomicXor with an argument whose type does not match (unsigned int*, unsigned int).

// v1: the address argument is `&address`, i.e. an unsigned int**.
__global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) {
  *result = atomicXor(&address, 1234);
}

// v2: the value argument is the pointer `address` itself.
__global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) {
  *result = atomicXor(address, address);
}

// v3: the address argument is an integer literal, not a pointer.
__global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) {
  *result = atomicXor(1234, 1234);
}

// v4: the address argument points to the user-defined class Dummy.
__global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

// v5: the address argument is a char*.
__global__ void atomicXor_uint_v5(char* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

// v6: the address argument is a short*.
__global__ void atomicXor_uint_v6(short* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

// v7: the address argument is a long*.
__global__ void atomicXor_uint_v7(long* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

// v8: the address argument is a long long*.
__global__ void atomicXor_uint_v8(long long* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
// Negative-test kernel v9 of the unsigned int family: the address argument is
// a float*. The kernel carries the `uint` family prefix like v1..v8 of this
// family; its result parameter is an unsigned int*.
__global__ void atomicXor_uint_v9(float* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
// Negative-test kernel v10 of the unsigned int family: the address argument is
// a double*. The kernel carries the `uint` family prefix like v1..v8 of this
// family; its result parameter is an unsigned int*.
__global__ void atomicXor_uint_v10(double* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
/* unsigned long atomicXor(unsigned long* address, unsigned long val) */
|
||||
// Negative-test kernels for the unsigned long overload (v1..v8): each one calls
// atomicXor with an argument whose type does not match
// (unsigned long*, unsigned long).

// v1: the address argument is `&address`, i.e. an unsigned long**.
__global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) {
  *result = atomicXor(&address, 1234);
}

// v2: the value argument is the pointer `address` itself.
__global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) {
  *result = atomicXor(address, address);
}

// v3: the address argument is an integer literal, not a pointer.
__global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) {
  *result = atomicXor(1234, 1234);
}

// v4: the address argument points to the user-defined class Dummy.
__global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

// v5: the address argument is a char*.
__global__ void atomicXor_ulong_v5(char* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

// v6: the address argument is a short*.
__global__ void atomicXor_ulong_v6(short* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

// v7: the address argument is a (signed) long*.
__global__ void atomicXor_ulong_v7(long* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

// v8: the address argument is a long long*.
__global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
// Negative-test kernel v9 of the unsigned long family: the address argument is
// a float*. Calls atomicXor, the operation this file is about, like every
// other kernel of the family.
__global__ void atomicXor_ulong_v9(float* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
// Negative-test kernel v10 of the unsigned long family: the address argument
// is a double*. Calls atomicXor, the operation this file is about, like every
// other kernel of the family.
__global__ void atomicXor_ulong_v10(double* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
/* unsigned long long atomicXor(unsigned long long* address, unsigned long long val) */
|
||||
// Negative-test kernels for the unsigned long long overload (v1..v8): each one
// calls atomicXor with an argument whose type does not match
// (unsigned long long*, unsigned long long).

// v1: the address argument is `&address`, i.e. an unsigned long long**.
__global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(&address, 1234);
}

// v2: the value argument is the pointer `address` itself.
__global__ void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(address, address);
}

// v3: the address argument is an integer literal, not a pointer.
__global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(1234, 1234);
}

// v4: the address argument points to the user-defined class Dummy.
__global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

// v5: the address argument is a char*.
__global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

// v6: the address argument is a short*.
__global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

// v7: the address argument is a long*.
__global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

// v8: the address argument is a (signed) long long*.
__global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
// Negative-test kernel v9 of the unsigned long long family: the address
// argument is a float*. Both the kernel name and the call use atomicXor, the
// operation this file is about, like v1..v8 of the family.
__global__ void atomicXor_ulonglong_v9(float* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
|
||||
|
||||
// Negative-test kernel v10 of the unsigned long long family: the address
// argument is a double*. Both the kernel name and the call use atomicXor, the
// operation this file is about, like v1..v8 of the family.
__global__ void atomicXor_ulonglong_v10(double* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
|
||||
@@ -0,0 +1,223 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// Source text handed to hipRTC: ten kernels (v1..v10) that call atomicXor with
// arguments not matching (int*, int), plus the Dummy class used by v4.
static constexpr const char* kAtomicXor_int = R"(
__global__ void atomicXor_int_v1(int* address, int* result) {
  *result = atomicXor(&address, 1234);
}

__global__ void atomicXor_int_v2(int* address, int* result) {
  *result = atomicXor(address, address);
}

__global__ void atomicXor_int_v3(int* address, int* result) {
  *result = atomicXor(1234, 1234);
}

class Dummy {
 public:
  __device__ Dummy() {}
  __device__ ~Dummy() {}
};

__global__ void atomicXor_int_v4(Dummy* address, int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_int_v5(char* address, int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_int_v6(short* address, int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_int_v7(long* address, int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_int_v8(long long* address, int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_int_v9(float* address, int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_int_v10(double* address, int* result) {
  *result = atomicXor(address, 1234);
}
)";
|
||||
|
||||
// Source text handed to hipRTC: ten kernels (v1..v10) that call atomicXor with
// arguments not matching (unsigned int*, unsigned int), plus the Dummy class
// used by v4.
static constexpr const char* kAtomicXor_uint = R"(
__global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) {
  *result = atomicXor(&address, 1234);
}

__global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) {
  *result = atomicXor(address, address);
}

__global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) {
  *result = atomicXor(1234, 1234);
}

class Dummy {
 public:
  __device__ Dummy() {}
  __device__ ~Dummy() {}
};

__global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_uint_v5(char* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_uint_v6(short* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_uint_v7(long* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_uint_v8(long long* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_uint_v9(float* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_uint_v10(double* address, unsigned int* result) {
  *result = atomicXor(address, 1234);
}
)";
|
||||
|
||||
// Source text handed to hipRTC: ten kernels (v1..v10) that call atomicXor with
// arguments not matching (unsigned long*, unsigned long), plus the Dummy class
// used by v4.
static constexpr const char* kAtomicXor_ulong = R"(
__global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) {
  *result = atomicXor(&address, 1234);
}

__global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) {
  *result = atomicXor(address, address);
}

__global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) {
  *result = atomicXor(1234, 1234);
}

class Dummy {
 public:
  __device__ Dummy() {}
  __device__ ~Dummy() {}
};

__global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulong_v5(char* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulong_v6(short* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulong_v7(long* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulong_v9(float* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulong_v10(double* address, unsigned long* result) {
  *result = atomicXor(address, 1234);
}
)";
|
||||
|
||||
// Source text handed to hipRTC: ten kernels (v1..v10) that call atomicXor with
// arguments not matching (unsigned long long*, unsigned long long), plus the
// Dummy class used by v4.
static constexpr const char* kAtomicXor_ulonglong = R"(
__global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(&address, 1234);
}

__global__ void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(address, address);
}

__global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
  *result = atomicXor(1234, 1234);
}

class Dummy {
 public:
  __device__ Dummy() {}
  __device__ ~Dummy() {}
};

__global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulonglong_v9(float* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}

__global__ void atomicXor_ulonglong_v10(double* address, unsigned long long* result) {
  *result = atomicXor(address, 1234);
}
)";
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicXor_system atomicXor_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
* `atomicXor_system(TestType* address, TestType val)` -
|
||||
* performs system-wide atomic bitwise XOR between address and val, returns old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor_system from multiple threads on the same address.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
                   unsigned long, unsigned long long) {
  // Atomic operation exercised by every section of this test case.
  constexpr auto kOperation = Bitwise::AtomicOperation::kXorSystem;

  // One dynamic section per iteration taken from the command-line options; the
  // index is streamed into the section name so each section is distinct.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, kOperation>(
          2, 2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor_system from multiple threads on adjacent addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Atomic operation exercised by every section of this test case.
  constexpr auto kOperation = Bitwise::AtomicOperation::kXorSystem;

  // Warp size of device 0, forwarded unchanged to the test helper below.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // One dynamic section per iteration taken from the command-line options.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, kOperation>(
          2, 2, warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs atomicXor_system from multiple threads on scattered addresses.
|
||||
* - Uses multiple devices and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicXor_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
                   unsigned int, unsigned long, unsigned long long) {
  // Atomic operation exercised by every section of this test case.
  constexpr auto kOperation = Bitwise::AtomicOperation::kXorSystem;

  // Warp size of device 0, forwarded unchanged to the test helper below.
  int warp_size{0};
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Hard-coded cache line size passed as the last argument (instead of
  // sizeof(TestType), which the adjacent-address variant passes).
  const unsigned int cache_line_size = 128u;

  // One dynamic section per iteration taken from the command-line options.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      Bitwise::MultipleDeviceMultipleKernelTest<TestType, kOperation>(
          2, 2, warp_size, cache_line_size);
    }
  }
}
|
||||
@@ -0,0 +1,458 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
constexpr int kMemOrder = __ATOMIC_RELAXED;
|
||||
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
|
||||
|
||||
// Trivially-copyable class.
|
||||
class DummyTC {
|
||||
public:
|
||||
__device__ DummyTC() {}
|
||||
__device__ ~DummyTC() = default;
|
||||
__device__ DummyTC(const DummyTC&) = default;
|
||||
__device__ DummyTC& operator=(const DummyTC&) = default;
|
||||
__device__ DummyTC(DummyTC&&) = default;
|
||||
__device__ DummyTC& operator=(DummyTC&&) = default;
|
||||
};
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
// Compile-only kernel: a list of __hip_atomic_store calls, first with the
// memory orders marked valid for a store, then one call per misuse being probed.
__global__ void StoreCompileKernel(int* x) {
  // Valid combinations: relaxed, release and sequentially-consistent stores.
  __hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope);
  __hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope);
  __hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope);

  // Address is a pointer to const int (a pointer to a non-const type is needed).
  __hip_atomic_store(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
  // Value instead of pointer passed as the address argument.
  __hip_atomic_store(*x, 1, kMemOrder, kMemScope);
  // Consume not allowed by C++11 for store.
  __hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope);
  // Acquire not allowed by C++11 for store.
  __hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope);
  // Acquire-Release not allowed by C++11 for store.
  __hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope);
  // Memory order is out of bounds (below and above the range).
  __hip_atomic_store(x, 1, -1, kMemScope);
  __hip_atomic_store(x, 1, 10, kMemScope);
  // Memory scope is out of bounds (below and above the range).
  __hip_atomic_store(x, 1, kMemOrder, -1);
  __hip_atomic_store(x, 1, kMemOrder, 10);

  // Storing an object that is not trivially-copyable.
  Dummy dummy_a{};
  Dummy dummy_b{};
  __hip_atomic_store(&dummy_a, dummy_b, kMemOrder, kMemScope);

  // Storing an object that is trivially-copyable.
  DummyTC dummytc_a{};
  DummyTC dummytc_b{};
  __hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
}
|
||||
|
||||
// Compile-only kernel: a list of __hip_atomic_load calls, first with the memory
// orders marked valid for a load, then one call per misuse being probed.
__global__ void LoadCompileKernel(int* x, int* y) {
  // Valid combinations: relaxed, consume, acquire and sequentially-consistent loads.
  *y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope);
  *y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope);
  *y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope);
  *y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope);

  // Value instead of pointer passed as the 1st (address) parameter.
  *y = __hip_atomic_load(*x, kMemOrder, kMemScope);
  // Release not allowed by C++11 for load.
  *y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope);
  // Acquire-Release not allowed by C++11 for load.
  *y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope);
  // Memory order is out of bounds (below and above the range).
  *y = __hip_atomic_load(x, -1, kMemScope);
  *y = __hip_atomic_load(x, 10, kMemScope);
  // Memory scope is out of bounds (below and above the range).
  *y = __hip_atomic_load(x, kMemOrder, -1);
  *y = __hip_atomic_load(x, kMemOrder, 10);

  // Loading an object that is not trivially-copyable.
  Dummy dummy_a{};
  Dummy dummy_b{};
  dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope);

  // Loading an object that is trivially-copyable.
  DummyTC dummytc_a{};
  DummyTC dummytc_b{};
  dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope);
}
|
||||
|
||||
// Compile-only kernel: a list of __hip_atomic_compare_exchange_weak calls,
// first the success/failure order pairs marked valid, then one call per misuse
// being probed. `res` only receives the results; it is never read.
__global__ void CompareWeakCompileKernel(int* x, int* expected) {
  bool res{false};

  // Valid combinations, listed as (success order, failure order).
  // Success order: relaxed.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
                                           kMemScope);
  // Success order: consume.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
                                           kMemScope);
  // Success order: acquire.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
                                           kMemScope);
  // Success order: release.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
                                           kMemScope);
  // Success order: acquire-release.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
                                           kMemScope);
  // Success order: sequentially consistent.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
                                           kMemScope);
  // NOTE(review): this pair uses acquire-release as the failure order, which the
  // "not allowed on fail" case further down treats as invalid - confirm that it
  // belongs in the valid list.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
                                           kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
                                           kMemScope);

  // Release not allowed on fail by C++11.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
  // Acquire-Release not allowed on fail by C++11.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
  // Fail stronger than success.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
                                           kMemScope);
  // Address is a pointer to const int (a pointer to a non-const type is needed).
  res = __hip_atomic_compare_exchange_weak(reinterpret_cast<const int*>(x), expected, 1, kMemOrder,
                                           kMemOrder, kMemScope);
  // Value instead of pointer passed as the address argument.
  res = __hip_atomic_compare_exchange_weak(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
  // Memory order on success is out of bounds.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, kMemScope);
  // Memory order on failure is out of bounds.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope);
  // Memory scope is out of bounds.
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1);
  res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10);

  // User-defined class is not trivially-copyable and therefore cannot be atomically copied.
  Dummy dummy_a{};
  Dummy dummy_b{};
  Dummy dummy_c{};
  res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
                                           kMemScope);

  // User-defined class is trivially-copyable and can be atomically copied.
  DummyTC dummytc_a{};
  DummyTC dummytc_b{};
  DummyTC dummytc_c{};
  res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder,
                                           kMemScope);
}
|
||||
|
||||
// Exercises __hip_atomic_compare_exchange_strong: first with the success/failure memory order
// pairs listed under "Valid combinations", then with arguments that the inline comments
// describe as invalid. The value stored in res is never read.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void CompareStrongCompileKernel(int* x, int* expected) {
|
||||
bool res{false};
|
||||
// Valid combinations
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
// NOTE(review): this pair uses ACQ_REL as the failure order, which a comment further down
// lists as not allowed on failure - confirm it really belongs under "Valid combinations".
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
|
||||
kMemScope);
|
||||
|
||||
// Release not allowed on fail by C++11
|
||||
res =
|
||||
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
|
||||
// Acquire-Release not allowed on fail by C++11
|
||||
res =
|
||||
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
|
||||
// Fail stronger than success
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
|
||||
kMemScope);
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
res = __hip_atomic_compare_exchange_strong(reinterpret_cast<const int*>(x), expected, 1,
|
||||
kMemOrder, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin for 1st parameter
|
||||
res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
|
||||
// Memory order on success is out of bounds
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope);
|
||||
// Memory order on failure is out of bounds
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, -1, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope);
|
||||
// Memory scope is out of bounds
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10);
|
||||
|
||||
// User-defined class is not trivially-copyable and therefore cannot be atomically copied
|
||||
Dummy dummy_a{};
|
||||
Dummy dummy_b{};
|
||||
Dummy dummy_c{};
|
||||
res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
|
||||
kMemScope);
|
||||
// User-defined class is trivially-copyable and can be atomically copied
|
||||
DummyTC dummytc_a{};
|
||||
DummyTC dummytc_b{};
|
||||
DummyTC dummytc_c{};
|
||||
res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder,
|
||||
kMemOrder, kMemScope);
|
||||
}
|
||||
|
||||
// Exercises __hip_atomic_exchange: first with each memory order listed under
// "Valid combinations", then with arguments that the inline comments describe as invalid.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void ExchangeCompileKernel(int* x) {
|
||||
int old{};
|
||||
// Valid combinations
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
old = __hip_atomic_exchange(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin
|
||||
old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope);
|
||||
// Memory order out of bounds
|
||||
old = __hip_atomic_exchange(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, 10, kMemScope);
|
||||
// Memory scope out of bounds
|
||||
old = __hip_atomic_exchange(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_exchange(x, 1, kMemOrder, 10);
|
||||
|
||||
// User-defined class is not trivially-copyable and therefore cannot be atomically copied
|
||||
Dummy dummy_a{};
|
||||
Dummy dummy_b{};
|
||||
dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope);
|
||||
|
||||
// User-defined class is trivially-copyable and can be atomically copied
|
||||
DummyTC dummytc_a{};
|
||||
DummyTC dummytc_b{};
|
||||
dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
|
||||
}
|
||||
|
||||
// Exercises __hip_atomic_fetch_add: first with each memory order listed under
// "Valid combinations", then with arguments that the inline comments describe as invalid.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void FetchAddCompileKernel(int* x) {
|
||||
int old{};
|
||||
// Valid combinations
|
||||
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
old = __hip_atomic_fetch_add(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin
|
||||
old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope);
|
||||
// Memory order out of bounds
|
||||
old = __hip_atomic_fetch_add(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_fetch_add(x, 1, 10, kMemScope);
|
||||
// Memory scope out of bounds
|
||||
old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10);
|
||||
|
||||
// Pointer to a user-defined class instead of an arithmetic type
// NOTE(review): presumably rejected because Dummy is not a valid fetch_add operand - confirm.
Dummy dummy{};
|
||||
old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope);
|
||||
}
|
||||
|
||||
// Exercises __hip_atomic_fetch_and: first with each memory order listed under
// "Valid combinations", then with arguments that the inline comments describe as invalid.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void FetchAndCompileKernel(int* x) {
|
||||
int old{};
|
||||
// Valid combinations
|
||||
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
old = __hip_atomic_fetch_and(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin
|
||||
old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope);
|
||||
// Memory order out of bounds
|
||||
old = __hip_atomic_fetch_and(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_fetch_and(x, 1, 10, kMemScope);
|
||||
// Memory scope out of bounds
|
||||
old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10);
|
||||
|
||||
// Value must be an integer: class, float and double operands are passed below
|
||||
Dummy dummy{};
|
||||
old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope);
|
||||
float float_var{1.5f};
|
||||
old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope);
|
||||
double double_var{1.5};
|
||||
old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope);
|
||||
}
|
||||
|
||||
// Exercises __hip_atomic_fetch_or: first with each memory order listed under
// "Valid combinations", then with arguments that the inline comments describe as invalid.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void FetchOrCompileKernel(int* x) {
|
||||
int old{};
|
||||
// Valid combinations
|
||||
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
old = __hip_atomic_fetch_or(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin
|
||||
old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope);
|
||||
// Memory order out of bounds
|
||||
old = __hip_atomic_fetch_or(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_fetch_or(x, 1, 10, kMemScope);
|
||||
// Memory scope out of bounds
|
||||
old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10);
|
||||
|
||||
// Value must be an integer: class, float and double operands are passed below
|
||||
Dummy dummy{};
|
||||
old = __hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope);
|
||||
float float_var{1.5f};
|
||||
old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope);
|
||||
double double_var{1.5};
|
||||
old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope);
|
||||
}
|
||||
|
||||
// Exercises __hip_atomic_fetch_xor: first with each memory order listed under
// "Valid combinations", then with arguments that the inline comments describe as invalid.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void FetchXorCompileKernel(int* x) {
|
||||
int old{};
|
||||
// Valid combinations
|
||||
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
old = __hip_atomic_fetch_xor(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin
|
||||
old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope);
|
||||
// Memory order out of bounds
|
||||
old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope);
|
||||
// Memory scope out of bounds
|
||||
old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10);
|
||||
|
||||
// Value must be an integer: class, float and double operands are passed below
|
||||
Dummy dummy{};
|
||||
old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope);
|
||||
float float_var{1.5f};
|
||||
old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope);
|
||||
double double_var{1.5};
|
||||
old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope);
|
||||
}
|
||||
|
||||
// Exercises __hip_atomic_fetch_max: first with each memory order listed under
// "Valid combinations", then with arguments that the inline comments describe as invalid.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void FetchMaxCompileKernel(int* x) {
|
||||
int old{};
|
||||
// Valid combinations
|
||||
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
old = __hip_atomic_fetch_max(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin
|
||||
old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope);
|
||||
// Memory order out of bounds
|
||||
old = __hip_atomic_fetch_max(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_fetch_max(x, 1, 10, kMemScope);
|
||||
// Memory scope out of bounds
|
||||
old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10);
|
||||
|
||||
// Value must be integer or floating point type; a user-defined class is passed below
|
||||
Dummy dummy{};
|
||||
old = __hip_atomic_fetch_max(&dummy, 1, kMemOrder, kMemScope);
|
||||
}
|
||||
|
||||
// Exercises __hip_atomic_fetch_min: first with each memory order listed under
// "Valid combinations", then with arguments that the inline comments describe as invalid.
// NOTE(review): presumably consumed by a negative compilation test that counts diagnostics,
// so statements should not be added, removed or reordered casually - confirm.
__global__ void FetchMinCompileKernel(int* x) {
|
||||
int old{};
|
||||
// Valid combinations
|
||||
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
// Pointer to a const type (the cast below adds const to the atomic object pointer)
|
||||
old = __hip_atomic_fetch_min(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
// Value instead of pointer to the atomic builtin
|
||||
old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope);
|
||||
// Memory order out of bounds
|
||||
old = __hip_atomic_fetch_min(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_fetch_min(x, 1, 10, kMemScope);
|
||||
// Memory scope out of bounds
|
||||
old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10);
|
||||
|
||||
// Value must be integer or floating point type; a user-defined class is passed below
|
||||
Dummy dummy{};
|
||||
old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope);
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
#include "atomic_builtins_kernels_rtc.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
void AtomicBuiltinsRTCWrapper(const char* program_source, int expected_errors_num,
|
||||
int expected_warnings_num) {
|
||||
hiprtcProgram program{};
|
||||
HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "atomics_builtins_kernels.cc", 0,
|
||||
nullptr, nullptr));
|
||||
|
||||
hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};
|
||||
|
||||
size_t log_size{};
|
||||
HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
|
||||
std::string log(log_size, ' ');
|
||||
HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));
|
||||
int error_count{0};
|
||||
int warning_count{0};
|
||||
|
||||
std::string error_message{"error:"};
|
||||
std::string warning_message{"warning:"};
|
||||
|
||||
size_t npos_e = log.find(error_message, 0);
|
||||
while (npos_e != std::string::npos) {
|
||||
++error_count;
|
||||
npos_e = log.find(error_message, npos_e + 1);
|
||||
}
|
||||
|
||||
size_t npos_w = log.find(warning_message, 0);
|
||||
while (npos_w != std::string::npos) {
|
||||
++warning_count;
|
||||
npos_w = log.find(warning_message, npos_w + 1);
|
||||
}
|
||||
|
||||
HIPRTC_CHECK(hiprtcDestroyProgram(&program));
|
||||
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
|
||||
REQUIRE(error_count == expected_errors_num);
|
||||
REQUIRE(warning_count == expected_warnings_num);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles atomic builtins while passing parameters that shall cause:
|
||||
* -# Compiler warnings
|
||||
* -# Compiler errors
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomic_builtins.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_AtomicBuiltins_Negative_Parameters_RTC") {
  // One row per RTC program: its source and the diagnostics it is required to produce.
  struct NegativeCase {
    const char* source;
    int errors;
    int warnings;
  };

  const NegativeCase negative_cases[] = {
      {kBuiltinStore, 5, 5},
      {kBuiltinLoad, 4, 4},
      /* Begin: Should be 5 errors, 6 warnings for both. See EXSWHTEC-309*/
      {kBuiltinCompExWeak, 5, 2},
      {kBuiltinCompExStrong, 5, 2},
      /* End. */
      {kBuiltinExchange, 5, 2},
      {kBuiltinFetchAdd, 5, 2},
      {kBuiltinFetchAnd, 7, 2},
      {kBuiltinFetchOr, 7, 2},
      {kBuiltinFetchXor, 7, 2},
      {kBuiltinFetchMax, 5, 2},
      {kBuiltinFetchMin, 5, 2},
  };

  // Programs are compiled in table order; the first failing check aborts the test case.
  for (const NegativeCase& negative_case : negative_cases) {
    AtomicBuiltinsRTCWrapper(negative_case.source, negative_case.errors, negative_case.warnings);
  }
}

/**
 * End doxygen group AtomicsTest.
 * @}
 */
|
||||
@@ -0,0 +1,590 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Positive and negative kernels used for the builtin atomic Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// Runtime-compiled (HIPRTC) source for the __hip_atomic_store test. The string defines the
// kMemOrder/kMemScope constants and the Dummy/DummyTC helper classes, then StoreCompileKernel,
// which calls __hip_atomic_store with a range of pointers, memory orders, scopes and operand
// types. The text is compiled verbatim, so changing the kernel body changes the diagnostics
// that the RTC test counts.
static constexpr auto kBuiltinStore{R"(
|
||||
constexpr int kMemOrder = __ATOMIC_RELAXED;
|
||||
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
|
||||
|
||||
class DummyTC {
|
||||
public:
|
||||
__device__ DummyTC() {}
|
||||
__device__ ~DummyTC() = default;
|
||||
__device__ DummyTC(const DummyTC&) = default;
|
||||
__device__ DummyTC& operator=(const DummyTC&) = default;
|
||||
__device__ DummyTC(DummyTC&&) = default;
|
||||
__device__ DummyTC& operator=(DummyTC&&) = default;
|
||||
};
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void StoreCompileKernel(int* x) {
|
||||
__hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
__hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
__hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
__hip_atomic_store(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
__hip_atomic_store(*x, 1, kMemOrder, kMemScope);
|
||||
__hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
__hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
__hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
__hip_atomic_store(x, 1, -1, kMemScope);
|
||||
__hip_atomic_store(x, 1, 10, kMemScope);
|
||||
__hip_atomic_store(x, 1, kMemOrder, -1);
|
||||
__hip_atomic_store(x, 1, kMemOrder, 10);
|
||||
|
||||
Dummy dummy_a{};
|
||||
Dummy dummy_b{};
|
||||
__hip_atomic_store(&dummy_a, dummy_b, kMemOrder, kMemScope);
|
||||
|
||||
DummyTC dummytc_a{};
|
||||
DummyTC dummytc_b{};
|
||||
__hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Runtime-compiled (HIPRTC) source for the __hip_atomic_load test. The string defines the
// kMemOrder/kMemScope constants and the Dummy/DummyTC helper classes, then LoadCompileKernel,
// which calls __hip_atomic_load with a range of pointers, memory orders, scopes and operand
// types. The text is compiled verbatim, so changing the kernel body changes the diagnostics
// that the RTC test counts.
static constexpr auto kBuiltinLoad{R"(
|
||||
constexpr int kMemOrder = __ATOMIC_RELAXED;
|
||||
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
|
||||
|
||||
class DummyTC {
|
||||
public:
|
||||
__device__ DummyTC() {}
|
||||
__device__ ~DummyTC() = default;
|
||||
__device__ DummyTC(const DummyTC&) = default;
|
||||
__device__ DummyTC& operator=(const DummyTC&) = default;
|
||||
__device__ DummyTC(DummyTC&&) = default;
|
||||
__device__ DummyTC& operator=(DummyTC&&) = default;
|
||||
};
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void LoadCompileKernel(int* x, int* y) {
|
||||
*y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope);
|
||||
*y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope);
|
||||
*y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope);
|
||||
*y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
*y = __hip_atomic_load(*x, kMemOrder, kMemScope);
|
||||
*y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope);
|
||||
*y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope);
|
||||
*y = __hip_atomic_load(x, -1, kMemScope);
|
||||
*y = __hip_atomic_load(x, 10, kMemScope);
|
||||
*y = __hip_atomic_load(x, kMemOrder, -1);
|
||||
*y = __hip_atomic_load(x, kMemOrder, 10);
|
||||
|
||||
Dummy dummy_a{};
|
||||
Dummy dummy_b{};
|
||||
dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope);
|
||||
|
||||
DummyTC dummytc_a{};
|
||||
DummyTC dummytc_b{};
|
||||
dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Runtime-compiled (HIPRTC) source for the __hip_atomic_compare_exchange_weak test. The string
// defines the kMemOrder/kMemScope constants and the Dummy/DummyTC helper classes, then
// CompareWeakCompileKernel, which calls the builtin with a range of success/failure memory
// order pairs, scopes, pointers and operand types. The text is compiled verbatim, so changing
// the kernel body changes the diagnostics that the RTC test counts.
static constexpr auto kBuiltinCompExWeak{R"(
|
||||
constexpr int kMemOrder = __ATOMIC_RELAXED;
|
||||
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
|
||||
|
||||
class DummyTC {
|
||||
public:
|
||||
__device__ DummyTC() {}
|
||||
__device__ ~DummyTC() = default;
|
||||
__device__ DummyTC(const DummyTC&) = default;
|
||||
__device__ DummyTC& operator=(const DummyTC&) = default;
|
||||
__device__ DummyTC(DummyTC&&) = default;
|
||||
__device__ DummyTC& operator=(DummyTC&&) = default;
|
||||
};
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void CompareWeakCompileKernel(int* x, int* expected) {
|
||||
bool res{false};
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
|
||||
kMemScope);
|
||||
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(reinterpret_cast<const int*>(x), expected, 1, kMemOrder,
|
||||
kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1);
|
||||
res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10);
|
||||
|
||||
Dummy dummy_a{};
|
||||
Dummy dummy_b{};
|
||||
Dummy dummy_c{};
|
||||
res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
|
||||
kMemScope);
|
||||
DummyTC dummytc_a{};
|
||||
DummyTC dummytc_b{};
|
||||
DummyTC dummytc_c{};
|
||||
res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder,
|
||||
kMemScope);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Runtime-compiled (HIPRTC) source for the __hip_atomic_compare_exchange_strong test. The
// string defines the kMemOrder/kMemScope constants and the Dummy/DummyTC helper classes, then
// CompareStrongCompileKernel, which calls the builtin with a range of success/failure memory
// order pairs, scopes, pointers and operand types. The text is compiled verbatim, so changing
// the kernel body changes the diagnostics that the RTC test counts.
static constexpr auto kBuiltinCompExStrong{R"(
|
||||
constexpr int kMemOrder = __ATOMIC_RELAXED;
|
||||
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
|
||||
|
||||
class DummyTC {
|
||||
public:
|
||||
__device__ DummyTC() {}
|
||||
__device__ ~DummyTC() = default;
|
||||
__device__ DummyTC(const DummyTC&) = default;
|
||||
__device__ DummyTC& operator=(const DummyTC&) = default;
|
||||
__device__ DummyTC(DummyTC&&) = default;
|
||||
__device__ DummyTC& operator=(DummyTC&&) = default;
|
||||
};
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void CompareStrongCompileKernel(int* x, int* expected) {
|
||||
bool res{false};
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST,
|
||||
kMemScope);
|
||||
|
||||
res =
|
||||
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope);
|
||||
res =
|
||||
__hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
|
||||
kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(reinterpret_cast<const int*>(x), expected, 1,
|
||||
kMemOrder, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, -1, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1);
|
||||
res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10);
|
||||
|
||||
Dummy dummy_a{};
|
||||
Dummy dummy_b{};
|
||||
Dummy dummy_c{};
|
||||
res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder,
|
||||
kMemScope);
|
||||
DummyTC dummytc_a{};
|
||||
DummyTC dummytc_b{};
|
||||
DummyTC dummytc_c{};
|
||||
res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder,
|
||||
kMemOrder, kMemScope);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Runtime-compiled (HIPRTC) source for the __hip_atomic_exchange test. The string defines the
// kMemOrder/kMemScope constants and the Dummy/DummyTC helper classes, then
// ExchangeCompileKernel, which calls __hip_atomic_exchange with a range of pointers, memory
// orders, scopes and operand types. The text is compiled verbatim, so changing the kernel body
// changes the diagnostics that the RTC test counts.
static constexpr auto kBuiltinExchange{R"(
|
||||
constexpr int kMemOrder = __ATOMIC_RELAXED;
|
||||
constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;
|
||||
|
||||
class DummyTC {
|
||||
public:
|
||||
__device__ DummyTC() {}
|
||||
__device__ ~DummyTC() = default;
|
||||
__device__ DummyTC(const DummyTC&) = default;
|
||||
__device__ DummyTC& operator=(const DummyTC&) = default;
|
||||
__device__ DummyTC(DummyTC&&) = default;
|
||||
__device__ DummyTC& operator=(DummyTC&&) = default;
|
||||
};
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void ExchangeCompileKernel(int* x) {
|
||||
int old{};
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, __ATOMIC_SEQ_CST, kMemScope);
|
||||
|
||||
old = __hip_atomic_exchange(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
|
||||
old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, -1, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, 10, kMemScope);
|
||||
old = __hip_atomic_exchange(x, 1, kMemOrder, -1);
|
||||
old = __hip_atomic_exchange(x, 1, kMemOrder, 10);
|
||||
|
||||
Dummy dummy_a{};
|
||||
Dummy dummy_b{};
|
||||
dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope);
|
||||
|
||||
DummyTC dummytc_a{};
|
||||
DummyTC dummytc_b{};
|
||||
dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope);
|
||||
}
|
||||
)"};
|
||||
|
||||
// Device source, kept as text, that exercises __hip_atomic_fetch_add.
//
// The first six calls cover every __ATOMIC_* memory order on a plain int*.
// The remaining calls pass a pointer-to-const, a non-pointer, out-of-range
// order/scope values, and a pointer to the class type Dummy.
// NOTE(review): these look like deliberately rejected calls for a
// compile-diagnostics test; the consumer of this string is not visible here.
//
// The text is passed on verbatim, so nothing but kernel source may appear
// between the raw-string delimiters.
static constexpr auto kBuiltinFetchAdd{R"(
  constexpr int kMemOrder = __ATOMIC_RELAXED;
  constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;

  class DummyTC {
   public:
    __device__ DummyTC() {}
    __device__ ~DummyTC() = default;
    __device__ DummyTC(const DummyTC&) = default;
    __device__ DummyTC& operator=(const DummyTC&) = default;
    __device__ DummyTC(DummyTC&&) = default;
    __device__ DummyTC& operator=(DummyTC&&) = default;
  };

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void FetchAddCompileKernel(int* x) {
    int old{};
    old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope);

    old = __hip_atomic_fetch_add(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, -1, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, 10, kMemScope);
    old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1);
    old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10);

    Dummy dummy{};
    old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope);
  }
)"};
|
||||
|
||||
// Device source, kept as text, that exercises __hip_atomic_fetch_and.
//
// The first six calls cover every __ATOMIC_* memory order on a plain int*.
// The remaining calls pass a pointer-to-const, a non-pointer, out-of-range
// order/scope values, a pointer to the class type Dummy, and float/double
// pointers.
// NOTE(review): these look like deliberately rejected calls for a
// compile-diagnostics test; the consumer of this string is not visible here.
//
// The text is passed on verbatim, so nothing but kernel source may appear
// between the raw-string delimiters.
static constexpr auto kBuiltinFetchAnd{R"(
  constexpr int kMemOrder = __ATOMIC_RELAXED;
  constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;

  class DummyTC {
   public:
    __device__ DummyTC() {}
    __device__ ~DummyTC() = default;
    __device__ DummyTC(const DummyTC&) = default;
    __device__ DummyTC& operator=(const DummyTC&) = default;
    __device__ DummyTC(DummyTC&&) = default;
    __device__ DummyTC& operator=(DummyTC&&) = default;
  };

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void FetchAndCompileKernel(int* x) {
    int old{};
    old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope);

    old = __hip_atomic_fetch_and(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, -1, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, 10, kMemScope);
    old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1);
    old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10);

    Dummy dummy{};
    old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope);
    float float_var{1.5f};
    old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope);
    double double_var{1.5};
    old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope);
  }
)"};
|
||||
|
||||
// Device source, kept as text, that exercises __hip_atomic_fetch_or.
//
// The first six calls cover every __ATOMIC_* memory order on a plain int*.
// The remaining calls pass a pointer-to-const, a non-pointer, out-of-range
// order/scope values, a pointer to the class type Dummy, and float/double
// pointers.
// NOTE(review): these look like deliberately rejected calls for a
// compile-diagnostics test; the consumer of this string is not visible here.
//
// The text is passed on verbatim, so nothing but kernel source may appear
// between the raw-string delimiters.
static constexpr auto kBuiltinFetchOr{R"(
  constexpr int kMemOrder = __ATOMIC_RELAXED;
  constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;

  class DummyTC {
   public:
    __device__ DummyTC() {}
    __device__ ~DummyTC() = default;
    __device__ DummyTC(const DummyTC&) = default;
    __device__ DummyTC& operator=(const DummyTC&) = default;
    __device__ DummyTC(DummyTC&&) = default;
    __device__ DummyTC& operator=(DummyTC&&) = default;
  };

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void FetchOrCompileKernel(int* x) {
    int old{};
    old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope);

    old = __hip_atomic_fetch_or(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, -1, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, 10, kMemScope);
    old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1);
    old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10);

    Dummy dummy{};
    old = __hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope);
    float float_var{1.5f};
    old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope);
    double double_var{1.5};
    old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope);
  }
)"};
|
||||
|
||||
// Device source, kept as text, that exercises __hip_atomic_fetch_xor.
//
// The first six calls cover every __ATOMIC_* memory order on a plain int*.
// The remaining calls pass a pointer-to-const, a non-pointer, out-of-range
// order/scope values, a pointer to the class type Dummy, and float/double
// pointers.
// NOTE(review): these look like deliberately rejected calls for a
// compile-diagnostics test; the consumer of this string is not visible here.
//
// The text is passed on verbatim, so nothing but kernel source may appear
// between the raw-string delimiters.
static constexpr auto kBuiltinFetchXor{R"(
  constexpr int kMemOrder = __ATOMIC_RELAXED;
  constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;

  class DummyTC {
   public:
    __device__ DummyTC() {}
    __device__ ~DummyTC() = default;
    __device__ DummyTC(const DummyTC&) = default;
    __device__ DummyTC& operator=(const DummyTC&) = default;
    __device__ DummyTC(DummyTC&&) = default;
    __device__ DummyTC& operator=(DummyTC&&) = default;
  };

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void FetchXorCompileKernel(int* x) {
    int old{};
    old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope);

    old = __hip_atomic_fetch_xor(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope);
    old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1);
    old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10);

    Dummy dummy{};
    old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope);
    float float_var{1.5f};
    old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope);
    double double_var{1.5};
    old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope);
  }
)"};
|
||||
|
||||
// Device source, kept as text, that exercises __hip_atomic_fetch_max.
//
// The first six calls cover every __ATOMIC_* memory order on a plain int*.
// The remaining calls pass a pointer-to-const, a non-pointer, out-of-range
// order/scope values, and a pointer to the class type Dummy.
// NOTE(review): these look like deliberately rejected calls for a
// compile-diagnostics test; the consumer of this string is not visible here.
//
// The text is passed on verbatim, so nothing but kernel source may appear
// between the raw-string delimiters.
static constexpr auto kBuiltinFetchMax{R"(
  constexpr int kMemOrder = __ATOMIC_RELAXED;
  constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;

  class DummyTC {
   public:
    __device__ DummyTC() {}
    __device__ ~DummyTC() = default;
    __device__ DummyTC(const DummyTC&) = default;
    __device__ DummyTC& operator=(const DummyTC&) = default;
    __device__ DummyTC(DummyTC&&) = default;
    __device__ DummyTC& operator=(DummyTC&&) = default;
  };

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void FetchMaxCompileKernel(int* x) {
    int old{};
    old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope);

    old = __hip_atomic_fetch_max(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, -1, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, 10, kMemScope);
    old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1);
    old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10);

    Dummy dummy{};
    old = __hip_atomic_fetch_max(&dummy, 1, kMemOrder, kMemScope);
  }
)"};
|
||||
|
||||
// Device source, kept as text, that exercises __hip_atomic_fetch_min.
//
// The first six calls cover every __ATOMIC_* memory order on a plain int*.
// The remaining calls pass a pointer-to-const, a non-pointer, out-of-range
// order/scope values, and a pointer to the class type Dummy.
// NOTE(review): these look like deliberately rejected calls for a
// compile-diagnostics test; the consumer of this string is not visible here.
//
// The text is passed on verbatim, so nothing but kernel source may appear
// between the raw-string delimiters.
static constexpr auto kBuiltinFetchMin{R"(
  constexpr int kMemOrder = __ATOMIC_RELAXED;
  constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM;

  class DummyTC {
   public:
    __device__ DummyTC() {}
    __device__ ~DummyTC() = default;
    __device__ DummyTC(const DummyTC&) = default;
    __device__ DummyTC& operator=(const DummyTC&) = default;
    __device__ DummyTC(DummyTC&&) = default;
    __device__ DummyTC& operator=(DummyTC&&) = default;
  };

  class Dummy {
   public:
    __device__ Dummy() {}
    __device__ ~Dummy() {}
  };

  __global__ void FetchMinCompileKernel(int* x) {
    int old{};
    old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope);

    old = __hip_atomic_fetch_min(reinterpret_cast<const int*>(x), 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, -1, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, 10, kMemScope);
    old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1);
    old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10);

    Dummy dummy{};
    old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope);
  }
)"};
|
||||
@@ -0,0 +1,412 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cmd_options.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
namespace cg = cooperative_groups;
|
||||
|
||||
namespace Bitwise {
|
||||
// Selects which bitwise atomic PerformAtomicOperation issues.
enum class AtomicOperation {
  kAnd,         // atomicAnd
  kAndSystem,   // atomicAnd_system
  kOr,          // atomicOr
  kOrSystem,    // atomicOr_system
  kXor,         // atomicXor
  kXorSystem,   // atomicXor_system
  kBuiltinAnd,  // __hip_atomic_fetch_and
  kBuiltinOr,   // __hip_atomic_fetch_or
  kBuiltinXor   // __hip_atomic_fetch_xor
};
|
||||
|
||||
// Right-hand operand of every bitwise atomic issued by these tests.
constexpr int kMask = 0xAAAA;
// Default initial contents of the memory the atomics operate on.
constexpr int kTestValue = 0x4545;
// Initial contents for AND tests: the low 16 bits are all set, so and-ing
// with kMask leaves exactly kMask behind.
constexpr int kAndTestValue = 0xFFFF;
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
__host__ __device__ TestType GetTestValue() {
|
||||
if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem) {
|
||||
return kAndTestValue;
|
||||
}
|
||||
|
||||
return kTestValue;
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__device__ TestType PerformAtomicOperation(TestType* const mem) {
|
||||
const auto mask = kMask;
|
||||
|
||||
if constexpr (operation == AtomicOperation::kAnd) {
|
||||
return atomicAnd(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kAndSystem) {
|
||||
return atomicAnd_system(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kOr) {
|
||||
return atomicOr(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kOrSystem) {
|
||||
return atomicOr_system(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kXor) {
|
||||
return atomicXor(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kXorSystem) {
|
||||
return atomicXor_system(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinAnd) {
|
||||
return __hip_atomic_fetch_and(mem, mask, __ATOMIC_RELAXED, memory_scope);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinOr) {
|
||||
return __hip_atomic_fetch_or(mem, mask, __ATOMIC_RELAXED, memory_scope);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinXor) {
|
||||
return __hip_atomic_fetch_xor(mem, mask, __ATOMIC_RELAXED, memory_scope);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) {
|
||||
__shared__ TestType shared_mem;
|
||||
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
|
||||
TestType* const mem = use_shared_mem ? &shared_mem : global_mem;
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
if (tid == 0) mem[0] = global_mem[0];
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(mem);
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
if (tid == 0) global_mem[0] = mem[0];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType>
|
||||
__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch,
|
||||
const unsigned int idx) {
|
||||
const auto byte_ptr = reinterpret_cast<uint8_t*>(ptr);
|
||||
return reinterpret_cast<TestType*>(byte_ptr + idx * pitch);
|
||||
}
|
||||
|
||||
// Reads, flips (xor 0xAB) and writes back every byte in [begin_addr, end_addr).
//
// The accesses go through a volatile pointer so each byte is really loaded and
// stored. An empty or inverted range (begin_addr >= end_addr) touches nothing:
// the loop bound is `<` rather than `!=` so that a pitch smaller than the
// element size cannot send the loop running past end_addr.
//
// Declared inline because the definition lives in a header.
inline __device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  for (volatile uint8_t* addr = begin_addr; addr < end_addr; ++addr) {
    uint8_t val = *addr;
    val ^= 0xAB;
    *addr = val;
  }
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
|
||||
const unsigned int width, const unsigned pitch) {
|
||||
extern __shared__ uint8_t shared_mem[];
|
||||
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
|
||||
TestType* const mem = use_shared_mem ? reinterpret_cast<TestType*>(shared_mem) : global_mem;
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
if (tid < width) {
|
||||
const auto target = PitchedOffset(mem, pitch, tid);
|
||||
*target = *PitchedOffset(global_mem, pitch, tid);
|
||||
};
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
const auto n = cooperative_groups::this_grid().size() - width;
|
||||
|
||||
TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width);
|
||||
|
||||
if (tid < n) {
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(
|
||||
PitchedOffset(mem, pitch, tid % width));
|
||||
} else {
|
||||
uint8_t* const begin_addr = reinterpret_cast<uint8_t*>(atomic_addr + 1);
|
||||
uint8_t* const end_addr = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
|
||||
GenerateMemoryTraffic(begin_addr, end_addr);
|
||||
}
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
if (tid < width) {
|
||||
const auto target = PitchedOffset(global_mem, pitch, tid);
|
||||
*target = *PitchedOffset(mem, pitch, tid);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
struct TestParams {
|
||||
auto ThreadCount() const {
|
||||
return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z;
|
||||
}
|
||||
|
||||
dim3 blocks;
|
||||
dim3 threads;
|
||||
unsigned int num_devices = 1u;
|
||||
unsigned int kernel_count = 1u;
|
||||
unsigned int width = 1u;
|
||||
unsigned int pitch = 0u;
|
||||
unsigned int host_thread_count = 0u;
|
||||
LinearAllocs alloc_type;
|
||||
};
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const TestParams& p) {
|
||||
const auto thread_count = p.num_devices * p.kernel_count * p.ThreadCount();
|
||||
|
||||
TestType test_value = GetTestValue<TestType, operation>();
|
||||
const auto mask = kMask;
|
||||
std::vector<TestType> res_vals(p.width, test_value);
|
||||
std::vector<TestType> old_vals;
|
||||
old_vals.reserve(thread_count);
|
||||
|
||||
for (auto tid = 0u; tid < thread_count; ++tid) {
|
||||
auto& res = res_vals[tid % p.width];
|
||||
old_vals.push_back(res);
|
||||
|
||||
if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem ||
|
||||
operation == AtomicOperation::kBuiltinAnd) {
|
||||
res = res & mask;
|
||||
} else if constexpr (operation == AtomicOperation::kOr ||
|
||||
operation == AtomicOperation::kOrSystem ||
|
||||
operation == AtomicOperation::kBuiltinOr) {
|
||||
res = res | mask;
|
||||
} else if constexpr (operation == AtomicOperation::kXor ||
|
||||
operation == AtomicOperation::kXorSystem ||
|
||||
operation == AtomicOperation::kBuiltinXor) {
|
||||
res = res ^ mask;
|
||||
}
|
||||
}
|
||||
|
||||
return {res_vals, old_vals};
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void Verify(const TestParams& p, std::vector<TestType>& res_vals, std::vector<TestType>& old_vals) {
|
||||
auto [expected_res_vals, expected_old_vals] = TestKernelHostRef<TestType, operation>(p);
|
||||
|
||||
for (auto i = 0u; i < res_vals.size(); ++i) {
|
||||
INFO("Results index: " << i);
|
||||
REQUIRE(expected_res_vals[i] == res_vals[i]);
|
||||
}
|
||||
|
||||
std::sort(begin(old_vals), end(old_vals));
|
||||
std::sort(begin(expected_old_vals), end(expected_old_vals));
|
||||
for (auto i = 0u; i < old_vals.size(); ++i) {
|
||||
INFO("Old values index: " << i);
|
||||
REQUIRE(expected_old_vals[i] == old_vals[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr,
|
||||
TestType* const old_vals) {
|
||||
const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u;
|
||||
if (p.width == 1 && p.pitch == sizeof(TestType))
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals);
|
||||
else
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals, p.width, p.pitch);
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void TestCore(const TestParams& p) {
|
||||
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
|
||||
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
|
||||
std::vector<StreamGuard> streams;
|
||||
for (auto i = 0; i < p.num_devices; ++i) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size);
|
||||
for (auto j = 0; j < p.kernel_count; ++j) {
|
||||
streams.emplace_back(Streams::created);
|
||||
}
|
||||
}
|
||||
|
||||
const auto mem_alloc_size = p.width * p.pitch;
|
||||
LinearAllocGuard<TestType> mem_dev(p.alloc_type, mem_alloc_size);
|
||||
|
||||
std::vector<TestType> old_vals(p.num_devices * p.kernel_count * p.ThreadCount());
|
||||
std::vector<TestType> res_vals(p.width);
|
||||
|
||||
TestType* const mem_ptr =
|
||||
p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr();
|
||||
|
||||
TestType test_value = GetTestValue<TestType, operation>();
|
||||
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
|
||||
for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) {
|
||||
HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
for (auto i = 0u; i < p.num_devices; ++i) {
|
||||
for (auto j = 0u; j < p.kernel_count; ++j) {
|
||||
const auto& stream = streams[i * p.kernel_count + j].stream();
|
||||
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
|
||||
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
|
||||
old_vals);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto i = 0u; i < p.num_devices; ++i) {
|
||||
const auto device_offset = i * p.kernel_count * p.ThreadCount();
|
||||
HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(),
|
||||
old_vals_alloc_size, hipMemcpyDeviceToHost));
|
||||
}
|
||||
HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType),
|
||||
p.width, hipMemcpyDeviceToHost));
|
||||
|
||||
Verify<TestType, operation>(p, res_vals, old_vals);
|
||||
}
|
||||
|
||||
// Catch2 generator for the block sizes under test: 16 and 1024 threads.
inline dim3 GenerateThreadDimensions() {
  return GENERATE(dim3(16), dim3(1024));
}
|
||||
|
||||
// Catch2 generator for the grid sizes under test: one block per multiprocessor
// of device 0, and one and a half times that.
inline dim3 GenerateBlockDimensions() {
  int multiprocessors = 0;
  HIP_CHECK(hipDeviceGetAttribute(&multiprocessors, hipDeviceAttributeMultiprocessorCount, 0));

  const dim3 one_block_per_sm(multiprocessors);
  const dim3 oversubscribed(multiprocessors + multiprocessors / 2);
  return GENERATE_COPY(one_block_per_sm, oversubscribed);
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = 1;
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
|
||||
operation == AtomicOperation::kBuiltinOr ||
|
||||
operation == AtomicOperation::kBuiltinXor) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
|
||||
params.threads = 1;
|
||||
} else if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
|
||||
operation == AtomicOperation::kBuiltinOr ||
|
||||
operation == AtomicOperation::kBuiltinXor) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
params.threads = dim3(warp_size);
|
||||
} else {
|
||||
params.threads = GenerateThreadDimensions();
|
||||
}
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
SECTION("Global memory") {
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
|
||||
operation == AtomicOperation::kBuiltinOr ||
|
||||
operation == AtomicOperation::kBuiltinXor) &&
|
||||
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
|
||||
params.blocks = dim3(1);
|
||||
} else {
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
}
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Shared memory") {
|
||||
params.blocks = dim3(1);
|
||||
params.alloc_type = LinearAllocs::hipMalloc;
|
||||
TestCore<TestType, operation, true>(params);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width,
|
||||
const unsigned int pitch) {
|
||||
int concurrent_kernels = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0));
|
||||
if (!concurrent_kernels) {
|
||||
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
|
||||
return;
|
||||
}
|
||||
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
params.threads = GenerateThreadDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void MultipleDeviceMultipleKernelTest(const unsigned int num_devices,
|
||||
const unsigned int kernel_count, const unsigned int width,
|
||||
const unsigned int pitch) {
|
||||
if (num_devices > 1) {
|
||||
if (HipTest::getDeviceCount() < num_devices) {
|
||||
std::string msg = std::to_string(num_devices) + " devices are required";
|
||||
HipTest::HIP_SKIP_TEST(msg.c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (kernel_count > 1) {
|
||||
for (auto i = 0u; i < num_devices; ++i) {
|
||||
int concurrent_kernels = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i));
|
||||
if (!concurrent_kernels) {
|
||||
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TestParams params;
|
||||
params.num_devices = num_devices;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
params.threads = GenerateThreadDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Bitwise
|
||||
@@ -0,0 +1,433 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
// Selects which __hip_atomic_* builtin SetFlag uses to update the flag.
enum class BuiltinAtomicOperation {
  kLoadStore,              // __hip_atomic_store
  kExchange,               // __hip_atomic_exchange
  kCompareExchangeStrong,  // __hip_atomic_compare_exchange_strong
  kCompareExchangeWeak,    // __hip_atomic_compare_exchange_weak
  kAdd,                    // __hip_atomic_fetch_add
  kAnd,                    // __hip_atomic_fetch_and
  kOr,                     // __hip_atomic_fetch_or
  kXor,                    // __hip_atomic_fetch_xor
  kMin,                    // __hip_atomic_fetch_min
  kMax                     // __hip_atomic_fetch_max
};
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
|
||||
__host__ __device__ void SetFlag(int* const flag) {
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
if constexpr (operation == BuiltinAtomicOperation::kLoadStore) {
|
||||
static_assert(memory_order != __ATOMIC_ACQ_REL);
|
||||
__hip_atomic_store(flag, 1, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kExchange) {
|
||||
__hip_atomic_exchange(flag, 1, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) {
|
||||
int compare = 0;
|
||||
__hip_atomic_compare_exchange_strong(flag, &compare, 1, memory_order, __ATOMIC_RELAXED,
|
||||
memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) {
|
||||
int compare = 0;
|
||||
while (!__hip_atomic_compare_exchange_weak(flag, &compare, 1, memory_order, __ATOMIC_RELAXED,
|
||||
memory_scope))
|
||||
compare = 0;
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kAdd) {
|
||||
__hip_atomic_fetch_add(flag, 1, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kAnd) {
|
||||
__hip_atomic_fetch_and(flag, 0x0, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kOr) {
|
||||
__hip_atomic_fetch_or(flag, 0x1, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kXor) {
|
||||
__hip_atomic_fetch_xor(flag, 0x1, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kMin) {
|
||||
__hip_atomic_fetch_min(flag, -1, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kMax) {
|
||||
__hip_atomic_fetch_max(flag, 1, memory_order, memory_scope);
|
||||
}
|
||||
#else
|
||||
if constexpr (operation == BuiltinAtomicOperation::kAnd) {
|
||||
__atomic_store_n(flag, 0, __ATOMIC_RELEASE);
|
||||
} else {
|
||||
__atomic_store_n(flag, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
|
||||
__host__ __device__ int FetchFlag(int* const flag) {
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
if constexpr (operation == BuiltinAtomicOperation::kLoadStore) {
|
||||
static_assert(memory_order != __ATOMIC_ACQ_REL);
|
||||
return __hip_atomic_load(flag, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kExchange) {
|
||||
return __hip_atomic_exchange(flag, 0, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) {
|
||||
int compare = 1;
|
||||
__hip_atomic_compare_exchange_strong(
|
||||
flag, &compare, 1, memory_order,
|
||||
memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope);
|
||||
return compare;
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) {
|
||||
int compare = 1;
|
||||
__hip_atomic_compare_exchange_weak(
|
||||
flag, &compare, 1, memory_order,
|
||||
memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope);
|
||||
return compare;
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kAdd) {
|
||||
return __hip_atomic_fetch_add(flag, 0, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kAnd) {
|
||||
return !__hip_atomic_fetch_and(flag, 0x1, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kOr) {
|
||||
return __hip_atomic_fetch_or(flag, 0x0, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kXor) {
|
||||
return __hip_atomic_fetch_xor(flag, 0x0, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kMin) {
|
||||
return __hip_atomic_fetch_min(flag, 0, memory_order, memory_scope);
|
||||
} else if constexpr (operation == BuiltinAtomicOperation::kMax) {
|
||||
return __hip_atomic_fetch_max(flag, 0, memory_order, memory_scope);
|
||||
}
|
||||
#else
|
||||
if constexpr (operation == BuiltinAtomicOperation::kAnd) {
|
||||
return !__atomic_load_n(flag, __ATOMIC_ACQUIRE);
|
||||
} else {
|
||||
return __atomic_load_n(flag, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace AcquireRelease {
|
||||
|
||||
constexpr auto kTestValue = 42;
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
|
||||
__host__ __device__ void Producer(int* const flag, int* const data) {
|
||||
constexpr int actual_memory_order =
|
||||
memory_order == __ATOMIC_ACQUIRE ? __ATOMIC_RELEASE : memory_order;
|
||||
|
||||
data[0] = kTestValue;
|
||||
|
||||
SetFlag<operation, actual_memory_order, memory_scope>(flag);
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
|
||||
__host__ __device__ void Consumer(int* const flag, int* const data, int* const ret) {
|
||||
while (!FetchFlag<operation, memory_order, memory_scope>(flag))
|
||||
;
|
||||
|
||||
ret[0] = data[0];
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
|
||||
__global__ void TestKernel(int* const flag, int* data, int* const ret) {
|
||||
__shared__ int shared_mem;
|
||||
|
||||
if (data == nullptr) data = &shared_mem;
|
||||
|
||||
if (blockIdx.x == 0 && threadIdx.x == 0) {
|
||||
if constexpr (operation == BuiltinAtomicOperation::kAnd)
|
||||
*flag = 1;
|
||||
else
|
||||
*flag = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
bool producer = false, consumer = false;
|
||||
|
||||
if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
producer = blockIdx.x == 0 && threadIdx.x == 0;
|
||||
consumer = blockIdx.x == 0 && threadIdx.x == 1;
|
||||
} else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
|
||||
producer = blockIdx.x == 0 && threadIdx.x == 0;
|
||||
consumer = blockIdx.x == 0 && threadIdx.x == warpSize;
|
||||
} else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
|
||||
producer = blockIdx.x == 0 && threadIdx.x == 0;
|
||||
consumer = blockIdx.x == 1 && threadIdx.x == 0;
|
||||
}
|
||||
|
||||
if (producer) {
|
||||
Producer<operation, memory_order, memory_scope>(flag, data);
|
||||
return;
|
||||
}
|
||||
|
||||
if (consumer) {
|
||||
Consumer<operation, memory_order, memory_scope>(flag, data, ret);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
|
||||
__global__ void ProducerKernel(int* const flag, int* const data) {
|
||||
if (!(blockIdx.x == 0 && threadIdx.x == 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Producer<operation, memory_order, memory_scope>(flag, data);
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope>
|
||||
__global__ void ConsumerKernel(int* const flag, int* const data, int* const ret) {
|
||||
if (!(blockIdx.x == 0 && threadIdx.x == 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Consumer<operation, memory_order, memory_scope>(flag, data, ret);
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_order, int memory_scope> void Test() {
|
||||
int blocks = 1, threads = 1;
|
||||
if (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
blocks = 1;
|
||||
threads = 2;
|
||||
} else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
|
||||
blocks = 1;
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
threads = warp_size * 2;
|
||||
} else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
|
||||
blocks = 2;
|
||||
threads = 1;
|
||||
}
|
||||
|
||||
LinearAllocGuard<int> flag(LinearAllocs::hipMalloc, sizeof(int));
|
||||
LinearAllocGuard<int> ret(LinearAllocs::hipMallocManaged, sizeof(int));
|
||||
|
||||
SECTION("Global memory") {
|
||||
const auto alloc_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipMallocManaged);
|
||||
LinearAllocGuard<int> data(alloc_type, sizeof(int));
|
||||
TestKernel<operation, memory_order, memory_scope>
|
||||
<<<blocks, threads>>>(flag.ptr(), data.ptr(), ret.ptr());
|
||||
}
|
||||
|
||||
if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) {
|
||||
SECTION("Shared memory") {
|
||||
TestKernel<operation, memory_order, memory_scope>
|
||||
<<<blocks, threads>>>(flag.ptr(), nullptr, ret.ptr());
|
||||
}
|
||||
}
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
REQUIRE(ret.ptr()[0] == kTestValue);
|
||||
}
|
||||
|
||||
/**
 * Host driver for the system-scope acquire/release test: one side of the hand-over runs
 * in a kernel, the other in a host thread.
 *
 * The payload is allocated with hipHostMalloc or hipMallocManaged; `flag` and `ret` are
 * managed allocations. Two variants are exercised: host producer with device consumer,
 * and device producer with host consumer. The test passes when the consumer observed
 * kTestValue.
 *
 * Notes on the set-up:
 *  - `flag` is written to its lowered state (1 for kAnd, 0 otherwise) and `ret` to 0
 *    before either side starts, so the outcome does not depend on whatever the fresh
 *    allocations happen to contain.
 *  - The host thread captures raw pointers by value, and the synchronisation, join and
 *    final check happen while `data` is still alive, so neither the kernel nor the thread
 *    can touch the payload after its guard has been destroyed.
 *  - The thread is only joined when one was actually started.
 */
template <BuiltinAtomicOperation operation, int memory_order> void SystemTest() {
  static_assert(kTestValue != 0, "0 is used as the 'not yet written' marker for ret");

  LinearAllocGuard<int> flag(LinearAllocs::hipMallocManaged, sizeof(int));
  LinearAllocGuard<int> ret(LinearAllocs::hipMallocManaged, sizeof(int));

  SECTION("Global memory") {
    const auto alloc_type = GENERATE(LinearAllocs::hipHostMalloc, LinearAllocs::hipMallocManaged);
    LinearAllocGuard<int> data(alloc_type, sizeof(int));

    int* const flag_ptr = flag.ptr();
    int* const data_ptr = data.ptr();
    int* const ret_ptr = ret.ptr();

    flag_ptr[0] = (operation == BuiltinAtomicOperation::kAnd) ? 1 : 0;
    ret_ptr[0] = 0;

    std::thread host_thread;

    SECTION("Host producer - Device consumer") {
      ConsumerKernel<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>
          <<<1, 1>>>(flag_ptr, data_ptr, ret_ptr);
      host_thread = std::thread([flag_ptr, data_ptr] {
        Producer<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>(flag_ptr, data_ptr);
      });
    }

    SECTION("Device producer - Host consumer") {
      host_thread = std::thread([flag_ptr, data_ptr, ret_ptr] {
        Consumer<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>(flag_ptr, data_ptr, ret_ptr);
      });
      ProducerKernel<operation, memory_order, __HIP_MEMORY_SCOPE_SYSTEM>
          <<<1, 1>>>(flag_ptr, data_ptr);
    }

    HIP_CHECK(hipDeviceSynchronize());
    if (host_thread.joinable()) {
      host_thread.join();
    }

    REQUIRE(ret_ptr[0] == kTestValue);
  }
}
|
||||
|
||||
} /* namespace AcquireRelease */
|
||||
|
||||
namespace SequentialConsistency {
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_scope>
|
||||
__host__ __device__ void Producer(int* const flag) {
|
||||
__atomic_store_n(flag, 1, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_scope>
|
||||
__host__ __device__ void Consumer(int* const flag1, int* const flag2, int* const counter) {
|
||||
while (!FetchFlag<operation, __ATOMIC_SEQ_CST, memory_scope>(flag1))
|
||||
;
|
||||
if (FetchFlag<operation, __ATOMIC_SEQ_CST, memory_scope>(flag2)) {
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
__hip_atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST, memory_scope);
|
||||
#else
|
||||
__atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_scope>
|
||||
__global__ void TestKernel(int* flag1, int* flag2, int* const counter) {
|
||||
__shared__ int shared_mem[2];
|
||||
|
||||
if (flag1 == nullptr) flag1 = &shared_mem[0];
|
||||
if (flag2 == nullptr) flag2 = &shared_mem[1];
|
||||
|
||||
if (blockIdx.x == 0 && threadIdx.x == 0) {
|
||||
if constexpr (operation == BuiltinAtomicOperation::kAnd) {
|
||||
*flag1 = 1;
|
||||
*flag2 = 1;
|
||||
} else {
|
||||
*flag1 = 0;
|
||||
*flag2 = 0;
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
bool producer1 = false, producer2 = false, consumer1 = false, consumer2 = false;
|
||||
|
||||
if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
producer1 = blockIdx.x == 0 && threadIdx.x == 0;
|
||||
consumer1 = blockIdx.x == 0 && threadIdx.x == 1;
|
||||
producer2 = blockIdx.x == 0 && threadIdx.x == 2;
|
||||
consumer2 = blockIdx.x == 0 && threadIdx.x == 3;
|
||||
} else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
|
||||
producer1 = blockIdx.x == 0 && threadIdx.x == 0;
|
||||
consumer1 = blockIdx.x == 0 && threadIdx.x == warpSize;
|
||||
producer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 2;
|
||||
consumer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 3;
|
||||
} else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
|
||||
producer1 = blockIdx.x == 0 && threadIdx.x == 0;
|
||||
consumer1 = blockIdx.x == 1 && threadIdx.x == 0;
|
||||
producer2 = blockIdx.x == 2 && threadIdx.x == 0;
|
||||
consumer2 = blockIdx.x == 3 && threadIdx.x == 0;
|
||||
}
|
||||
|
||||
if (producer1) {
|
||||
Producer<operation, memory_scope>(flag1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (consumer1) {
|
||||
Consumer<operation, memory_scope>(flag1, flag2, counter);
|
||||
return;
|
||||
}
|
||||
|
||||
if (producer2) {
|
||||
Producer<operation, memory_scope>(flag2);
|
||||
return;
|
||||
}
|
||||
|
||||
if (consumer2) {
|
||||
Consumer<operation, memory_scope>(flag2, flag1, counter);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_scope>
|
||||
__global__ void ProducerKernel(int* const flag) {
|
||||
if (!(blockIdx.x == 0 && threadIdx.x == 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Producer<operation, memory_scope>(flag);
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_scope>
|
||||
__global__ void ConsumerKernel(int* const flag1, int* const flag2, int* const counter) {
|
||||
if (!(blockIdx.x == 0 && threadIdx.x == 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Consumer<operation, memory_scope>(flag1, flag2, counter);
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation, int memory_scope> void Test() {
|
||||
int blocks = 1, threads = 1;
|
||||
if (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
blocks = 1;
|
||||
threads = 4;
|
||||
} else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) {
|
||||
blocks = 1;
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
threads = warp_size * 4;
|
||||
} else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) {
|
||||
blocks = 4;
|
||||
threads = 1;
|
||||
}
|
||||
|
||||
LinearAllocGuard<int> counter(LinearAllocs::hipMallocManaged, sizeof(int));
|
||||
|
||||
SECTION("Global memory") {
|
||||
const auto alloc_type = GENERATE(LinearAllocs::hipMalloc);
|
||||
LinearAllocGuard<int> flag1(alloc_type, sizeof(int));
|
||||
LinearAllocGuard<int> flag2(alloc_type, sizeof(int));
|
||||
TestKernel<operation, memory_scope>
|
||||
<<<blocks, threads>>>(flag1.ptr(), flag2.ptr(), counter.ptr());
|
||||
}
|
||||
|
||||
if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) {
|
||||
SECTION("Shared memory") {
|
||||
TestKernel<operation, memory_scope><<<blocks, threads>>>(nullptr, nullptr, counter.ptr());
|
||||
}
|
||||
}
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
REQUIRE(counter.ptr()[0] != 0);
|
||||
}
|
||||
|
||||
template <BuiltinAtomicOperation operation> void SystemTest() {
|
||||
std::thread host_producer, host_consumer;
|
||||
|
||||
LinearAllocGuard<int> counter(LinearAllocs::hipMallocManaged, sizeof(int));
|
||||
|
||||
SECTION("Global memory") {
|
||||
const auto alloc_type = GENERATE(LinearAllocs::hipMallocManaged);
|
||||
LinearAllocGuard<int> flag1(alloc_type, sizeof(int));
|
||||
LinearAllocGuard<int> flag2(alloc_type, sizeof(int));
|
||||
|
||||
ConsumerKernel<operation, __HIP_MEMORY_SCOPE_SYSTEM>
|
||||
<<<1, 1>>>(flag1.ptr(), flag2.ptr(), counter.ptr());
|
||||
host_consumer = std::thread([&] {
|
||||
Consumer<operation, __HIP_MEMORY_SCOPE_SYSTEM>(flag2.ptr(), flag1.ptr(), counter.ptr());
|
||||
});
|
||||
|
||||
ProducerKernel<operation, __HIP_MEMORY_SCOPE_SYSTEM><<<1, 1>>>(flag1.ptr());
|
||||
host_producer =
|
||||
std::thread([&] { Producer<operation, __HIP_MEMORY_SCOPE_SYSTEM>(flag2.ptr()); });
|
||||
}
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
host_producer.join();
|
||||
host_consumer.join();
|
||||
|
||||
REQUIRE(counter.ptr()[0] != 0);
|
||||
}
|
||||
|
||||
} // namespace SequentialConsistency
|
||||
@@ -0,0 +1,420 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <cmd_options.hh>
|
||||
|
||||
namespace cg = cooperative_groups;
|
||||
|
||||
namespace MinMax {
|
||||
/**
 * Selects which atomic min/max entry point PerformAtomicOperation calls.
 */
enum class AtomicOperation {
  // atomicMin / atomicMin_system
  kMin = 0,
  kMinSystem,
  // atomicMax / atomicMax_system
  kMax,
  kMaxSystem,
  // safeAtomicMin / unsafeAtomicMin
  kSafeMin,
  kUnsafeMin,
  // safeAtomicMax / unsafeAtomicMax
  kSafeMax,
  kUnsafeMax,
  // __hip_atomic_fetch_min / __hip_atomic_fetch_max
  kBuiltinMin,
  kBuiltinMax
};
|
||||
|
||||
constexpr auto kIntegerTestValue = 5;
|
||||
constexpr auto kFloatingPointTestValue = 5.5;
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
__host__ __device__ TestType GetTestValue() {
|
||||
TestType test_value =
|
||||
std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;
|
||||
|
||||
if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem ||
|
||||
operation == AtomicOperation::kUnsafeMin ||
|
||||
operation == AtomicOperation::kSafeMin) {
|
||||
return test_value - 2;
|
||||
}
|
||||
|
||||
return test_value + 2;
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__device__ TestType PerformAtomicOperation(TestType* const mem) {
|
||||
const auto val = GetTestValue<TestType, operation>();
|
||||
|
||||
if constexpr (operation == AtomicOperation::kMin) {
|
||||
return atomicMin(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kMinSystem) {
|
||||
return atomicMin_system(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kMax) {
|
||||
return atomicMax(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kMaxSystem) {
|
||||
return atomicMax_system(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kUnsafeMin) {
|
||||
return unsafeAtomicMin(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kSafeMin) {
|
||||
return safeAtomicMin(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kUnsafeMax) {
|
||||
return unsafeAtomicMax(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kSafeMax) {
|
||||
return safeAtomicMax(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinMin) {
|
||||
return __hip_atomic_fetch_min(mem, val, __ATOMIC_RELAXED, memory_scope);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinMax) {
|
||||
return __hip_atomic_fetch_max(mem, val, __ATOMIC_RELAXED, memory_scope);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) {
|
||||
__shared__ TestType shared_mem;
|
||||
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
|
||||
TestType* const mem = use_shared_mem ? &shared_mem : global_mem;
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
if (tid == 0) mem[0] = global_mem[0];
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(mem);
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
if (tid == 0) global_mem[0] = mem[0];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType>
|
||||
__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch,
|
||||
const unsigned int idx) {
|
||||
const auto byte_ptr = reinterpret_cast<uint8_t*>(ptr);
|
||||
return reinterpret_cast<TestType*>(byte_ptr + idx * pitch);
|
||||
}
|
||||
|
||||
/**
 * Reads every byte in [begin_addr, end_addr), XORs it with 0xAB and writes it back,
 * through a volatile pointer so the accesses are not optimised away.
 *
 * Declared `inline` because this non-template function is defined in a header. The loop
 * uses `<` so an empty or inverted range results in no accesses.
 *
 * @param begin_addr First byte to touch.
 * @param end_addr   One past the last byte to touch.
 */
inline __device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
  for (volatile uint8_t* addr = begin_addr; addr < end_addr; ++addr) {
    uint8_t val = *addr;
    val ^= 0xAB;
    *addr = val;
  }
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
|
||||
const unsigned int width, const unsigned pitch) {
|
||||
extern __shared__ uint8_t shared_mem[];
|
||||
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
|
||||
TestType* const mem = use_shared_mem ? reinterpret_cast<TestType*>(shared_mem) : global_mem;
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
if (tid < width) {
|
||||
const auto target = PitchedOffset(mem, pitch, tid);
|
||||
*target = *PitchedOffset(global_mem, pitch, tid);
|
||||
};
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
const auto n = cooperative_groups::this_grid().size() - width;
|
||||
|
||||
TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width);
|
||||
|
||||
if (tid < n) {
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(
|
||||
PitchedOffset(mem, pitch, tid % width));
|
||||
} else {
|
||||
uint8_t* const begin_addr = reinterpret_cast<uint8_t*>(atomic_addr + 1);
|
||||
uint8_t* const end_addr = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
|
||||
GenerateMemoryTraffic(begin_addr, end_addr);
|
||||
}
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
if (tid < width) {
|
||||
const auto target = PitchedOffset(global_mem, pitch, tid);
|
||||
*target = *PitchedOffset(mem, pitch, tid);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
struct TestParams {
|
||||
auto ThreadCount() const {
|
||||
return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z;
|
||||
}
|
||||
|
||||
dim3 blocks;
|
||||
dim3 threads;
|
||||
unsigned int num_devices = 1u;
|
||||
unsigned int kernel_count = 1u;
|
||||
unsigned int width = 1u;
|
||||
unsigned int pitch = 0u;
|
||||
unsigned int host_thread_count = 0u;
|
||||
LinearAllocs alloc_type;
|
||||
};
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const TestParams& p) {
|
||||
const auto val = GetTestValue<TestType, operation>();
|
||||
|
||||
const auto thread_count = p.num_devices * p.kernel_count * p.ThreadCount();
|
||||
|
||||
TestType test_value =
|
||||
std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;
|
||||
|
||||
std::vector<TestType> res_vals(p.width, test_value);
|
||||
std::vector<TestType> old_vals;
|
||||
old_vals.reserve(thread_count);
|
||||
|
||||
for (auto tid = 0u; tid < thread_count; ++tid) {
|
||||
auto& res = res_vals[tid % p.width];
|
||||
old_vals.push_back(res);
|
||||
|
||||
if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem ||
|
||||
operation == AtomicOperation::kUnsafeMin ||
|
||||
operation == AtomicOperation::kSafeMin ||
|
||||
operation == AtomicOperation::kBuiltinMin) {
|
||||
res = std::min(res, val);
|
||||
} else if constexpr (operation == AtomicOperation::kMax ||
|
||||
operation == AtomicOperation::kMaxSystem ||
|
||||
operation == AtomicOperation::kUnsafeMax ||
|
||||
operation == AtomicOperation::kSafeMax ||
|
||||
operation == AtomicOperation::kBuiltinMax) {
|
||||
res = std::max(res, val);
|
||||
}
|
||||
}
|
||||
|
||||
return {res_vals, old_vals};
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void Verify(const TestParams& p, std::vector<TestType>& res_vals, std::vector<TestType>& old_vals) {
|
||||
auto [expected_res_vals, expected_old_vals] = TestKernelHostRef<TestType, operation>(p);
|
||||
|
||||
for (auto i = 0u; i < res_vals.size(); ++i) {
|
||||
INFO("Results index: " << i);
|
||||
REQUIRE(expected_res_vals[i] == res_vals[i]);
|
||||
}
|
||||
|
||||
std::sort(begin(old_vals), end(old_vals));
|
||||
std::sort(begin(expected_old_vals), end(expected_old_vals));
|
||||
for (auto i = 0u; i < old_vals.size(); ++i) {
|
||||
INFO("Old values index: " << i);
|
||||
REQUIRE(expected_old_vals[i] == old_vals[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr,
|
||||
TestType* const old_vals) {
|
||||
const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u;
|
||||
if (p.width == 1 && p.pitch == sizeof(TestType))
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals);
|
||||
else
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals, p.width, p.pitch);
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void TestCore(const TestParams& p) {
|
||||
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
|
||||
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
|
||||
std::vector<StreamGuard> streams;
|
||||
for (auto i = 0; i < p.num_devices; ++i) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size);
|
||||
for (auto j = 0; j < p.kernel_count; ++j) {
|
||||
streams.emplace_back(Streams::created);
|
||||
}
|
||||
}
|
||||
|
||||
const auto mem_alloc_size = p.width * p.pitch;
|
||||
LinearAllocGuard<TestType> mem_dev(p.alloc_type, mem_alloc_size);
|
||||
|
||||
std::vector<TestType> old_vals(p.num_devices * p.kernel_count * p.ThreadCount());
|
||||
std::vector<TestType> res_vals(p.width);
|
||||
|
||||
TestType* const mem_ptr =
|
||||
p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr();
|
||||
|
||||
TestType test_value =
|
||||
std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;
|
||||
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
|
||||
for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) {
|
||||
HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
for (auto i = 0u; i < p.num_devices; ++i) {
|
||||
for (auto j = 0u; j < p.kernel_count; ++j) {
|
||||
const auto& stream = streams[i * p.kernel_count + j].stream();
|
||||
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
|
||||
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
|
||||
old_vals);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto i = 0u; i < p.num_devices; ++i) {
|
||||
const auto device_offset = i * p.kernel_count * p.ThreadCount();
|
||||
HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(),
|
||||
old_vals_alloc_size, hipMemcpyDeviceToHost));
|
||||
}
|
||||
HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType),
|
||||
p.width, hipMemcpyDeviceToHost));
|
||||
|
||||
Verify<TestType, operation>(p, res_vals, old_vals);
|
||||
}
|
||||
|
||||
/** Catch2 generator yielding the block sizes under test: 16 and 1024 threads. */
inline dim3 GenerateThreadDimensions() {
  return GENERATE(dim3(16), dim3(1024));
}
|
||||
|
||||
/**
 * Catch2 generator yielding the grid sizes under test: the multiprocessor count of
 * device 0 and one and a half times that count.
 */
inline dim3 GenerateBlockDimensions() {
  int multiprocessor_count = 0;
  HIP_CHECK(
      hipDeviceGetAttribute(&multiprocessor_count, hipDeviceAttributeMultiprocessorCount, 0));

  const int oversubscribed_count = multiprocessor_count + multiprocessor_count / 2;
  return GENERATE_COPY(dim3(multiprocessor_count), dim3(oversubscribed_count));
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = 1;
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinMin ||
|
||||
operation == AtomicOperation::kBuiltinMax) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
|
||||
params.threads = 1;
|
||||
} else if constexpr ((operation == AtomicOperation::kBuiltinMin ||
|
||||
operation == AtomicOperation::kBuiltinMax) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
params.threads = dim3(warp_size);
|
||||
} else {
|
||||
params.threads = GenerateThreadDimensions();
|
||||
}
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
SECTION("Global memory") {
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinMin ||
|
||||
operation == AtomicOperation::kBuiltinMax) &&
|
||||
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
|
||||
params.blocks = dim3(1);
|
||||
} else {
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
}
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Shared memory") {
|
||||
params.blocks = dim3(1);
|
||||
params.alloc_type = LinearAllocs::hipMalloc;
|
||||
TestCore<TestType, operation, true>(params);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width,
|
||||
const unsigned int pitch) {
|
||||
int concurrent_kernels = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0));
|
||||
if (!concurrent_kernels) {
|
||||
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
|
||||
return;
|
||||
}
|
||||
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GenerateThreadDimensions();
|
||||
params.threads = GenerateBlockDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
void MultipleDeviceMultipleKernelTest(const unsigned int num_devices,
|
||||
const unsigned int kernel_count, const unsigned int width,
|
||||
const unsigned int pitch) {
|
||||
if (num_devices > 1) {
|
||||
if (HipTest::getDeviceCount() < num_devices) {
|
||||
std::string msg = std::to_string(num_devices) + " devices are required";
|
||||
HipTest::HIP_SKIP_TEST(msg.c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (kernel_count > 1) {
|
||||
for (auto i = 0u; i < num_devices; ++i) {
|
||||
int concurrent_kernels = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i));
|
||||
if (!concurrent_kernels) {
|
||||
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TestParams params;
|
||||
params.num_devices = num_devices;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GenerateThreadDimensions();
|
||||
params.threads = GenerateBlockDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace MinMax
|
||||
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup safeAtomicAdd safeAtomicAdd
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition on a target memory location. Each thread will add the same value to the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of safeAtomicAdd
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicAdd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive", "", float, double) {
  // Operation under test and the two byte strides passed as the last argument.
  constexpr auto kOperation = AtomicOperation::kSafeAdd;
  constexpr auto kElementStride = sizeof(TestType);
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Section names carry the iteration index, so every access pattern is run once per iteration.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    // One target element.
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(1, kElementStride);
    }

    // warp_size target elements, sizeof(TestType) bytes apart.
    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kElementStride);
    }

    // warp_size target elements, 128 bytes apart.
    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineStride);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will
|
||||
* perform an atomic addition on a target memory location. Each thread will add the same value to
|
||||
* the memory location, storing the return value into a separate output array slot corresponding
|
||||
* to it. Once complete, the output array and target memory is validated to contain all the
|
||||
* expected values. Several memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of safeAtomicAdd
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicAdd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive_Multi_Kernel", "", float, double) {
  // Operation under test and the two byte strides passed as the last argument.
  constexpr auto kOperation = AtomicOperation::kSafeAdd;
  constexpr auto kElementStride = sizeof(TestType);
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Same three access patterns as the single-kernel case; the leading argument is 2 throughout.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, kElementStride);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size, kElementStride);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size, kCacheLineStride);
    }
  }
}
|
||||
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup safeAtomicMax safeAtomicMax
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
 * `safeAtomicMax(TestType* address, TestType val)` -
|
||||
 * computes the maximum of the value stored at address and val, returns the old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMax from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;

  // One uniquely named section per iteration; each uses a single target element.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMax from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Adjacent_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // warp_size target elements, sizeof(TestType) bytes apart, once per iteration.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMax from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Scattered_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;
  // Byte distance between consecutive target elements.
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineStride);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMax from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;

  // Multi-kernel variant: leading argument 2, a single target element.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMax from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Multi-kernel variant: leading argument 2, warp_size elements sizeof(TestType) bytes apart.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size,
                                                                   sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMax from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMax;
  // Byte distance between consecutive target elements.
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size,
                                                                   kCacheLineStride);
    }
  }
}
|
||||
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup safeAtomicMin safeAtomicMin
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
 * `safeAtomicMin(TestType* address, TestType val)` -
|
||||
 * computes the minimum of the value stored at address and val, returns the old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMin from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;

  // One uniquely named section per iteration; each uses a single target element.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMin from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Adjacent_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // warp_size target elements, sizeof(TestType) bytes apart, once per iteration.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMin from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Scattered_Addresses", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;
  // Byte distance between consecutive target elements.
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineStride);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMin from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;

  // Multi-kernel variant: leading argument 2, a single target element.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMin from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Multi-kernel variant: leading argument 2, warp_size elements sizeof(TestType) bytes apart.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size,
                                                                   sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs safeAtomicMin from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/safeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kSafeMin;
  // Byte distance between consecutive target elements.
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size,
                                                                   kCacheLineStride);
    }
  }
}
|
||||
@@ -0,0 +1,165 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "memory_order_common.hh"
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_load_store_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kLoadStore;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_exchange_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kExchange;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kCompareExchangeStrong;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kCompareExchangeWeak;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kAdd;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kAnd;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kOr;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kXor;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kMin;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
|
||||
TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency") {
  // Builtin under test; one section per memory scope.
  constexpr auto kOperation = BuiltinAtomicOperation::kMax;

  SECTION("WAVEFRONT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WAVEFRONT>();
  }
  SECTION("WORKGROUP") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_WORKGROUP>();
  }
  SECTION("AGENT") {
    SequentialConsistency::Test<kOperation, __HIP_MEMORY_SCOPE_AGENT>();
  }
  // System scope goes through its own entry point.
  SECTION("SYSTEM") {
    SequentialConsistency::SystemTest<kOperation>();
  }
}
|
||||
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup unsafeAtomicAdd unsafeAtomicAdd
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition on a target memory location. Each thread will add the same value to the memory location,
|
||||
* storing the return value into a separate output array slot corresponding to it. Once complete,
|
||||
* the output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of unsafeAtomicAdd
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicAdd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive", "", float, double) {
  // Operation under test and the two byte strides passed as the last argument.
  constexpr auto kOperation = AtomicOperation::kUnsafeAdd;
  constexpr auto kElementStride = sizeof(TestType);
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Section names carry the iteration index, so every access pattern is run once per iteration.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    // One target element.
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(1, kElementStride);
    }

    // warp_size target elements, sizeof(TestType) bytes apart.
    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kElementStride);
    }

    // warp_size target elements, 128 bytes apart.
    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, kCacheLineStride);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will
|
||||
* perform an atomic addition on a target memory location. Each thread will add the same value to
|
||||
* the memory location, storing the return value into a separate output array slot corresponding
|
||||
* to it. Once complete, the output array and target memory is validated to contain all the
|
||||
* expected values. Several memory access patterns are tested:
|
||||
* -# All threads add to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of unsafeAtomicAdd
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicAdd.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive_Multi_Kernel", "", float, double) {
  // Operation under test and the two byte strides passed as the last argument.
  constexpr auto kOperation = AtomicOperation::kUnsafeAdd;
  constexpr auto kElementStride = sizeof(TestType);
  constexpr auto kCacheLineStride = 128u;

  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  // Same three access patterns as the single-kernel case; the leading argument is 2 throughout.
  for (auto iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, 1, kElementStride);
    }

    DYNAMIC_SECTION("Adjacent addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size, kElementStride);
    }

    DYNAMIC_SECTION("Scattered addresses " << iteration) {
      SingleDeviceMultipleKernelTest<TestType, kOperation>(2, warp_size, kCacheLineStride);
    }
  }
}
|
||||
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup unsafeAtomicMax unsafeAtomicMax
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
 * `unsafeAtomicMax(TestType* address, TestType val)` -
|
||||
 * calculates the maximum of the value stored at address and val, returns the old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMax from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMax from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Adjacent_Addresses", "", float, double) {
  // Warp size of device 0 is forwarded as the first test argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMax from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Scattered_Addresses", "", float, double) {
  // Warp size of device 0 is forwarded as the first test argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  // Spacing passed in place of sizeof(TestType) so that the targets are scattered.
  const auto cache_line_size = 128u;

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, cache_line_size);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMax from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  // First argument of the multiple-kernel helper.
  constexpr auto kKernelCount = 2;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1,
                                                                   sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMax from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  // Warp size of device 0 is forwarded to the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  // First argument of the multiple-kernel helper.
  constexpr auto kKernelCount = 2;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                                   sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMax from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMax.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  // Warp size of device 0 is forwarded to the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  // Spacing passed in place of sizeof(TestType) so that the targets are scattered.
  const auto cache_line_size = 128u;

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMax;
  // First argument of the multiple-kernel helper.
  constexpr auto kKernelCount = 2;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                                   cache_line_size);
    }
  }
}
|
||||
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "min_max_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup unsafeAtomicMin unsafeAtomicMin
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
 * `unsafeAtomicMin(TestType* address, TestType val)` -
|
||||
 * calculates the minimum of the value stored at address and val, returns the old value.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMin from multiple threads on the same address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_SameAddress", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(1, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMin from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Adjacent_Addresses", "", float, double) {
  // Warp size of device 0 is forwarded as the first test argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMin from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Scattered_Addresses", "", float, double) {
  // Warp size of device 0 is forwarded as the first test argument.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  // Spacing passed in place of sizeof(TestType) so that the targets are scattered.
  const auto cache_line_size = 128u;

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceSingleKernelTest<TestType, kOperation>(warp_size, cache_line_size);
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMin from multiple threads on the same address.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) {
  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  // First argument of the multiple-kernel helper.
  constexpr auto kKernelCount = 2;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Same address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, 1,
                                                                   sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMin from multiple threads on adjacent addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float,
                   double) {
  // Warp size of device 0 is forwarded to the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  // First argument of the multiple-kernel helper.
  constexpr auto kKernelCount = 2;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Adjacent address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                                   sizeof(TestType));
    }
  }
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs unsafeAtomicMin from multiple threads on the scattered addresses.
|
||||
* - Uses only one device and launches multiple kernels.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/unsafeAtomicMin.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float,
                   double) {
  // Warp size of device 0 is forwarded to the test helper.
  int warp_size = 0;
  HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
  // Spacing passed in place of sizeof(TestType) so that the targets are scattered.
  const auto cache_line_size = 128u;

  constexpr auto kOperation = MinMax::AtomicOperation::kUnsafeMin;
  // First argument of the multiple-kernel helper.
  constexpr auto kKernelCount = 2;

  // Repeat the scenario cmd_options.iterations times, each run in its own section.
  for (int iteration = 0; iteration < cmd_options.iterations; ++iteration) {
    DYNAMIC_SECTION("Scattered address " << iteration) {
      MinMax::SingleDeviceMultipleKernelTest<TestType, kOperation>(kKernelCount, warp_size,
                                                                   cache_line_size);
    }
  }
}
|
||||
@@ -52,7 +52,7 @@ class CompileAndCapture(unittest.TestCase):
|
||||
# HIP compiler on AMD platforms has limit of 20 errors, and some negative
|
||||
# test cases expect that more errors are detected.
|
||||
if (self.platform == 'amd'):
|
||||
compiler_args.append('-ferror-limit=100')
|
||||
compiler_args.append('-ferror-limit=200')
|
||||
compiler_output = subprocess.run(compiler_args, stderr=subprocess.PIPE)
|
||||
# Get the compiler output in the stdout if -V flag is raised during ctest invocation.
|
||||
compiler_stderr = compiler_output.stderr.decode('UTF-8')
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
set(TEST_SRC
|
||||
thread_block.cc
|
||||
thread_block_tile.cc
|
||||
coalesced_group_tiled_partition.cc
|
||||
hipCGThreadBlockType_old.cc
|
||||
hipCGMultiGridGroupType_old.cc
|
||||
hipCGGridGroupType_old.cc
|
||||
|
||||
@@ -0,0 +1,685 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "cooperative_groups_common.hh"
|
||||
|
||||
#include <bitset>
|
||||
#include <optional>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
#include <cmd_options.hh>
|
||||
#include <cpu_grid.h>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
|
||||
/**
|
||||
* @addtogroup coalesced_group_tile coalesced_group_tile
|
||||
* @{
|
||||
* @ingroup DeviceLanguageTest
|
||||
* Contains unit tests for partitioning of coalesced groups into tiled partitions
|
||||
*/
|
||||
|
||||
namespace cg = cooperative_groups;
|
||||
|
||||
namespace {
// Integer type whose full range is used when drawing random active masks:
// 64-bit when HT_AMD is set, 32-bit otherwise.
#if HT_AMD
using MaskBits = uint64_t;
#else
using MaskBits = uint32_t;
#endif

// Inclusive lower and upper bounds for randomly generated active masks.
constexpr auto kMaskMin = std::numeric_limits<MaskBits>::min();
constexpr auto kMaskLimit = std::numeric_limits<MaskBits>::max();
}  // namespace
|
||||
|
||||
static unsigned int GenerateTileSizes() {
|
||||
#if HT_AMD
|
||||
return GENERATE(2u, 4u, 8u, 16u, 32u, 64u);
|
||||
#else
|
||||
return GENERATE(2u, 4u, 8u, 16u, 32u);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the single pseudo-random engine shared by the tests in this file. It is seeded with
// the fixed value 11, so the generated sequence is reproducible from run to run.
static inline std::mt19937& GetRandomGenerator() {
  static std::mt19937 engine(11);
  return engine;
}
|
||||
|
||||
template <typename T> static inline T GenerateRandomInteger(const T min, const T max) {
|
||||
std::uniform_int_distribution<T> dist(min, max);
|
||||
return dist(GetRandomGenerator());
|
||||
}
|
||||
|
||||
/**
 * Expands an active-thread bitmask into the list of active lane indices.
 *
 * @tparam warp_size Number of lanes inspected; mask bits at positions >= warp_size are ignored.
 * @param mask Bitmask in which a set bit i marks lane i as active.
 * @return Tuple of (lane indices in ascending order, number of active lanes). Only the first
 *         `count` array entries are meaningful; the remaining ones stay zero.
 */
template <size_t warp_size> static auto coalesce_threads(const uint64_t mask) {
  static_assert(warp_size <= 64, "a 64-bit mask can describe at most 64 lanes");

  std::tuple<std::array<unsigned int, warp_size>, unsigned int> res;
  auto& [threads, count] = res;

  count = 0u;
  for (auto i = 0u; i < warp_size; ++i) {
    // Shift a 64-bit one: `1u << i` is undefined for i >= 32 and mis-detects lanes 32..63.
    if (mask & (uint64_t{1} << i)) {
      threads[count++] = i;
    }
  }

  return res;
}
|
||||
|
||||
/**
 * Tells the calling thread whether it must drop out, based on its warp's active mask.
 *
 * The mask for a warp is read from
 * active_masks[block_rank * warps_per_block + warp index within the block].
 *
 * @tparam warp_size Width of the tiles the thread block is partitioned into.
 * @param active_masks One mask per warp in the grid; a set bit i keeps lane i active.
 * @return true when the calling thread's bit is clear in its warp's mask.
 */
template <size_t warp_size> __device__ bool deactivate_thread(uint64_t* active_masks) {
  const cg::thread_block_tile<warp_size> warp =
      cg::tiled_partition<warp_size>(cg::this_thread_block());
  const auto block = cg::this_thread_block();
  const auto warps_per_block = (block.size() + warp_size - 1) / warp_size;
  const auto block_rank = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
  const auto idx = block_rank * warps_per_block + block.thread_rank() / warp.size();

  // Shift a 64-bit one: `1u << rank` is undefined for rank >= 32, so lanes 32..63 of a
  // 64-wide tile would otherwise be tested against the wrong mask bit.
  return !(active_masks[idx] & (uint64_t{1} << warp.thread_rank()));
}
|
||||
|
||||
|
||||
template <size_t warp_size>
|
||||
__global__ void coalesced_group_tiled_partition_size_getter(uint64_t* active_masks,
|
||||
unsigned int tile_size,
|
||||
unsigned int* sizes) {
|
||||
if (deactivate_thread<warp_size>(active_masks)) {
|
||||
return;
|
||||
}
|
||||
sizes[thread_rank_in_grid()] = cg::tiled_partition(cg::coalesced_threads(), tile_size).size();
|
||||
}
|
||||
|
||||
template <size_t warp_size>
|
||||
__global__ void coalesced_group_tiled_partition_thread_rank_getter(uint64_t* active_masks,
|
||||
unsigned int tile_size,
|
||||
unsigned int* sizes) {
|
||||
if (deactivate_thread<warp_size>(active_masks)) {
|
||||
return;
|
||||
}
|
||||
|
||||
sizes[thread_rank_in_grid()] =
|
||||
cg::tiled_partition(cg::coalesced_threads(), tile_size).thread_rank();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Deactivates threads based on passed in mask and creates tiled partitions over coalesced
|
||||
* threads for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} and writes the return values of
|
||||
* size and thread_rank member functions to an output array that is validated on the host side.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic") {
  const auto tile_size = GenerateTileSizes();
  INFO("Tile size: " << tile_size);
  auto blocks = GenerateBlockDimensions();
  auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
  CPUGrid grid(blocks, threads);

  // One output slot per thread in the grid, on the device and on the host.
  const auto alloc_size = grid.thread_count_ * sizeof(unsigned int);
  LinearAllocGuard<unsigned int> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
  LinearAllocGuard<unsigned int> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);

  // One 64-bit activity mask per warp in the grid.
  const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
  const auto warps_in_grid = warps_in_block * grid.block_count_;
  LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
                                              warps_in_grid * sizeof(uint64_t));
  LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
                                          warps_in_grid * sizeof(uint64_t));

  // NOTE(review): the masks are drawn from the 32-bit range and the kernels below are
  // instantiated with a hard-coded width of 32, while the host-side bookkeeping uses kWarpSize.
  // Confirm this is intended when kWarpSize is not 32.
  std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
                [] { return GenerateRandomInteger(0u, std::numeric_limits<uint32_t>().max()); });
  HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
                      hipMemcpyHostToDevice));

  // Pass 1: collect the tile size seen by every active thread.
  HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
  coalesced_group_tiled_partition_size_getter<32>
      <<<blocks, threads>>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr());
  // Surface launch failures immediately, as the shuffle tests in this file do.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // Pass 2 is launched now and runs while the host validates the sizes copied above.
  HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
  coalesced_group_tiled_partition_thread_rank_getter<32>
      <<<blocks, threads>>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr());
  HIP_CHECK(hipGetLastError());

  // Number of lanes by which the warp-rounded block size exceeds the real block size.
  const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;

  // validate size
  for (auto i = 0u; i < warps_in_grid; ++i) {
    auto current_warp_mask = active_masks.ptr()[i];
    // Non-zero only for the last warp of each block: clears the topmost `tail` mask bits
    // (plus 32 more when running on NVIDIA).
    const auto shift_amount =
        (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
    current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;

    const auto [active_threads, active_thread_count] =
        coalesce_threads<kWarpSize>(current_warp_mask);

    // Accumulated tail lanes of all preceding blocks; subtracted to map the warp-based index
    // back to the index into the output array.
    const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
    const auto num_tiles = (active_thread_count + tile_size - 1) / tile_size;
    // Number of slots missing from the last (possibly partial) tile.
    const auto tile_tail = num_tiles * tile_size - active_thread_count;
    // Step tile-sized window over active threads
    for (auto t = 0u; t < active_thread_count; t += tile_size) {
      const auto window_start = t;
      const auto window_end = t + tile_size;
      // Iterate through window
      for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
        const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
        // Full tiles report tile_size; the last tile reports what is left of it.
        const auto expected_val = tile_size - tile_tail * (t + tile_size >= active_thread_count);
        const auto actual_val = uint_arr.ptr()[global_thread_idx];
        INFO("global index: " << global_thread_idx);
        // Only invoke REQUIRE on a mismatch.
        if (actual_val != expected_val) {
          REQUIRE(actual_val == expected_val);
        }
      }
    }
  }

  // Fetch the ranks produced by pass 2.
  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // validate rank
  for (auto i = 0u; i < warps_in_grid; ++i) {
    auto current_warp_mask = active_masks.ptr()[i];
    const auto shift_amount =
        (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
    current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;

    const auto [active_threads, active_thread_count] =
        coalesce_threads<kWarpSize>(current_warp_mask);

    const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
    // Step tile-sized window over active threads
    for (auto t = 0u; t < active_thread_count; t += tile_size) {
      const auto window_start = t;
      const auto window_end = t + tile_size;
      // Iterate through window
      for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
        const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
        // The rank is the thread's position inside its tile.
        const auto expected_val = k % tile_size;
        const auto actual_val = uint_arr.ptr()[global_thread_idx];
        INFO("global index: " << global_thread_idx);
        if (actual_val != expected_val) {
          REQUIRE(actual_val == expected_val);
        }
      }
    }
  }
}
|
||||
|
||||
|
||||
template <typename T, size_t warp_size>
|
||||
__global__ void coalesced_group_tiled_partition_shfl_up(uint64_t* active_masks, T* const out,
|
||||
const unsigned int tile_size,
|
||||
const unsigned int delta) {
|
||||
if (deactivate_thread<warp_size>(active_masks)) {
|
||||
return;
|
||||
}
|
||||
const cg::thread_block_tile<warp_size> warp =
|
||||
cg::tiled_partition<warp_size>(cg::this_thread_block());
|
||||
T var = static_cast<T>(warp.thread_rank());
|
||||
|
||||
const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size);
|
||||
out[thread_rank_in_grid()] = tile.shfl_up(var, delta);
|
||||
}
|
||||
|
||||
|
||||
template <typename T> static void CoalescedGroupTiledPartitonShflUpTestImpl() {
|
||||
const auto tile_size = GenerateTileSizes();
|
||||
INFO("Tile size: " << tile_size);
|
||||
auto blocks = GenerateBlockDimensionsForShuffle();
|
||||
auto threads = GenerateThreadDimensionsForShuffle();
|
||||
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
|
||||
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
|
||||
const auto delta = GENERATE_COPY(range(0u, tile_size));
|
||||
INFO("Delta: " << delta);
|
||||
CPUGrid grid(blocks, threads);
|
||||
|
||||
const auto alloc_size = grid.thread_count_ * sizeof(T);
|
||||
LinearAllocGuard<T> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
|
||||
LinearAllocGuard<T> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
|
||||
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
|
||||
const auto warps_in_grid = warps_in_block * grid.block_count_;
|
||||
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
|
||||
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
|
||||
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
|
||||
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
|
||||
coalesced_group_tiled_partition_shfl_up<T, kWarpSize>
|
||||
<<<blocks, threads>>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
|
||||
|
||||
for (auto i = 0u; i < warps_in_grid; ++i) {
|
||||
auto current_warp_mask = active_masks.ptr()[i];
|
||||
const auto shift_amount =
|
||||
(tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
|
||||
current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
|
||||
|
||||
const auto [active_threads, active_thread_count] =
|
||||
coalesce_threads<kWarpSize>(current_warp_mask);
|
||||
|
||||
const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
|
||||
// Step tile-sized window over active threads
|
||||
for (auto t = 0u; t < active_thread_count; t += tile_size) {
|
||||
const auto window_start = t + delta;
|
||||
const auto window_end = t + tile_size;
|
||||
// Iterate through window
|
||||
for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
|
||||
const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
|
||||
const auto expected_val = active_threads[k - delta];
|
||||
const auto actual_val = uint_arr.ptr()[global_thread_idx];
|
||||
INFO("global index: " << global_thread_idx);
|
||||
if (actual_val != expected_val) {
|
||||
REQUIRE(actual_val == expected_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates the shuffle up behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32,
|
||||
* 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced
|
||||
* group, with memberships of threads in the coalesced group being controlled via a passed in active
|
||||
* mask. The test is run for all overloads of shfl_up.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic", "", int,
                   unsigned int, long, unsigned long, long long, unsigned long long, float,
                   double) {
  // All of the work lives in the shared implementation; this only selects the value type.
  CoalescedGroupTiledPartitonShflUpTestImpl<TestType>();
}
|
||||
|
||||
|
||||
template <typename T, size_t warp_size>
|
||||
__global__ void coalesced_group_tiled_partition_shfl_down(uint64_t* active_masks, T* const out,
|
||||
const unsigned int tile_size,
|
||||
const unsigned int delta) {
|
||||
if (deactivate_thread<warp_size>(active_masks)) {
|
||||
return;
|
||||
}
|
||||
const cg::thread_block_tile<warp_size> warp =
|
||||
cg::tiled_partition<warp_size>(cg::this_thread_block());
|
||||
T var = static_cast<T>(warp.thread_rank());
|
||||
|
||||
const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size);
|
||||
out[thread_rank_in_grid()] = tile.shfl_down(var, delta);
|
||||
}
|
||||
|
||||
|
||||
template <typename T> static void CoalescedGroupTiledPartitonShflDownTestImpl() {
|
||||
const auto tile_size = GenerateTileSizes();
|
||||
INFO("Tile size: " << tile_size);
|
||||
auto blocks = GenerateBlockDimensionsForShuffle();
|
||||
auto threads = GenerateThreadDimensionsForShuffle();
|
||||
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
|
||||
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
|
||||
const auto delta = GENERATE_COPY(range(0u, tile_size));
|
||||
INFO("Delta: " << delta);
|
||||
CPUGrid grid(blocks, threads);
|
||||
|
||||
const auto alloc_size = grid.thread_count_ * sizeof(T);
|
||||
LinearAllocGuard<T> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
|
||||
LinearAllocGuard<T> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
|
||||
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
|
||||
const auto warps_in_grid = warps_in_block * grid.block_count_;
|
||||
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
|
||||
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
|
||||
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
|
||||
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
|
||||
coalesced_group_tiled_partition_shfl_down<T, kWarpSize>
|
||||
<<<blocks, threads>>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
|
||||
|
||||
for (auto i = 0u; i < warps_in_grid; ++i) {
|
||||
auto current_warp_mask = active_masks.ptr()[i];
|
||||
const auto shift_amount =
|
||||
(tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
|
||||
current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
|
||||
|
||||
const auto [active_threads, active_thread_count] =
|
||||
coalesce_threads<kWarpSize>(current_warp_mask);
|
||||
|
||||
if (delta >= active_thread_count) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
|
||||
// Step tile-sized window over active threads
|
||||
for (auto t = 0u; t < active_thread_count; t += tile_size) {
|
||||
const auto window_start = t;
|
||||
const auto window_end = t + tile_size - delta;
|
||||
// Iterate through window
|
||||
for (auto k = window_start; k < window_end && k < active_thread_count - delta; ++k) {
|
||||
const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
|
||||
const auto expected_val = active_threads[k + delta];
|
||||
const auto actual_val = uint_arr.ptr()[global_thread_idx];
|
||||
INFO("global index: " << global_thread_idx);
|
||||
if (actual_val != expected_val) {
|
||||
REQUIRE(actual_val == expected_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates the shuffle down behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32,
|
||||
* 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced
|
||||
* group, with memberships of threads in the coalesced group being controlled via a passed in active
|
||||
* mask. The test is run for all overloads of shfl_down.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic", "", int,
                   unsigned int, long, unsigned long, long long, unsigned long long, float,
                   double) {
  // One instantiation of the shared shfl_down driver per listed element type.
  using ElementType = TestType;
  CoalescedGroupTiledPartitonShflDownTestImpl<ElementType>();
}
|
||||
|
||||
|
||||
template <typename T, size_t warp_size>
|
||||
__global__ void coalesced_group_tiled_partition_shfl(uint64_t* active_masks, uint8_t* target_lanes,
|
||||
T* const out, const unsigned int tile_size) {
|
||||
if (deactivate_thread<warp_size>(active_masks)) {
|
||||
return;
|
||||
}
|
||||
const cg::thread_block_tile<warp_size> warp =
|
||||
cg::tiled_partition<warp_size>(cg::this_thread_block());
|
||||
T var = static_cast<T>(warp.thread_rank());
|
||||
|
||||
const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size);
|
||||
out[thread_rank_in_grid()] = tile.shfl(var, target_lanes[tile.thread_rank()]);
|
||||
}
|
||||
|
||||
template <typename T> static void CoalescedGroupTiledPartitonShflTestImpl() {
|
||||
const auto tile_size = GenerateTileSizes();
|
||||
INFO("Tile size: " << tile_size);
|
||||
auto blocks = GenerateBlockDimensionsForShuffle();
|
||||
auto threads = GenerateThreadDimensionsForShuffle();
|
||||
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
|
||||
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
|
||||
CPUGrid grid(blocks, threads);
|
||||
|
||||
const auto alloc_size = grid.thread_count_ * sizeof(T);
|
||||
LinearAllocGuard<T> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
|
||||
LinearAllocGuard<T> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
|
||||
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
|
||||
const auto warps_in_grid = warps_in_block * grid.block_count_;
|
||||
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
LinearAllocGuard<uint8_t> target_lanes_dev(LinearAllocs::hipMalloc, tile_size * sizeof(uint8_t));
|
||||
LinearAllocGuard<uint8_t> target_lanes(LinearAllocs::hipHostMalloc, tile_size * sizeof(uint8_t));
|
||||
|
||||
std::generate(target_lanes.ptr(), target_lanes.ptr() + tile_size,
|
||||
[tile_size] { return GenerateRandomInteger(0, static_cast<int>(2 * tile_size)); });
|
||||
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
|
||||
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
|
||||
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(target_lanes_dev.ptr(), target_lanes.ptr(), tile_size * sizeof(uint8_t),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size));
|
||||
coalesced_group_tiled_partition_shfl<T, kWarpSize><<<blocks, threads>>>(
|
||||
active_masks_dev.ptr(), target_lanes_dev.ptr(), uint_arr_dev.ptr(), tile_size);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
|
||||
|
||||
for (auto i = 0u; i < warps_in_grid; ++i) {
|
||||
auto current_warp_mask = active_masks.ptr()[i];
|
||||
const auto shift_amount =
|
||||
(tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block);
|
||||
current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount;
|
||||
|
||||
const auto [active_threads, active_thread_count] =
|
||||
coalesce_threads<kWarpSize>(current_warp_mask);
|
||||
|
||||
const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block);
|
||||
// Step tile-sized window over active threads
|
||||
for (auto t = 0u; t < active_thread_count; t += tile_size) {
|
||||
const auto window_start = t;
|
||||
const auto window_end = t + tile_size;
|
||||
// Iterate through window
|
||||
for (auto k = window_start; k < window_end && k < active_thread_count; ++k) {
|
||||
const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails;
|
||||
const auto target_lane = target_lanes.ptr()[k % tile_size];
|
||||
if (target_lane >= tile_size || target_lane >= active_thread_count - t) {
|
||||
continue;
|
||||
}
|
||||
const auto expected_val = active_threads[t + target_lane];
|
||||
const auto actual_val = uint_arr.ptr()[global_thread_idx];
|
||||
INFO("global index: " << global_thread_idx);
|
||||
if (actual_val != expected_val) {
|
||||
REQUIRE(actual_val == expected_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates the shuffle behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32,
|
||||
* 64(if AMD)} for randomly generated target lanes (generator bounds 0 and 2 * tile size). The partitions are created over a coalesced
|
||||
* group, with memberships of threads in the coalesced group being controlled via a passed in active
|
||||
* mask. The test is run for all overloads of shfl.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic", "", int,
                   unsigned int, long, unsigned long, long long, unsigned long long, float,
                   double) {
  // One instantiation of the shared shfl driver per listed element type.
  using ElementType = TestType;
  CoalescedGroupTiledPartitonShflTestImpl<ElementType>();
}
|
||||
|
||||
|
||||
template <bool use_global, size_t warp_size, typename T>
|
||||
__global__ void coalesced_group_tiled_partition_sync_check(uint64_t* active_masks, T* global_data,
|
||||
unsigned int* wait_modifiers,
|
||||
size_t tile_size) {
|
||||
if (deactivate_thread<warp_size>(active_masks)) {
|
||||
return;
|
||||
}
|
||||
|
||||
extern __shared__ uint8_t shared_data[];
|
||||
T* const data = use_global ? global_data : reinterpret_cast<T*>(shared_data);
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto block = cg::this_thread_block();
|
||||
const auto coalesced = cg::coalesced_threads();
|
||||
const auto partition = cg::tiled_partition(coalesced, tile_size);
|
||||
const auto data_idx = [&block](unsigned int i) { return use_global ? i : (i % block.size()); };
|
||||
|
||||
const auto wait_modifier = wait_modifiers[tid];
|
||||
|
||||
const auto block_rank = tid / block.size();
|
||||
const auto warp_rank = block.thread_rank() / warp_size;
|
||||
const auto warp_base = block_rank * block.size() + warp_rank * warp_size;
|
||||
const auto global_idx = warp_base + coalesced.thread_rank();
|
||||
|
||||
busy_wait(wait_modifier);
|
||||
data[data_idx(global_idx)] = partition.thread_rank();
|
||||
partition.sync();
|
||||
|
||||
bool valid = true;
|
||||
const auto tile_rank = coalesced.thread_rank() / tile_size;
|
||||
for (auto i = 0u; i < tile_size; ++i) {
|
||||
const auto target_rank_in_tile = (coalesced.thread_rank() + i) % tile_size;
|
||||
const auto target_rank_in_warp = tile_rank * tile_size + target_rank_in_tile;
|
||||
if (target_rank_in_warp >= coalesced.size()) {
|
||||
continue;
|
||||
}
|
||||
if (!(valid &= (data[data_idx(warp_base + target_rank_in_warp)] == target_rank_in_tile))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Validate
|
||||
partition.sync();
|
||||
data[data_idx(global_idx)] = valid;
|
||||
if constexpr (!use_global) {
|
||||
global_data[global_idx] = data[data_idx(global_idx)];
|
||||
}
|
||||
}
|
||||
|
||||
template <bool global_memory, typename T> void CoalescedGroupTiledPartitionSyncTest() {
|
||||
const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations));
|
||||
INFO("Run number: " << randomized_run_count + 1);
|
||||
const auto tile_size = GenerateTileSizes();
|
||||
INFO("Tile size: " << tile_size);
|
||||
auto blocks = GenerateBlockDimensionsForShuffle();
|
||||
auto threads = GenerateThreadDimensionsForShuffle();
|
||||
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
|
||||
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
|
||||
CPUGrid grid(blocks, threads);
|
||||
|
||||
const auto alloc_size = grid.thread_count_ * sizeof(T);
|
||||
const auto alloc_size_per_block = alloc_size / grid.block_count_;
|
||||
int max_shared_mem_per_block = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block,
|
||||
hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
|
||||
if (!global_memory && (max_shared_mem_per_block < alloc_size_per_block)) {
|
||||
return;
|
||||
}
|
||||
|
||||
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
|
||||
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
LinearAllocGuard<unsigned int> wait_modifiers_dev(LinearAllocs::hipMalloc,
|
||||
grid.thread_count_ * sizeof(unsigned int));
|
||||
LinearAllocGuard<unsigned int> wait_modifiers(LinearAllocs::hipHostMalloc,
|
||||
grid.thread_count_ * sizeof(unsigned int));
|
||||
const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize;
|
||||
const auto warps_in_grid = warps_in_block * grid.block_count_;
|
||||
LinearAllocGuard<uint64_t> active_masks_dev(LinearAllocs::hipMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
LinearAllocGuard<uint64_t> active_masks(LinearAllocs::hipHostMalloc,
|
||||
warps_in_grid * sizeof(uint64_t));
|
||||
if (randomized_run_count != 0) {
|
||||
std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_,
|
||||
[] { return GenerateRandomInteger(0u, 1500u); });
|
||||
} else {
|
||||
std::fill_n(wait_modifiers.ptr(), grid.thread_count_, 0u);
|
||||
}
|
||||
std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid,
|
||||
[] { return GenerateRandomInteger(kMaskMin, kMaskLimit); });
|
||||
|
||||
HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(),
|
||||
grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));
|
||||
|
||||
const auto shared_memory_size = global_memory ? 0u : alloc_size_per_block;
|
||||
coalesced_group_tiled_partition_sync_check<global_memory, kWarpSize>
|
||||
<<<blocks, threads, shared_memory_size>>>(active_masks_dev.ptr(), arr_dev.ptr(),
|
||||
wait_modifiers_dev.ptr(), tile_size);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_;
|
||||
for (int i = 0u; i < grid.block_count_; ++i) {
|
||||
for (int j = 0u; j < warps_in_block; ++j) {
|
||||
const auto warp_idx = i * warps_in_block + j;
|
||||
auto mask = active_masks.ptr()[warp_idx];
|
||||
const auto shift_amount =
|
||||
(tail + 32 * TestContext::get().isNvidia()) * !((warp_idx + 1) % warps_in_block);
|
||||
mask = (mask << shift_amount) >> shift_amount;
|
||||
const auto active_count = std::bitset<sizeof(mask) * 8>(mask).count();
|
||||
const auto start_offset = i * grid.threads_in_block_count_ + j * kWarpSize;
|
||||
const auto end_offset = start_offset + active_count;
|
||||
const auto valid =
|
||||
std::all_of(arr.ptr() + start_offset, arr.ptr() + end_offset, [](T e) { return e; });
|
||||
if (!valid) {
|
||||
REQUIRE(valid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches a kernel wherein threads in each warp are deactivated based on a passed bitmask.
|
||||
* Coalesced groups are formed and divided into tiled partitions(size of 2, 4, 8, 16, 32, 64 if AMD)
|
||||
* and every thread writes its intra-tile rank into an array slot determined by its global warp rank
|
||||
* and coalesced group rank. The array is either in global or dynamic shared memory based on a
|
||||
* compile time switch, and the test is run for arrays of 1, 2, and 4 byte elements. Before the
|
||||
* write each thread executes a busy wait loop for a random amount of clock cycles, the amount being
|
||||
* read from an input array. After the write a tile-wide sync is performed and each thread validates
|
||||
* that it can read the expected values that other threads within the same tile have written to
|
||||
* their respective array slots.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/cooperativeGrps/coalesced_group_tiled_partition.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
// NOTE(review): no use of this global is visible in the surrounding test code, and it separates
// the Doxygen block above from the test case it describes - confirm it is still needed.
uint64_t counter = 0;
|
||||
TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic", "", uint8_t,
                   uint16_t, uint32_t) {
  // The same driver is run once per backing store; the bool selects global vs shared memory.
  using ElementType = TestType;
  SECTION("Global memory") {
    constexpr bool kUseGlobalMemory = true;
    CoalescedGroupTiledPartitionSyncTest<kUseGlobalMemory, ElementType>();
  }
  SECTION("Shared memory") {
    constexpr bool kUseGlobalMemory = false;
    CoalescedGroupTiledPartitionSyncTest<kUseGlobalMemory, ElementType>();
  }
}
|
||||
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
|
||||
/**
|
||||
* @addtogroup coalesced_group thread_block_tile
|
||||
|
||||
@@ -76,3 +76,4 @@ template <class T> bool CheckDimensions(unsigned int device, T kernel, dim3 bloc
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <dlfcn.h>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchCooperativeKernel
|
||||
* @{
|
||||
|
||||
@@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <dlfcn.h>
|
||||
#include <vector>
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
error_handling_common.cc
|
||||
hipGetErrorName.cc
|
||||
hipGetErrorString.cc
|
||||
hipGetLastError.cc
|
||||
hipPeekAtLastError.cc
|
||||
hipDrvGetErrorName.cc
|
||||
hipDrvGetErrorString.cc
|
||||
hipGetLastError.cc
|
||||
hipPeekAtLastError.cc
|
||||
)
|
||||
|
||||
hip_add_exe_to_target(NAME ErrorHandlingTest
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
COMPILE_OPTIONS -std=c++17)
|
||||
COMPILE_OPTIONS -std=c++17)
|
||||
|
||||
@@ -0,0 +1,534 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
/**
 * Maps a hipError_t value to the enumerator name the tests expect for it.
 *
 * On HT_AMD builds the result is the HIP enumerator spelling ("hipErrorInvalidValue", ...); on
 * all other builds it is the CUDA driver spelling ("CUDA_ERROR_INVALID_VALUE", ...), with HIP
 * errors that have no listed CUDA counterpart reported as "CUDA_ERROR_UNKNOWN".
 *
 * @param enumerator error code to translate
 * @return pointer to a string literal; values not listed below yield "hipErrorUnknown" on
 *         HT_AMD builds and "CUDA_ERROR_UNKNOWN" otherwise
 */
const char* ErrorName(hipError_t enumerator) {
// Each table row pairs a HIP enumerator with its CUDA spelling; the build selects which of the
// two strings the generated case returns.
#if HT_AMD
#define ERROR_NAME_CASE(hip_enumerator, cuda_name)                                                 \
  case hip_enumerator:                                                                             \
    return #hip_enumerator;
#else
#define ERROR_NAME_CASE(hip_enumerator, cuda_name)                                                 \
  case hip_enumerator:                                                                             \
    return cuda_name;
#endif

  switch (enumerator) {
    ERROR_NAME_CASE(hipSuccess, "CUDA_SUCCESS")
    ERROR_NAME_CASE(hipErrorInvalidValue, "CUDA_ERROR_INVALID_VALUE")
    ERROR_NAME_CASE(hipErrorOutOfMemory, "CUDA_ERROR_OUT_OF_MEMORY")
    ERROR_NAME_CASE(hipErrorNotInitialized, "CUDA_ERROR_NOT_INITIALIZED")
    ERROR_NAME_CASE(hipErrorDeinitialized, "CUDA_ERROR_DEINITIALIZED")
    ERROR_NAME_CASE(hipErrorProfilerDisabled, "CUDA_ERROR_PROFILER_DISABLED")
    ERROR_NAME_CASE(hipErrorProfilerNotInitialized, "CUDA_ERROR_PROFILER_NOT_INITIALIZED")
    ERROR_NAME_CASE(hipErrorProfilerAlreadyStarted, "CUDA_ERROR_PROFILER_ALREADY_STARTED")
    ERROR_NAME_CASE(hipErrorProfilerAlreadyStopped, "CUDA_ERROR_PROFILER_ALREADY_STOPPED")
    ERROR_NAME_CASE(hipErrorInvalidConfiguration, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorInvalidSymbol, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorInvalidDevicePointer, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorInvalidMemcpyDirection, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorInsufficientDriver, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorMissingConfiguration, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorPriorLaunchFailure, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorInvalidDeviceFunction, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorNoDevice, "CUDA_ERROR_NO_DEVICE")
    ERROR_NAME_CASE(hipErrorInvalidDevice, "CUDA_ERROR_INVALID_DEVICE")
    ERROR_NAME_CASE(hipErrorInvalidPitchValue, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorInvalidImage, "CUDA_ERROR_INVALID_IMAGE")
    ERROR_NAME_CASE(hipErrorInvalidContext, "CUDA_ERROR_INVALID_CONTEXT")
    ERROR_NAME_CASE(hipErrorContextAlreadyCurrent, "CUDA_ERROR_CONTEXT_ALREADY_CURRENT")
    ERROR_NAME_CASE(hipErrorMapFailed, "CUDA_ERROR_MAP_FAILED")
    ERROR_NAME_CASE(hipErrorUnmapFailed, "CUDA_ERROR_UNMAP_FAILED")
    ERROR_NAME_CASE(hipErrorArrayIsMapped, "CUDA_ERROR_ARRAY_IS_MAPPED")
    ERROR_NAME_CASE(hipErrorAlreadyMapped, "CUDA_ERROR_ALREADY_MAPPED")
    ERROR_NAME_CASE(hipErrorNoBinaryForGpu, "CUDA_ERROR_NO_BINARY_FOR_GPU")
    ERROR_NAME_CASE(hipErrorAlreadyAcquired, "CUDA_ERROR_ALREADY_ACQUIRED")
    ERROR_NAME_CASE(hipErrorNotMapped, "CUDA_ERROR_NOT_MAPPED")
    ERROR_NAME_CASE(hipErrorNotMappedAsArray, "CUDA_ERROR_NOT_MAPPED_AS_ARRAY")
    ERROR_NAME_CASE(hipErrorNotMappedAsPointer, "CUDA_ERROR_NOT_MAPPED_AS_POINTER")
    ERROR_NAME_CASE(hipErrorECCNotCorrectable, "CUDA_ERROR_ECC_UNCORRECTABLE")
    ERROR_NAME_CASE(hipErrorUnsupportedLimit, "CUDA_ERROR_UNSUPPORTED_LIMIT")
    ERROR_NAME_CASE(hipErrorContextAlreadyInUse, "CUDA_ERROR_CONTEXT_ALREADY_IN_USE")
    ERROR_NAME_CASE(hipErrorPeerAccessUnsupported, "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED")
    ERROR_NAME_CASE(hipErrorInvalidKernelFile, "CUDA_ERROR_INVALID_PTX")
    ERROR_NAME_CASE(hipErrorInvalidGraphicsContext, "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT")
    ERROR_NAME_CASE(hipErrorInvalidSource, "CUDA_ERROR_INVALID_SOURCE")
    ERROR_NAME_CASE(hipErrorFileNotFound, "CUDA_ERROR_FILE_NOT_FOUND")
    ERROR_NAME_CASE(hipErrorSharedObjectSymbolNotFound, "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND")
    ERROR_NAME_CASE(hipErrorSharedObjectInitFailed, "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED")
    ERROR_NAME_CASE(hipErrorOperatingSystem, "CUDA_ERROR_OPERATING_SYSTEM")
    ERROR_NAME_CASE(hipErrorInvalidHandle, "CUDA_ERROR_INVALID_HANDLE")
    ERROR_NAME_CASE(hipErrorIllegalState, "CUDA_ERROR_ILLEGAL_STATE")
    ERROR_NAME_CASE(hipErrorNotFound, "CUDA_ERROR_NOT_FOUND")
    ERROR_NAME_CASE(hipErrorNotReady, "CUDA_ERROR_NOT_READY")
    ERROR_NAME_CASE(hipErrorIllegalAddress, "CUDA_ERROR_ILLEGAL_ADDRESS")
    ERROR_NAME_CASE(hipErrorLaunchOutOfResources, "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES")
    ERROR_NAME_CASE(hipErrorLaunchTimeOut, "CUDA_ERROR_LAUNCH_TIMEOUT")
    ERROR_NAME_CASE(hipErrorPeerAccessAlreadyEnabled, "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED")
    ERROR_NAME_CASE(hipErrorPeerAccessNotEnabled, "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED")
    ERROR_NAME_CASE(hipErrorSetOnActiveProcess, "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE")
    ERROR_NAME_CASE(hipErrorContextIsDestroyed, "CUDA_ERROR_CONTEXT_IS_DESTROYED")
    ERROR_NAME_CASE(hipErrorAssert, "CUDA_ERROR_ASSERT")
    ERROR_NAME_CASE(hipErrorHostMemoryAlreadyRegistered, "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED")
    ERROR_NAME_CASE(hipErrorHostMemoryNotRegistered, "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED")
    ERROR_NAME_CASE(hipErrorLaunchFailure, "CUDA_ERROR_LAUNCH_FAILED")
    ERROR_NAME_CASE(hipErrorNotSupported, "CUDA_ERROR_NOT_SUPPORTED")
    ERROR_NAME_CASE(hipErrorUnknown, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorRuntimeMemory, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorRuntimeOther, "CUDA_ERROR_UNKNOWN")
    ERROR_NAME_CASE(hipErrorCooperativeLaunchTooLarge, "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE")
    ERROR_NAME_CASE(hipErrorStreamCaptureUnsupported, "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED")
    ERROR_NAME_CASE(hipErrorStreamCaptureInvalidated, "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED")
    ERROR_NAME_CASE(hipErrorStreamCaptureMerge, "CUDA_ERROR_STREAM_CAPTURE_MERGE")
    ERROR_NAME_CASE(hipErrorStreamCaptureUnmatched, "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED")
    ERROR_NAME_CASE(hipErrorStreamCaptureUnjoined, "CUDA_ERROR_STREAM_CAPTURE_UNJOINED")
    ERROR_NAME_CASE(hipErrorStreamCaptureIsolation, "CUDA_ERROR_STREAM_CAPTURE_ISOLATION")
    ERROR_NAME_CASE(hipErrorStreamCaptureImplicit, "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT")
    ERROR_NAME_CASE(hipErrorCapturedEvent, "CUDA_ERROR_CAPTURED_EVENT")
    ERROR_NAME_CASE(hipErrorStreamCaptureWrongThread, "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD")
    ERROR_NAME_CASE(hipErrorGraphExecUpdateFailure, "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE")
#if HT_AMD
    // hipErrorTbd is only handled on HT_AMD builds.
    case hipErrorTbd:
      return "hipErrorTbd";
    default:
      return "hipErrorUnknown";
#else
    default:
      return "CUDA_ERROR_UNKNOWN";
#endif
  }

#undef ERROR_NAME_CASE
}
|
||||
|
||||
/**
 * Returns the reference message text for a HIP error enumerator.
 *
 * Messages that are worded differently per platform are selected at compile time:
 * the HT_AMD wording when HT_AMD is set, the CUDA wording otherwise.
 * Enumerators that are only listed for HT_AMD, and any value without a case label,
 * yield "unknown error".
 *
 * @param enumerator error code to describe
 * @return pointer to a string literal; never nullptr
 */
const char* ErrorString(hipError_t enumerator) {
  switch (enumerator) {
    case hipSuccess:
      return "no error";
    case hipErrorInvalidValue:
      return "invalid argument";
    case hipErrorOutOfMemory:
      return "out of memory";
    case hipErrorNotInitialized:
      return "initialization error";
    case hipErrorDeinitialized:
      return "driver shutting down";
    case hipErrorProfilerDisabled:
      return "profiler disabled while using external profiling tool";
    case hipErrorProfilerNotInitialized:
      // #else (rather than #elif HT_NVIDIA) guarantees this case always returns
      // instead of falling through into the next label.
#if HT_AMD
      return "profiler is not initialized";
#else
      return "profiler not initialized: call cudaProfilerInitialize()";
#endif
    case hipErrorProfilerAlreadyStarted:
      return "profiler already started";
    case hipErrorProfilerAlreadyStopped:
      return "profiler already stopped";

#if HT_AMD
    // These enumerators only get dedicated messages on AMD builds; elsewhere they are
    // handled by the default label below, which returns "unknown error".
    case hipErrorInvalidConfiguration:
      return "invalid configuration argument";
    case hipErrorInvalidPitchValue:
      return "invalid pitch argument";
    case hipErrorInvalidSymbol:
      return "invalid device symbol";
    case hipErrorInvalidDevicePointer:
      return "invalid device pointer";
    case hipErrorInvalidMemcpyDirection:
      return "invalid copy direction for memcpy";
    case hipErrorInsufficientDriver:
      return "driver version is insufficient for runtime version";
    case hipErrorMissingConfiguration:
      return "__global__ function call is not configured";
    case hipErrorPriorLaunchFailure:
      return "unspecified launch failure in prior launch";
    case hipErrorInvalidDeviceFunction:
      return "invalid device function";
#endif

    case hipErrorNoDevice:
#if HT_AMD
      return "no ROCm-capable device is detected";
#else
      return "no CUDA-capable device is detected";
#endif
    case hipErrorInvalidDevice:
      return "invalid device ordinal";
    case hipErrorInvalidImage:
      return "device kernel image is invalid";
    case hipErrorInvalidContext:
      return "invalid device context";
    case hipErrorContextAlreadyCurrent:
#if HT_AMD
      return "context is already current context";
#else
      return "context already current";
#endif
    case hipErrorMapFailed:
      return "mapping of buffer object failed";
    case hipErrorUnmapFailed:
      return "unmapping of buffer object failed";
    case hipErrorArrayIsMapped:
      return "array is mapped";
    case hipErrorAlreadyMapped:
      return "resource already mapped";
    case hipErrorNoBinaryForGpu:
      return "no kernel image is available for execution on the device";
    case hipErrorAlreadyAcquired:
      return "resource already acquired";
    case hipErrorNotMapped:
      return "resource not mapped";
    case hipErrorNotMappedAsArray:
      return "resource not mapped as array";
    case hipErrorNotMappedAsPointer:
      return "resource not mapped as pointer";
    case hipErrorECCNotCorrectable:
      return "uncorrectable ECC error encountered";
    case hipErrorUnsupportedLimit:
      return "limit is not supported on this architecture";
    case hipErrorContextAlreadyInUse:
      return "exclusive-thread device already in use by a different thread";
    case hipErrorPeerAccessUnsupported:
      return "peer access is not supported between these two devices";
    case hipErrorInvalidKernelFile:
#if HT_AMD
      return "invalid kernel file";
#else
      return "a PTX JIT compilation failed";
#endif
    case hipErrorInvalidGraphicsContext:
      return "invalid OpenGL or DirectX context";
    case hipErrorInvalidSource:
      return "device kernel image is invalid";
    case hipErrorFileNotFound:
      return "file not found";
    case hipErrorSharedObjectSymbolNotFound:
      return "shared object symbol not found";
    case hipErrorSharedObjectInitFailed:
      return "shared object initialization failed";
    case hipErrorOperatingSystem:
      return "OS call failed or operation not supported on this OS";
    case hipErrorInvalidHandle:
      return "invalid resource handle";
    case hipErrorIllegalState:
      return "the operation cannot be performed in the present state";
    case hipErrorNotFound:
      return "named symbol not found";
    case hipErrorNotReady:
      return "device not ready";
    case hipErrorIllegalAddress:
      return "an illegal memory access was encountered";
    case hipErrorLaunchOutOfResources:
      return "too many resources requested for launch";
    case hipErrorLaunchTimeOut:
      return "the launch timed out and was terminated";
    case hipErrorPeerAccessAlreadyEnabled:
      return "peer access is already enabled";
    case hipErrorPeerAccessNotEnabled:
      return "peer access has not been enabled";
    case hipErrorSetOnActiveProcess:
      return "cannot set while device is active in this process";
    case hipErrorContextIsDestroyed:
      return "context is destroyed";
    case hipErrorAssert:
      return "device-side assert triggered";
    case hipErrorHostMemoryAlreadyRegistered:
      return "part or all of the requested memory range is already mapped";
    case hipErrorHostMemoryNotRegistered:
      return "pointer does not correspond to a registered memory region";
    case hipErrorLaunchFailure:
      return "unspecified launch failure";
    case hipErrorCooperativeLaunchTooLarge:
      return "too many blocks in cooperative launch";
    case hipErrorNotSupported:
      return "operation not supported";
    case hipErrorStreamCaptureUnsupported:
      return "operation not permitted when stream is capturing";
    case hipErrorStreamCaptureInvalidated:
      return "operation failed due to a previous error during capture";
    case hipErrorStreamCaptureMerge:
      return "operation would result in a merge of separate capture sequences";
    case hipErrorStreamCaptureUnmatched:
      return "capture was not ended in the same stream as it began";
    case hipErrorStreamCaptureUnjoined:
      return "capturing stream has unjoined work";
    case hipErrorStreamCaptureIsolation:
      return "dependency created on uncaptured work in another stream";
    case hipErrorStreamCaptureImplicit:
      return "operation would make the legacy stream depend on a capturing blocking stream";  // NOLINT
    case hipErrorCapturedEvent:
      return "operation not permitted on an event last recorded in a capturing stream";  // NOLINT
    case hipErrorStreamCaptureWrongThread:
      return "attempt to terminate a thread-local capture sequence from another thread";  // NOLINT
    case hipErrorGraphExecUpdateFailure:
      return "the graph update was not performed because it included changes which violated "
             "constraints specific to instantiated graph update";  // NOLINT
    case hipErrorRuntimeMemory:
      return "runtime memory call returned error";
    case hipErrorRuntimeOther:
      return "runtime call other than memory returned error";
    case hipErrorUnknown:
    default:
      return "unknown error";
  }
}
|
||||
+10
-6
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -33,7 +33,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess,
|
||||
hipErrorProfilerNotInitialized,
|
||||
hipErrorProfilerAlreadyStarted,
|
||||
hipErrorProfilerAlreadyStopped,
|
||||
#if HT_AMD
|
||||
#if HT_AMD
|
||||
hipErrorInvalidConfiguration,
|
||||
hipErrorInvalidPitchValue,
|
||||
hipErrorInvalidSymbol,
|
||||
@@ -43,7 +43,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess,
|
||||
hipErrorMissingConfiguration,
|
||||
hipErrorPriorLaunchFailure,
|
||||
hipErrorInvalidDeviceFunction,
|
||||
#endif
|
||||
#endif
|
||||
hipErrorNoDevice,
|
||||
hipErrorInvalidDevice,
|
||||
hipErrorInvalidImage,
|
||||
@@ -97,8 +97,12 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess,
|
||||
hipErrorStreamCaptureWrongThread,
|
||||
hipErrorGraphExecUpdateFailure,
|
||||
hipErrorUnknown,
|
||||
#if HT_AMD
|
||||
#if HT_AMD
|
||||
hipErrorRuntimeMemory,
|
||||
hipErrorRuntimeOther
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
};
|
||||
|
||||
// Returns the reference name string for `enumerator`.
// NOTE(review): presumably this is what the hipDrvGetErrorName test compares the API output
// against via strcmp - confirm against the test sources that include this header.
const char* ErrorName(hipError_t enumerator);
|
||||
|
||||
// Returns the reference message text for `enumerator`; values without a dedicated
// message yield "unknown error".
const char* ErrorString(hipError_t enumerator);
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
@@ -17,347 +17,67 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include "errorEnumerators.h"
|
||||
|
||||
// Local Function to return the error code in string
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
// File-local lookup from a hipError_t enumerator to the name string the test expects.
// With HT_AMD set the name is the enumerator's own spelling; with HT_NVIDIA set it is the
// corresponding "CUDA_*" spelling. Values without an explicit label map to the platform's
// "unknown" name.
static const char *ErrorName(hipError_t enumerator) {
  switch (enumerator) {
#if HT_AMD
// Emits a case label that returns the enumerator's spelling as a string literal.
#define HT_HIP_NAME_CASE(code) \
  case code:                   \
    return #code;
    HT_HIP_NAME_CASE(hipSuccess)
    HT_HIP_NAME_CASE(hipErrorInvalidValue)
    HT_HIP_NAME_CASE(hipErrorOutOfMemory)
    HT_HIP_NAME_CASE(hipErrorNotInitialized)
    HT_HIP_NAME_CASE(hipErrorDeinitialized)
    HT_HIP_NAME_CASE(hipErrorProfilerDisabled)
    HT_HIP_NAME_CASE(hipErrorProfilerNotInitialized)
    HT_HIP_NAME_CASE(hipErrorProfilerAlreadyStarted)
    HT_HIP_NAME_CASE(hipErrorProfilerAlreadyStopped)
    HT_HIP_NAME_CASE(hipErrorInvalidConfiguration)
    HT_HIP_NAME_CASE(hipErrorInvalidSymbol)
    HT_HIP_NAME_CASE(hipErrorInvalidDevicePointer)
    HT_HIP_NAME_CASE(hipErrorInvalidMemcpyDirection)
    HT_HIP_NAME_CASE(hipErrorInsufficientDriver)
    HT_HIP_NAME_CASE(hipErrorMissingConfiguration)
    HT_HIP_NAME_CASE(hipErrorPriorLaunchFailure)
    HT_HIP_NAME_CASE(hipErrorInvalidDeviceFunction)
    HT_HIP_NAME_CASE(hipErrorNoDevice)
    HT_HIP_NAME_CASE(hipErrorInvalidDevice)
    HT_HIP_NAME_CASE(hipErrorInvalidPitchValue)
    HT_HIP_NAME_CASE(hipErrorInvalidImage)
    HT_HIP_NAME_CASE(hipErrorInvalidContext)
    HT_HIP_NAME_CASE(hipErrorContextAlreadyCurrent)
    HT_HIP_NAME_CASE(hipErrorMapFailed)
    HT_HIP_NAME_CASE(hipErrorUnmapFailed)
    HT_HIP_NAME_CASE(hipErrorArrayIsMapped)
    HT_HIP_NAME_CASE(hipErrorAlreadyMapped)
    HT_HIP_NAME_CASE(hipErrorNoBinaryForGpu)
    HT_HIP_NAME_CASE(hipErrorAlreadyAcquired)
    HT_HIP_NAME_CASE(hipErrorNotMapped)
    HT_HIP_NAME_CASE(hipErrorNotMappedAsArray)
    HT_HIP_NAME_CASE(hipErrorNotMappedAsPointer)
    HT_HIP_NAME_CASE(hipErrorECCNotCorrectable)
    HT_HIP_NAME_CASE(hipErrorUnsupportedLimit)
    HT_HIP_NAME_CASE(hipErrorContextAlreadyInUse)
    HT_HIP_NAME_CASE(hipErrorPeerAccessUnsupported)
    HT_HIP_NAME_CASE(hipErrorInvalidKernelFile)
    HT_HIP_NAME_CASE(hipErrorInvalidGraphicsContext)
    HT_HIP_NAME_CASE(hipErrorInvalidSource)
    HT_HIP_NAME_CASE(hipErrorFileNotFound)
    HT_HIP_NAME_CASE(hipErrorSharedObjectSymbolNotFound)
    HT_HIP_NAME_CASE(hipErrorSharedObjectInitFailed)
    HT_HIP_NAME_CASE(hipErrorOperatingSystem)
    HT_HIP_NAME_CASE(hipErrorInvalidHandle)
    HT_HIP_NAME_CASE(hipErrorIllegalState)
    HT_HIP_NAME_CASE(hipErrorNotFound)
    HT_HIP_NAME_CASE(hipErrorNotReady)
    HT_HIP_NAME_CASE(hipErrorIllegalAddress)
    HT_HIP_NAME_CASE(hipErrorLaunchOutOfResources)
    HT_HIP_NAME_CASE(hipErrorLaunchTimeOut)
    HT_HIP_NAME_CASE(hipErrorPeerAccessAlreadyEnabled)
    HT_HIP_NAME_CASE(hipErrorPeerAccessNotEnabled)
    HT_HIP_NAME_CASE(hipErrorSetOnActiveProcess)
    HT_HIP_NAME_CASE(hipErrorContextIsDestroyed)
    HT_HIP_NAME_CASE(hipErrorAssert)
    HT_HIP_NAME_CASE(hipErrorHostMemoryAlreadyRegistered)
    HT_HIP_NAME_CASE(hipErrorHostMemoryNotRegistered)
    HT_HIP_NAME_CASE(hipErrorLaunchFailure)
    HT_HIP_NAME_CASE(hipErrorNotSupported)
    HT_HIP_NAME_CASE(hipErrorRuntimeMemory)
    HT_HIP_NAME_CASE(hipErrorRuntimeOther)
    HT_HIP_NAME_CASE(hipErrorCooperativeLaunchTooLarge)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureUnsupported)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureInvalidated)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureMerge)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureUnmatched)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureUnjoined)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureIsolation)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureImplicit)
    HT_HIP_NAME_CASE(hipErrorCapturedEvent)
    HT_HIP_NAME_CASE(hipErrorStreamCaptureWrongThread)
    HT_HIP_NAME_CASE(hipErrorGraphExecUpdateFailure)
    HT_HIP_NAME_CASE(hipErrorTbd)
    // The explicit "unknown" enumerator shares its name with every unlisted value.
    case hipErrorUnknown:
    default:
      return "hipErrorUnknown";
#undef HT_HIP_NAME_CASE
#endif
#if HT_NVIDIA
// Emits a case label that returns the given CUDA-side name.
#define HT_CUDA_NAME_CASE(code, cuda_name) \
  case code:                               \
    return cuda_name;
    HT_CUDA_NAME_CASE(hipSuccess, "CUDA_SUCCESS")
    HT_CUDA_NAME_CASE(hipErrorInvalidValue, "CUDA_ERROR_INVALID_VALUE")
    HT_CUDA_NAME_CASE(hipErrorOutOfMemory, "CUDA_ERROR_OUT_OF_MEMORY")
    HT_CUDA_NAME_CASE(hipErrorNotInitialized, "CUDA_ERROR_NOT_INITIALIZED")
    HT_CUDA_NAME_CASE(hipErrorDeinitialized, "CUDA_ERROR_DEINITIALIZED")
    HT_CUDA_NAME_CASE(hipErrorProfilerDisabled, "CUDA_ERROR_PROFILER_DISABLED")
    HT_CUDA_NAME_CASE(hipErrorProfilerNotInitialized, "CUDA_ERROR_PROFILER_NOT_INITIALIZED")
    HT_CUDA_NAME_CASE(hipErrorProfilerAlreadyStarted, "CUDA_ERROR_PROFILER_ALREADY_STARTED")
    HT_CUDA_NAME_CASE(hipErrorProfilerAlreadyStopped, "CUDA_ERROR_PROFILER_ALREADY_STOPPED")
    HT_CUDA_NAME_CASE(hipErrorNoDevice, "CUDA_ERROR_NO_DEVICE")
    HT_CUDA_NAME_CASE(hipErrorInvalidDevice, "CUDA_ERROR_INVALID_DEVICE")
    HT_CUDA_NAME_CASE(hipErrorInvalidImage, "CUDA_ERROR_INVALID_IMAGE")
    HT_CUDA_NAME_CASE(hipErrorInvalidContext, "CUDA_ERROR_INVALID_CONTEXT")
    HT_CUDA_NAME_CASE(hipErrorContextAlreadyCurrent, "CUDA_ERROR_CONTEXT_ALREADY_CURRENT")
    HT_CUDA_NAME_CASE(hipErrorMapFailed, "CUDA_ERROR_MAP_FAILED")
    HT_CUDA_NAME_CASE(hipErrorUnmapFailed, "CUDA_ERROR_UNMAP_FAILED")
    HT_CUDA_NAME_CASE(hipErrorArrayIsMapped, "CUDA_ERROR_ARRAY_IS_MAPPED")
    HT_CUDA_NAME_CASE(hipErrorAlreadyMapped, "CUDA_ERROR_ALREADY_MAPPED")
    HT_CUDA_NAME_CASE(hipErrorNoBinaryForGpu, "CUDA_ERROR_NO_BINARY_FOR_GPU")
    HT_CUDA_NAME_CASE(hipErrorAlreadyAcquired, "CUDA_ERROR_ALREADY_ACQUIRED")
    HT_CUDA_NAME_CASE(hipErrorNotMapped, "CUDA_ERROR_NOT_MAPPED")
    HT_CUDA_NAME_CASE(hipErrorNotMappedAsArray, "CUDA_ERROR_NOT_MAPPED_AS_ARRAY")
    HT_CUDA_NAME_CASE(hipErrorNotMappedAsPointer, "CUDA_ERROR_NOT_MAPPED_AS_POINTER")
    HT_CUDA_NAME_CASE(hipErrorECCNotCorrectable, "CUDA_ERROR_ECC_UNCORRECTABLE")
    HT_CUDA_NAME_CASE(hipErrorUnsupportedLimit, "CUDA_ERROR_UNSUPPORTED_LIMIT")
    HT_CUDA_NAME_CASE(hipErrorContextAlreadyInUse, "CUDA_ERROR_CONTEXT_ALREADY_IN_USE")
    HT_CUDA_NAME_CASE(hipErrorPeerAccessUnsupported, "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED")
    HT_CUDA_NAME_CASE(hipErrorInvalidKernelFile, "CUDA_ERROR_INVALID_PTX")
    HT_CUDA_NAME_CASE(hipErrorInvalidGraphicsContext, "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT")
    HT_CUDA_NAME_CASE(hipErrorInvalidSource, "CUDA_ERROR_INVALID_SOURCE")
    HT_CUDA_NAME_CASE(hipErrorFileNotFound, "CUDA_ERROR_FILE_NOT_FOUND")
    HT_CUDA_NAME_CASE(hipErrorSharedObjectSymbolNotFound,
                      "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND")
    HT_CUDA_NAME_CASE(hipErrorSharedObjectInitFailed, "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED")
    HT_CUDA_NAME_CASE(hipErrorOperatingSystem, "CUDA_ERROR_OPERATING_SYSTEM")
    HT_CUDA_NAME_CASE(hipErrorInvalidHandle, "CUDA_ERROR_INVALID_HANDLE")
    HT_CUDA_NAME_CASE(hipErrorIllegalState, "CUDA_ERROR_ILLEGAL_STATE")
    HT_CUDA_NAME_CASE(hipErrorNotFound, "CUDA_ERROR_NOT_FOUND")
    HT_CUDA_NAME_CASE(hipErrorNotReady, "CUDA_ERROR_NOT_READY")
    HT_CUDA_NAME_CASE(hipErrorIllegalAddress, "CUDA_ERROR_ILLEGAL_ADDRESS")
    HT_CUDA_NAME_CASE(hipErrorLaunchOutOfResources, "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES")
    HT_CUDA_NAME_CASE(hipErrorLaunchTimeOut, "CUDA_ERROR_LAUNCH_TIMEOUT")
    HT_CUDA_NAME_CASE(hipErrorPeerAccessAlreadyEnabled, "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED")
    HT_CUDA_NAME_CASE(hipErrorPeerAccessNotEnabled, "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED")
    HT_CUDA_NAME_CASE(hipErrorSetOnActiveProcess, "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE")
    HT_CUDA_NAME_CASE(hipErrorContextIsDestroyed, "CUDA_ERROR_CONTEXT_IS_DESTROYED")
    HT_CUDA_NAME_CASE(hipErrorAssert, "CUDA_ERROR_ASSERT")
    HT_CUDA_NAME_CASE(hipErrorHostMemoryAlreadyRegistered,
                      "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED")
    HT_CUDA_NAME_CASE(hipErrorHostMemoryNotRegistered, "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED")
    HT_CUDA_NAME_CASE(hipErrorLaunchFailure, "CUDA_ERROR_LAUNCH_FAILED")
    HT_CUDA_NAME_CASE(hipErrorNotSupported, "CUDA_ERROR_NOT_SUPPORTED")
    HT_CUDA_NAME_CASE(hipErrorCooperativeLaunchTooLarge,
                      "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureUnsupported, "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureInvalidated, "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureMerge, "CUDA_ERROR_STREAM_CAPTURE_MERGE")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureUnmatched, "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureUnjoined, "CUDA_ERROR_STREAM_CAPTURE_UNJOINED")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureIsolation, "CUDA_ERROR_STREAM_CAPTURE_ISOLATION")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureImplicit, "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT")
    HT_CUDA_NAME_CASE(hipErrorCapturedEvent, "CUDA_ERROR_CAPTURED_EVENT")
    HT_CUDA_NAME_CASE(hipErrorStreamCaptureWrongThread, "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD")
    HT_CUDA_NAME_CASE(hipErrorGraphExecUpdateFailure, "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE")
    // Every enumerator below shares the "unknown" name with unlisted values.
    case hipErrorInvalidConfiguration:
    case hipErrorInvalidSymbol:
    case hipErrorInvalidDevicePointer:
    case hipErrorInvalidMemcpyDirection:
    case hipErrorInsufficientDriver:
    case hipErrorMissingConfiguration:
    case hipErrorPriorLaunchFailure:
    case hipErrorInvalidDeviceFunction:
    case hipErrorInvalidPitchValue:
    case hipErrorUnknown:
    case hipErrorRuntimeMemory:
    case hipErrorRuntimeOther:
    default:
      return "CUDA_ERROR_UNKNOWN";
#undef HT_CUDA_NAME_CASE
#endif
  }
}
|
||||
/**
|
||||
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
|
||||
* @{
|
||||
* @ingroup ErrorTest
|
||||
* `hipDrvGetErrorName(hipError_t hip_error)` -
|
||||
* Return hip error as text string form.
|
||||
*/
|
||||
|
||||
// Functional test case
|
||||
// Test case to verify the returned error name is same as generated error name.
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Validate that the correct string is returned for each supported
 * device error enumeration.
 * Test source
 * ------------------------
 *  - unit/errorHandling/hipDrvGetErrorName.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.4
 */
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
  const char* error_string = nullptr;
  // One test run per entry of kErrorEnumerators.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  // Attach the enumerator under test to any failure report below.
  INFO("Error: " << enumerator);

  // The call itself must succeed before the returned pointer is inspected.
  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));

  REQUIRE(error_string != nullptr);
  REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
}
|
||||
|
||||
// Negative test cases.
|
||||
|
||||
/**
 * Test Description
 * ------------------------
 *  - Validate handling of invalid arguments:
 *    -# When error enumerator is invalid (-1)
 *      - Expected output: return `hipErrorInvalidValue`
 *    -# When nullptr is passed as store location (AMD only, segfaults on NVIDIA)
 *      - Expected output: return `hipErrorInvalidValue`
 * Test source
 * ------------------------
 *  - unit/errorHandling/hipDrvGetErrorName.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.4
 */
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
  const char* error_string = nullptr;

  SECTION("pass unknown value to hipError") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }

#if HT_AMD  // segfaults on NVIDIA
  SECTION("pass nullptr to error string") {
    // Exercise hipDrvGetErrorName here: this file covers the name API, and the string API
    // is not the function under test.
    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
  }
#endif
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
@@ -17,247 +17,67 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include "errorEnumerators.h"
|
||||
|
||||
// Local Function to return the error string.
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
static const char *ErrorString(hipError_t enumerator) {
|
||||
switch (enumerator) {
|
||||
case hipSuccess:
|
||||
return "no error";
|
||||
case hipErrorInvalidValue:
|
||||
return "invalid argument";
|
||||
case hipErrorOutOfMemory:
|
||||
return "out of memory";
|
||||
case hipErrorNotInitialized:
|
||||
return "initialization error";
|
||||
case hipErrorDeinitialized:
|
||||
return "driver shutting down";
|
||||
case hipErrorProfilerDisabled:
|
||||
return "profiler disabled while using external profiling tool";
|
||||
case hipErrorProfilerNotInitialized:
|
||||
#if HT_AMD
|
||||
return "profiler is not initialized";
|
||||
#elif HT_NVIDIA
|
||||
return "profiler not initialized: call cudaProfilerInitialize()";
|
||||
#endif
|
||||
case hipErrorProfilerAlreadyStarted:
|
||||
return "profiler already started";
|
||||
case hipErrorProfilerAlreadyStopped:
|
||||
return "profiler already stopped";
|
||||
#if HT_AMD
|
||||
case hipErrorInvalidConfiguration:
|
||||
return "invalid configuration argument";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorInvalidPitchValue:
|
||||
return "invalid pitch argument";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorInvalidSymbol:
|
||||
return "invalid device symbol";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorInvalidDevicePointer:
|
||||
return "invalid device pointer";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorInvalidMemcpyDirection:
|
||||
return "invalid copy direction for memcpy";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorInsufficientDriver:
|
||||
return "driver version is insufficient for runtime version";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorMissingConfiguration:
|
||||
return "__global__ function call is not configured";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorPriorLaunchFailure:
|
||||
return "unspecified launch failure in prior launch";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
#if HT_AMD
|
||||
case hipErrorInvalidDeviceFunction:
|
||||
return "invalid device function";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
case hipErrorNoDevice:
|
||||
#if HT_AMD
|
||||
return "no ROCm-capable device is detected";
|
||||
#elif HT_NVIDIA
|
||||
return "no CUDA-capable device is detected";
|
||||
#endif
|
||||
case hipErrorInvalidDevice:
|
||||
return "invalid device ordinal";
|
||||
case hipErrorInvalidImage:
|
||||
return "device kernel image is invalid";
|
||||
case hipErrorInvalidContext:
|
||||
return "invalid device context";
|
||||
case hipErrorContextAlreadyCurrent:
|
||||
#if HT_AMD
|
||||
return "context is already current context";
|
||||
#elif HT_NVIDIA
|
||||
return "context already current";
|
||||
#endif
|
||||
case hipErrorMapFailed:
|
||||
return "mapping of buffer object failed";
|
||||
case hipErrorUnmapFailed:
|
||||
return "unmapping of buffer object failed";
|
||||
case hipErrorArrayIsMapped:
|
||||
return "array is mapped";
|
||||
case hipErrorAlreadyMapped:
|
||||
return "resource already mapped";
|
||||
case hipErrorNoBinaryForGpu:
|
||||
return "no kernel image is available for execution on the device";
|
||||
case hipErrorAlreadyAcquired:
|
||||
return "resource already acquired";
|
||||
case hipErrorNotMapped:
|
||||
return "resource not mapped";
|
||||
case hipErrorNotMappedAsArray:
|
||||
return "resource not mapped as array";
|
||||
case hipErrorNotMappedAsPointer:
|
||||
return "resource not mapped as pointer";
|
||||
case hipErrorECCNotCorrectable:
|
||||
return "uncorrectable ECC error encountered";
|
||||
case hipErrorUnsupportedLimit:
|
||||
return "limit is not supported on this architecture";
|
||||
case hipErrorContextAlreadyInUse:
|
||||
return "exclusive-thread device already in use by a different thread";
|
||||
case hipErrorPeerAccessUnsupported:
|
||||
return "peer access is not supported between these two devices";
|
||||
case hipErrorInvalidKernelFile:
|
||||
#if HT_AMD
|
||||
return "invalid kernel file";
|
||||
#elif HT_NVIDIA
|
||||
return "a PTX JIT compilation failed";
|
||||
#endif
|
||||
case hipErrorInvalidGraphicsContext:
|
||||
return "invalid OpenGL or DirectX context";
|
||||
case hipErrorInvalidSource:
|
||||
return "device kernel image is invalid";
|
||||
case hipErrorFileNotFound:
|
||||
return "file not found";
|
||||
case hipErrorSharedObjectSymbolNotFound:
|
||||
return "shared object symbol not found";
|
||||
case hipErrorSharedObjectInitFailed:
|
||||
return "shared object initialization failed";
|
||||
case hipErrorOperatingSystem:
|
||||
return "OS call failed or operation not supported on this OS";
|
||||
case hipErrorInvalidHandle:
|
||||
return "invalid resource handle";
|
||||
case hipErrorIllegalState:
|
||||
return "the operation cannot be performed in the present state";
|
||||
case hipErrorNotFound:
|
||||
return "named symbol not found";
|
||||
case hipErrorNotReady:
|
||||
return "device not ready";
|
||||
case hipErrorIllegalAddress:
|
||||
return "an illegal memory access was encountered";
|
||||
case hipErrorLaunchOutOfResources:
|
||||
return "too many resources requested for launch";
|
||||
case hipErrorLaunchTimeOut:
|
||||
return "the launch timed out and was terminated";
|
||||
case hipErrorPeerAccessAlreadyEnabled:
|
||||
return "peer access is already enabled";
|
||||
case hipErrorPeerAccessNotEnabled:
|
||||
return "peer access has not been enabled";
|
||||
case hipErrorSetOnActiveProcess:
|
||||
return "cannot set while device is active in this process";
|
||||
case hipErrorContextIsDestroyed:
|
||||
return "context is destroyed";
|
||||
case hipErrorAssert:
|
||||
return "device-side assert triggered";
|
||||
case hipErrorHostMemoryAlreadyRegistered:
|
||||
return "part or all of the requested memory range is already mapped";
|
||||
case hipErrorHostMemoryNotRegistered:
|
||||
return "pointer does not correspond to a registered memory region";
|
||||
case hipErrorLaunchFailure:
|
||||
return "unspecified launch failure";
|
||||
case hipErrorCooperativeLaunchTooLarge:
|
||||
return "too many blocks in cooperative launch";
|
||||
case hipErrorNotSupported:
|
||||
return "operation not supported";
|
||||
case hipErrorStreamCaptureUnsupported:
|
||||
return "operation not permitted when stream is capturing";
|
||||
case hipErrorStreamCaptureInvalidated:
|
||||
return "operation failed due to a previous error during capture";
|
||||
case hipErrorStreamCaptureMerge:
|
||||
return "operation would result in a merge of separate capture sequences";
|
||||
case hipErrorStreamCaptureUnmatched:
|
||||
return "capture was not ended in the same stream as it began";
|
||||
case hipErrorStreamCaptureUnjoined:
|
||||
return "capturing stream has unjoined work";
|
||||
case hipErrorStreamCaptureIsolation:
|
||||
return "dependency created on uncaptured work in another stream";
|
||||
case hipErrorStreamCaptureImplicit:
|
||||
return "operation would make the legacy stream depend on a capturing blocking stream"; //NOLINT
|
||||
case hipErrorCapturedEvent:
|
||||
return "operation not permitted on an event last recorded in a capturing stream"; //NOLINT
|
||||
case hipErrorStreamCaptureWrongThread:
|
||||
return "attempt to terminate a thread-local capture sequence from another thread"; //NOLINT
|
||||
case hipErrorGraphExecUpdateFailure:
|
||||
return "the graph update was not performed because it included changes which violated constraints specific to instantiated graph update"; //NOLINT
|
||||
case hipErrorRuntimeMemory:
|
||||
return "runtime memory call returned error";
|
||||
case hipErrorRuntimeOther:
|
||||
return "runtime call other than memory returned error";
|
||||
case hipErrorUnknown:
|
||||
default:
|
||||
#if HT_AMD
|
||||
return "unknown error";
|
||||
#elif HT_NVIDIA
|
||||
return "unknown error";
|
||||
#endif
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
|
||||
* @{
|
||||
* @ingroup ErrorTest
|
||||
* `hipDrvGetErrorString(hipError_t hipError)` -
|
||||
* Provides a human-readable text message that explains the given error code.
|
||||
*/
|
||||
|
||||
// Test case to verify the returned error string is
|
||||
// same as generated error string.
|
||||
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Functional") {
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate that the correct string is returned for each supported
|
||||
* device error enumeration.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
|
||||
const char* error_string = nullptr;
|
||||
const auto enumerator =
|
||||
GENERATE(from_range(std::begin(kErrorEnumerators),
|
||||
std::end(kErrorEnumerators)));
|
||||
hipError_t error_ret = hipDrvGetErrorString(enumerator, &error_string);
|
||||
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
||||
INFO("Error: " << enumerator);
|
||||
|
||||
HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));
|
||||
|
||||
REQUIRE(error_string != nullptr);
|
||||
REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
|
||||
REQUIRE(error_ret == hipSuccess);
|
||||
}
|
||||
|
||||
// Negative test cases.
|
||||
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Negative") {
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate handling of invalid arguments:
|
||||
* -# When error enumerator is invalid (-1)
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* -# When nullptr is passed as store location
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
|
||||
const char* error_string = nullptr;
|
||||
SECTION("pass unknown value to hipError") {
|
||||
REQUIRE((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string))
|
||||
== hipErrorInvalidValue);
|
||||
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#if HT_AMD
|
||||
#if HT_AMD // segfaults on NVIDIA
|
||||
SECTION("pass nullptr to error string") {
|
||||
REQUIRE((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr))
|
||||
== hipErrorInvalidValue);
|
||||
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -20,10 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "errorEnumerators.h"
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipGetErrorName hipGetErrorName
|
||||
@@ -49,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorName_Positive_Basic") {
|
||||
const char* error_string = nullptr;
|
||||
const auto enumerator =
|
||||
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
||||
INFO("Error: " << enumerator);
|
||||
|
||||
error_string = hipGetErrorName(enumerator);
|
||||
|
||||
|
||||
@@ -20,9 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "errorEnumerators.h"
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipGetErrorString hipGetErrorString
|
||||
@@ -48,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorString_Positive_Basic") {
|
||||
const char* error_string = nullptr;
|
||||
const auto enumerator =
|
||||
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
||||
INFO("Error: " << enumerator);
|
||||
|
||||
error_string = hipGetErrorString(enumerator);
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <threaded_zig_zag_test.hh>
|
||||
|
||||
/**
|
||||
@@ -56,7 +55,8 @@ TEST_CASE("Unit_hipPeekAtLastError_Positive_Basic") {
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate that appropriate error is returned when working with multiple threads.
|
||||
* - Validate that appropriate error is returned for getting the last erro when working with multiple threads.
|
||||
* - Validate that appropriate error is returned for getting the last error when working with
|
||||
* multiple threads.
|
||||
* - Cause error on purpose within one of the threads.
|
||||
* Test source
|
||||
* ------------------------
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
constexpr size_t buffer_size = (1024*1024);
|
||||
|
||||
@@ -4,6 +4,7 @@ set(TEST_SRC
|
||||
hipFuncSetSharedMemConfig.cc
|
||||
hipFuncSetAttribute.cc
|
||||
hipFuncGetAttributes.cc
|
||||
hipLaunchKernel.cc
|
||||
hipLaunchCooperativeKernel.cc
|
||||
hipLaunchCooperativeKernelMultiDevice.cc
|
||||
)
|
||||
@@ -12,6 +13,7 @@ if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC ${TEST_SRC}
|
||||
hipExtLaunchKernel.cc
|
||||
hipExtLaunchMultiKernelMultiDevice.cc
|
||||
launch_api.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -49,19 +49,19 @@ TEST_CASE("Unit_hipExtLaunchKernel_Positive_Basic") {
|
||||
|
||||
TEST_CASE("Unit_hipExtLaunchKernel_Positive_Parameters") {
|
||||
SECTION("blockDim.x == maxBlockDimX") {
|
||||
const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX);
|
||||
const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0);
|
||||
HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{x, 1, 1},
|
||||
nullptr, 0, nullptr, nullptr, nullptr, 0u));
|
||||
}
|
||||
|
||||
SECTION("blockDim.y == maxBlockDimY") {
|
||||
const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY);
|
||||
const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0);
|
||||
HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{y, 1, 1},
|
||||
nullptr, 0, nullptr, nullptr, nullptr, 0u));
|
||||
}
|
||||
|
||||
SECTION("blockDim.z == maxBlockDimZ") {
|
||||
const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ);
|
||||
const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0);
|
||||
HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{z, 1, 1},
|
||||
nullptr, 0, nullptr, nullptr, nullptr, 0u));
|
||||
}
|
||||
@@ -111,28 +111,28 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") {
|
||||
}
|
||||
|
||||
SECTION("blockDim.x > maxBlockDimX") {
|
||||
const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u;
|
||||
const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u;
|
||||
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
|
||||
dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
|
||||
hipErrorInvalidConfiguration);
|
||||
}
|
||||
|
||||
SECTION("blockDim.y > maxBlockDimY") {
|
||||
const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u;
|
||||
const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u;
|
||||
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
|
||||
dim3{1, y, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
|
||||
hipErrorInvalidConfiguration);
|
||||
}
|
||||
|
||||
SECTION("blockDim.z > maxBlockDimZ") {
|
||||
const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u;
|
||||
const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u;
|
||||
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
|
||||
dim3{1, 1, z}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
|
||||
hipErrorInvalidConfiguration);
|
||||
}
|
||||
|
||||
SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") {
|
||||
const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
|
||||
const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0);
|
||||
const unsigned int dim = std::ceil(std::cbrt(max));
|
||||
HIP_CHECK_ERROR(
|
||||
hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim},
|
||||
@@ -141,7 +141,7 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") {
|
||||
}
|
||||
|
||||
SECTION("sharedMemBytes > maxSharedMemoryPerBlock") {
|
||||
const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u;
|
||||
const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u;
|
||||
HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, nullptr, max, nullptr, nullptr, nullptr, 0u),
|
||||
hipErrorOutOfMemory);
|
||||
|
||||
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Ссылка в новой задаче
Block a user