From 405d029422ba8bb6be5a233d5eebedd2ad2e8bd3 Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary Date: Wed, 13 Sep 2023 18:02:31 +0100 Subject: [PATCH] SWDEV-420894 - Remove directed tests Point users to correct hip-tests path. Remove references of directed test from code. Replace documentation which used directed test with a native sample. Change-Id: I3e8b5b5b40c0847e03d3aa637ea5b0c0d06c1201 --- docs/developer_guide/build.md | 39 +- docs/developer_guide/contributing.md | 20 +- docs/how_to_guides/debugging.md | 132 +- docs/user_guide/programming_manual.md | 3 - include/hip/hip_ext.h | 3 - tests/README.md | 185 --- tests/Tests.cmake | 52 - tests/hit/HIT.cmake | 506 ------- tests/hit/parser | 139 -- tests/src/.gitignore | 0 .../device/hipFuncDeviceSynchronize.cpp | 76 -- .../Functional/device/hipFuncGetDevice.cpp | 43 - .../Functional/device/hipFuncSetDevice.cpp | 36 - .../device/hipFuncSetDeviceFlags.cpp | 49 - tests/src/Functional/host/hipFloat16.cpp | 59 - tests/src/Makefile | 21 - .../Negative/Device/hipDeviceGetAttribute.cpp | 20 - tests/src/Negative/Device/hipDeviceUtil.h | 11 - tests/src/Negative/Device/hipGetDevice.cpp | 9 - .../src/Negative/Device/hipGetDeviceCount.cpp | 9 - .../Device/hipGetDeviceProperties.cpp | 13 - tests/src/Negative/Device/hipSetDevice.cpp | 19 - .../Negative/memory/hipMemcpyFromSymbol.cpp | 46 - .../memory/hipMemcpyFromSymbolAsync.cpp | 49 - .../src/Negative/memory/hipMemcpyToSymbol.cpp | 46 - .../memory/hipMemcpyToSymbolAsync.cpp | 49 - tests/src/Negative/memory/hipMemory.cpp | 43 - .../stream/hipStreamCreateWithFlags.cpp | 40 - tests/src/clara/clara.hpp | 1165 ---------------- tests/src/compiler/hipClassKernel.cpp | 355 ----- tests/src/compiler/hipClassKernel.h | 239 ---- tests/src/context/hipCtx_simple.cpp | 52 - tests/src/context/hipDrvGetPCIBusId.cpp | 35 - tests/src/context/hipDrvMemcpy.cpp | 44 - tests/src/context/hipMemsetD8.cpp | 64 - tests/src/cppstd/hipInvocable11.cpp | 31 - tests/src/cppstd/hipInvocable11Fallback.cpp | 34 - 
tests/src/cppstd/hipInvocable14.cpp | 31 - tests/src/cppstd/hipInvocable17.cpp | 31 - tests/src/cppstd/is_callable_test.hpp | 209 --- tests/src/cudaRegister.cu | 90 -- tests/src/deviceLib/hipBfloat16.cpp | 137 -- tests/src/deviceLib/hipComplex.cpp | 527 -------- tests/src/deviceLib/hipDeviceMalloc.cpp | 188 --- tests/src/deviceLib/hipDeviceMemcpy.cpp | 60 - .../hipDoublePrecisionIntrinsics.cpp | 90 -- .../hipDoublePrecisionMathDevice.cpp | 147 -- .../deviceLib/hipDoublePrecisionMathHost.cpp | 158 --- tests/src/deviceLib/hipFloatMath.cpp | 66 - tests/src/deviceLib/hipFloatMathPrecise.cpp | 132 -- tests/src/deviceLib/hipHalf2Comparision.cpp | 341 ----- tests/src/deviceLib/hipIntegerIntrinsics.cpp | 78 -- tests/src/deviceLib/hipLaunchKernelFunc.cpp | 193 --- tests/src/deviceLib/hipMathFunctions.cpp | 200 --- tests/src/deviceLib/hipSimpleAtomicsTest.cpp | 365 ----- .../hipSinglePrecisionIntrinsics.cpp | 111 -- .../hipSinglePrecisionMathDevice.cpp | 139 -- .../deviceLib/hipSinglePrecisionMathHost.cpp | 152 --- tests/src/deviceLib/hipStdComplex.cpp | 167 --- tests/src/deviceLib/hipTestAtomicAdd.cpp | 329 ----- tests/src/deviceLib/hipTestClock.cpp | 86 -- tests/src/deviceLib/hipTestDevice.cpp | 742 ---------- tests/src/deviceLib/hipTestDeviceDouble.cpp | 664 --------- tests/src/deviceLib/hipTestDeviceLimit.cpp | 28 - tests/src/deviceLib/hipTestDeviceSymbol.cpp | 131 -- tests/src/deviceLib/hipTestDotFunctions.cpp | 69 - tests/src/deviceLib/hipTestFMA.cpp | 187 --- tests/src/deviceLib/hipTestHalf.cpp | 294 ---- tests/src/deviceLib/hipTestHost.cpp | 384 ------ tests/src/deviceLib/hipTestIncludeMath.cpp | 151 --- tests/src/deviceLib/hipTestNativeHalf.cpp | 188 --- tests/src/deviceLib/hipTestNew.cpp | 69 - tests/src/deviceLib/hipThreadFence.cpp | 73 - tests/src/deviceLib/hipVectorTypes.cpp | 227 ---- tests/src/deviceLib/hipVectorTypesDevice.cpp | 319 ----- tests/src/deviceLib/hipVersion.cpp | 42 - tests/src/deviceLib/hip_anyall.cpp | 100 -- tests/src/deviceLib/hip_ballot.cpp | 
97 -- tests/src/deviceLib/hip_bitextract.cpp | 221 --- tests/src/deviceLib/hip_bitinsert.cpp | 239 ---- tests/src/deviceLib/hip_brev.cpp | 175 --- tests/src/deviceLib/hip_clz.cpp | 190 --- tests/src/deviceLib/hip_ffs.cpp | 170 --- tests/src/deviceLib/hip_floatnTM.cpp | 239 ---- tests/src/deviceLib/hip_funnelshift.cpp | 252 ---- tests/src/deviceLib/hip_mbcnt.cpp | 128 -- tests/src/deviceLib/hip_popc.cpp | 166 --- tests/src/deviceLib/hip_test_ldg.cpp | 332 ----- tests/src/deviceLib/hip_test_make_type.cpp | 383 ------ .../deviceLib/hip_test_syncthreads_and.cpp | 155 --- .../deviceLib/hip_test_syncthreads_count.cpp | 169 --- .../src/deviceLib/hip_test_syncthreads_or.cpp | 155 --- .../src/deviceLib/hip_threadfence_system.cpp | 136 -- tests/src/deviceLib/hip_trig.cpp | 112 -- tests/src/deviceLib/vector_test_common.h | 27 - .../src/dynamicLoading/bit_extract_kernel.cpp | 34 - .../complex_loading_behavior.cpp | 368 ----- .../src/dynamicLoading/hipApiDynamicLoad.cpp | 163 --- tests/src/experimental/xcompile/gApi.c | 31 - tests/src/experimental/xcompile/gHipApi.c | 68 - tests/src/experimental/xcompile/gHipApi.h | 43 - tests/src/experimental/xcompile/gapi.sh | 9 - tests/src/experimental/xcompile/ghipapi.sh | 8 - tests/src/experimental/xcompile/gxxApi.cpp | 32 - tests/src/experimental/xcompile/gxxApi1.cpp | 31 - tests/src/experimental/xcompile/gxxApi1.h | 31 - tests/src/experimental/xcompile/gxxHipApi.cpp | 34 - tests/src/experimental/xcompile/gxxHipApi.h | 75 -- tests/src/experimental/xcompile/gxxapi.sh | 8 - tests/src/experimental/xcompile/gxxhipapi.sh | 8 - tests/src/experimental/xcompile/hHip.c | 63 - tests/src/experimental/xcompile/hHipApi.c | 44 - tests/src/experimental/xcompile/hipapig.sh | 3 - tests/src/experimental/xcompile/hipapigxx.sh | 3 - tests/src/experimental/xcompile/hipg.sh | 8 - tests/src/experimental/xcompile/hipgapi.sh | 8 - tests/src/experimental/xcompile/hipgxx.sh | 7 - tests/src/experimental/xcompile/hipgxxapi.sh | 7 - 
tests/src/experimental/xcompile/hipxxKer.cpp | 53 - tests/src/experimental/xcompile/hxxHip.cpp | 64 - tests/src/experimental/xcompile/hxxHipApi.cpp | 61 - tests/src/g++/hipMalloc.cpp | 35 - tests/src/gcc/LaunchKernel.c | 182 --- tests/src/gcc/LaunchKernel.h | 43 - tests/src/gcc/gpu.cpp | 63 - tests/src/gcc/hipMalloc.c | 37 - tests/src/hipC.cpp | 47 - tests/src/hipCKernel.c | 20 - tests/src/hipEnvVar.cpp | 110 -- tests/src/hipEnvVarDriver.cpp | 152 --- tests/src/hiprtc/hiprtcGetLoweredName.cpp | 170 --- tests/src/hiprtc/saxpy.cpp | 173 --- tests/src/hostcall/hipHostcallFuncCall.cpp | 106 -- tests/src/hostcall/hipHostcallPrintThings.cpp | 125 -- tests/src/ipc/MultiProcess.h | 157 --- tests/src/ipc/hipMultiProcIpcEvent.cpp | 126 -- tests/src/ipc/hipMultiProcIpcMem.cpp | 103 -- tests/src/kernel/hipDynamicShared.cpp | 156 --- tests/src/kernel/hipDynamicShared2.cpp | 74 - tests/src/kernel/hipEmptyKernel.cpp | 34 - tests/src/kernel/hipExtLaunchKernelGGL.cpp | 106 -- tests/src/kernel/hipGridLaunch.cpp | 116 -- tests/src/kernel/hipLanguageExtensions.cpp | 105 -- tests/src/kernel/hipLaunchParm.cpp | 962 ------------- tests/src/kernel/hipLaunchParmFunctor.cpp | 425 ------ tests/src/kernel/hipPrintfKernel.cpp | 41 - tests/src/kernel/hipShflTests.cpp | 176 --- tests/src/kernel/hipShflUpDownTest.cpp | 185 --- tests/src/kernel/hipTestConstant.cpp | 62 - tests/src/kernel/hipTestGlobalVariable.cpp | 91 -- tests/src/kernel/hipTestMallocKernel.cpp | 66 - tests/src/kernel/hipTestMemKernel.cpp | 230 ---- tests/src/kernel/inline_asm_vadd.cpp | 120 -- tests/src/kernel/inline_asm_vmac.cpp | 112 -- tests/src/kernel/launch_bounds.cpp | 112 -- tests/src/nvcc/Device/hipChooseDevice.cpp | 29 - .../src/nvcc/Device/hipDeviceGetAttribute.cpp | 38 - .../nvcc/Device/hipDeviceGetCacheConfig.cpp | 27 - tests/src/nvcc/Device/hipDeviceGetLimit.cpp | 26 - .../Device/hipDeviceGetSharedMemConfig.cpp | 27 - tests/src/nvcc/Device/hipGetDevice.cpp | 26 - tests/src/nvcc/Device/hipGetDeviceCount.cpp | 23 - 
.../nvcc/Device/hipGetDeviceProperties.cpp | 30 - tests/src/nvcc/Device/hipSetDevice.cpp | 28 - tests/src/nvcc/Device/hipSetDeviceFlags.cpp | 26 - tests/src/p2p/hipPeerToPeer_simple.cpp | 462 ------- tests/src/printf/hipPrintfAltForms.cpp | 83 -- tests/src/printf/hipPrintfBasic.cpp | 327 ----- tests/src/printf/hipPrintfFlags.cpp | 71 - tests/src/printf/hipPrintfManyDevices.cpp | 111 -- tests/src/printf/hipPrintfManyTypes.cpp | 242 ---- tests/src/printf/hipPrintfManyWaves.cpp | 382 ------ tests/src/printf/hipPrintfSpecifiers.cpp | 145 -- tests/src/printf/hipPrintfStar.cpp | 64 - tests/src/printf/hipPrintfUtil.h | 513 ------- tests/src/printf/hipPrintfWidthPrecision.cpp | 77 -- tests/src/printf/printf_common.h | 163 --- .../cooperativeGrps/api_failure_tests.cpp | 286 ---- .../coalesced_groups_shfl_down.cpp | 276 ---- .../coalesced_groups_shfl_up.cpp | 260 ---- .../cooperativeGrps/cooperative_streams.cpp | 465 ------- .../grid_group_data_sharing.cpp | 303 ----- .../cooperativeGrps/hipCGGridGroupType.cpp | 168 --- .../hipCGGridGroupTypeViaBaseType.cpp | 168 --- .../hipCGGridGroupTypeViaPublicApi.cpp | 168 --- .../hipLaunchCoopMultiKernel.cpp | 212 --- .../hipLaunchCooperativeKernel.cpp | 163 --- .../multi_gpu_api_failure_tests.cpp | 587 -------- .../cooperativeGrps/multi_gpu_streams.cpp | 660 --------- .../multi_grid_group_all_gpus.cpp | 470 ------- .../simple_coalesced_groups.cpp | 583 -------- .../simple_grid_group_barrier.cpp | 286 ---- .../simple_multi_grid_group_barrier.cpp | 470 ------- .../simple_tiled_partition.cpp | 400 ------ .../thread_block_tile_shfl_ops.cpp | 231 ---- .../thread_block_tiled_shfl_up.cpp | 179 --- .../src/runtimeApi/device/hipChooseDevice.cpp | 49 - .../device/hipDeviceComputeCapability.cpp | 45 - .../device/hipDeviceGetByPCIBusId.cpp | 182 --- .../runtimeApi/device/hipDeviceGetName.cpp | 46 - .../device/hipDeviceGetPCIBusId.cpp | 233 ---- .../device/hipDeviceSynchronize.cpp | 76 -- .../runtimeApi/device/hipDeviceTotalMem.cpp | 44 - 
.../device/hipGetDeviceAttribute.cpp | 303 ----- .../device/hipGetDeviceProperties.cpp | 273 ---- .../device/hipRuntimeGetVersion.cpp | 39 - .../runtimeApi/device/hipSetCachceConfig.cpp | 35 - .../runtimeApi/device/hipSetDeviceFlags.cpp | 49 - .../src/runtimeApi/device/hipSetGetDevice.cpp | 679 ---------- .../runtimeApi/error/hipPeekAtLastError.cpp | 36 - tests/src/runtimeApi/event/hipEvent.cpp | 84 -- .../runtimeApi/event/hipEventElapsedTime.cpp | 118 -- tests/src/runtimeApi/event/hipEventIpc.cpp | 107 -- .../event/hipEventMultiThreaded.cpp | 133 -- tests/src/runtimeApi/event/hipEventRecord.cpp | 103 -- tests/src/runtimeApi/event/record_event.cpp | 196 --- tests/src/runtimeApi/graph/hipChildGraph.cpp | 191 --- tests/src/runtimeApi/graph/hipGraph.cpp | 359 ----- .../graph/hipSimpleGraphWithKernel.cpp | 142 -- .../memory/d2dMemCpyWithPinnedHostMemory.cpp | 258 ---- tests/src/runtimeApi/memory/hipArray.cpp | 297 ---- .../src/runtimeApi/memory/hipHostGetFlags.cpp | 87 -- tests/src/runtimeApi/memory/hipHostMalloc.cpp | 211 --- .../runtimeApi/memory/hipHostMallocTests.cpp | 90 -- .../runtimeApi/memory/hipHostMallocTests.h | 95 -- .../src/runtimeApi/memory/hipHostRegister.cpp | 183 --- .../runtimeApi/memory/hipIpcMemAccessTest.cpp | 242 ---- .../memory/hipMallocConcurrency.cpp | 503 ------- .../runtimeApi/memory/hipMallocManaged.cpp | 95 -- .../memory/hipMallocManaged_MultiScenario.cpp | 463 ------- .../runtimeApi/memory/hipManagedKeyword.cpp | 84 -- .../runtimeApi/memory/hipMemPtrGetInfo.cpp | 52 - tests/src/runtimeApi/memory/hipMemcpy.cpp | 529 -------- tests/src/runtimeApi/memory/hipMemcpy2D.cpp | 341 ----- .../runtimeApi/memory/hipMemcpy2DAsync.cpp | 395 ------ .../memory/hipMemcpy2DFromArray.cpp | 315 ----- .../memory/hipMemcpy2DFromArrayAsync.cpp | 321 ----- .../runtimeApi/memory/hipMemcpy2D_simple.cpp | 114 -- tests/src/runtimeApi/memory/hipMemcpy3D.cpp | 111 -- tests/src/runtimeApi/memory/hipMemcpyAll.cpp | 128 -- .../src/runtimeApi/memory/hipMemcpyAsync.cpp | 
381 ------ .../src/runtimeApi/memory/hipMemcpyAsync2.cpp | 39 - tests/src/runtimeApi/memory/hipMemcpyAtoH.cpp | 229 ---- tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp | 85 -- .../runtimeApi/memory/hipMemcpyDtoDAsync.cpp | 91 -- tests/src/runtimeApi/memory/hipMemcpyHtoA.cpp | 239 ---- .../memory/hipMemcpyNegativeMThrdMSize.cpp | 1192 ----------------- .../memory/hipMemcpyNegetiveTests.cpp | 53 - tests/src/runtimeApi/memory/hipMemcpyPeer.cpp | 86 -- .../runtimeApi/memory/hipMemcpyPeerAsync.cpp | 93 -- .../runtimeApi/memory/hipMemcpyWithStream.cpp | 513 ------- .../memory/hipMemcpyWithStreamMultiThread.cpp | 668 --------- .../runtimeApi/memory/hipMemcpy_simple.cpp | 180 --- .../memory/hipMemoryAllocateCoherent.cpp | 60 - .../hipMemoryAllocateCoherentDriver.cpp | 66 - tests/src/runtimeApi/memory/hipMemset.cpp | 210 --- tests/src/runtimeApi/memory/hipMemset2D.cpp | 235 ---- .../hipMemset2DAsyncMultiThreadAndKernel.cpp | 181 --- tests/src/runtimeApi/memory/hipMemset3D.cpp | 146 -- .../memory/hipMemset3DFunctional.cpp | 489 ------- .../runtimeApi/memory/hipMemset3DNegative.cpp | 254 ---- .../memory/hipMemset3DRegressMultiThread.cpp | 329 ----- .../memory/hipMemsetAsyncAndKernel.cpp | 194 --- .../memory/hipMemsetAsyncMultiThread.cpp | 248 ---- .../runtimeApi/memory/hipMemsetInvalidPtr.cpp | 97 -- .../hipMultiMemcpyMultiThrdMultiStrm.cpp | 115 -- .../memory/hipMultiMemcpyMultiThread.cpp | 117 -- .../memory/hipPointerAttributes.cpp | 339 ----- .../memory/hipRandomMemcpyAsync.cpp | 104 -- .../runtimeApi/memory/hipTestMemcpyPin.cpp | 39 - .../runtimeApi/memory/p2p_copy_coherency.cpp | 201 --- tests/src/runtimeApi/module/empty_kernel.cpp | 24 - tests/src/runtimeApi/module/global_kernel.cpp | 39 - .../module/hipExtLaunchKernelGGL.cpp | 253 ---- .../hipExtLaunchMultiKernelMultiDevice.cpp | 141 -- .../module/hipExtModuleLaunchKernel.cpp | 627 --------- .../module/hipFuncGetAttributes.cpp | 52 - .../runtimeApi/module/hipFuncSetAttribute.cpp | 46 - 
.../module/hipFuncSetCacheConfig.cpp | 35 - .../module/hipFuncSetSharedMemConfig.cpp | 116 -- .../runtimeApi/module/hipManagedKeyword.cpp | 75 -- tests/src/runtimeApi/module/hipModule.cpp | 212 --- .../runtimeApi/module/hipModuleGetGlobal.cpp | 159 --- .../module/hipModuleLaunchKernel.cpp | 356 ----- .../runtimeApi/module/hipModuleLoadData.cpp | 98 -- .../hipModuleLoadDataMultThreadOnMultGPU.cpp | 152 --- .../module/hipModuleLoadDataMultThreaded.cpp | 145 -- .../hipModuleLoadMultProcessOnMultGPU.cpp | 311 ----- .../module/hipModuleLoadMultiThreaded.cpp | 115 -- .../module/hipModuleLoadUnloadStress.cpp | 119 -- .../runtimeApi/module/hipModuleNegative.cpp | 903 ------------- ...leOccupancyMaxPotentialActiveBlockSize.cpp | 55 - .../module/hipModuleTexture2dDrv.cpp | 670 --------- .../src/runtimeApi/module/hipModuleUnload.cpp | 30 - .../src/runtimeApi/module/hipOpenCLCOTest.cpp | 250 ---- .../module/kernel_composite_test.cpp | 41 - .../src/runtimeApi/module/managed_kernel.cpp | 27 - tests/src/runtimeApi/module/matmul.cpp | 119 -- tests/src/runtimeApi/module/opencl_add.cpp | 37 - tests/src/runtimeApi/module/tex2d_kernel.cpp | 73 - tests/src/runtimeApi/module/vcpy_kernel.code | Bin 18343 -> 0 bytes tests/src/runtimeApi/module/vcpy_kernel.cpp | 28 - .../multiThread/hipMultiThreadDevice.cpp | 148 -- .../multiThread/hipMultiThreadStreams1.cpp | 173 --- .../multiThread/hipMultiThreadStreams2.cpp | 129 -- ...upancyMaxActiveBlocksPerMultiprocessor.cpp | 202 --- .../hipOccupancyMaxPotentialBlockSize.cpp | 177 --- .../p2p/hipDeviceGetP2PAttribute.cpp | 57 - .../p2p/hipP2pLinkTypeAndHopFunc.cpp | 436 ------ .../runtimeApi/p2p/hipP2pLinkTypeAndHopFunc.h | 113 -- .../runtimeApi/stream/StreamAddCallback.cpp | 144 -- .../runtimeApi/stream/hipAPIStreamDisable.cpp | 67 - .../runtimeApi/stream/hipAPIStreamEnable.cpp | 69 - .../src/runtimeApi/stream/hipMultiStreams.cpp | 89 -- tests/src/runtimeApi/stream/hipNullStream.cpp | 297 ---- tests/src/runtimeApi/stream/hipStream.h | 117 -- 
.../stream/hipStreamACb_AltEnqueue.cpp | 207 --- .../stream/hipStreamACb_MStrm_Mgpu.cpp | 182 --- .../stream/hipStreamACb_MultiCalls.cpp | 152 --- .../stream/hipStreamACb_MultiThread.cpp | 189 --- .../stream/hipStreamACb_StrmSyncTiming.cpp | 164 --- .../stream/hipStreamACb_ThrdBehaviour.cpp | 62 - .../runtimeApi/stream/hipStreamACb_order.cpp | 81 -- .../stream/hipStreamAddCallback.cpp | 89 -- .../stream/hipStreamAddCallbackCatch.cpp | 409 ------ .../stream/hipStreamCreateWithPriority.cpp | 223 --- .../runtimeApi/stream/hipStreamGetCUMask.cpp | 168 --- .../runtimeApi/stream/hipStreamGetFlags.cpp | 43 - .../stream/hipStreamGetPriority.cpp | 92 -- tests/src/runtimeApi/stream/hipStreamL5.cpp | 851 ------------ .../src/runtimeApi/stream/hipStreamSync2.cpp | 211 --- .../runtimeApi/stream/hipStreamWaitEvent.cpp | 501 ------- .../runtimeApi/stream/hipStreamWithCUMask.cpp | 171 --- .../streamOperations/hipstream_operations.cpp | 460 ------- .../cache_coherency_cpu_gpu.cpp | 265 ---- .../cache_coherency_gpu_gpu.cpp | 284 ---- .../synchronization/copy_coherency.cpp | 370 ----- .../synchronization/memcpyInt.device.cpp | 11 - tests/src/specialFunc.cu | 30 - tests/src/stress/README.md | 2 - tests/src/stress/hipStressAsync.cpp | 63 - tests/src/stress/hipStressChain.cpp | 64 - tests/src/stress/hipStressKernel.cpp | 65 - tests/src/stress/hipStressMemcpy.cpp | 57 - tests/src/stress/hipStressSync.cpp | 62 - tests/src/surface/hipSurfaceObj2D.cpp | 108 -- tests/src/testAPIStream.sh | 8 - tests/src/test_common.cpp | 261 ---- tests/src/test_common.h | 586 -------- tests/src/texture/hipBindTex2DPitch.cpp | 81 -- tests/src/texture/hipBindTexRef1DFetch.cpp | 99 -- tests/src/texture/hipGetChanDesc.cpp | 65 - .../texture/hipNormalizedFloatValueTex.cpp | 209 --- tests/src/texture/hipTex1DFetchCheckModes.cpp | 121 -- tests/src/texture/hipTexObjPitch.cpp | 108 -- tests/src/texture/hipTextureHelper.hpp | 227 ---- tests/src/texture/hipTextureMipmapObj2D.cpp | 189 --- 
.../src/texture/hipTextureObj1DCheckModes.cpp | 125 -- tests/src/texture/hipTextureObj1DFetch.cpp | 104 -- tests/src/texture/hipTextureObj2D.cpp | 112 -- .../src/texture/hipTextureObj2DCheckModes.cpp | 140 -- .../src/texture/hipTextureObj3DCheckModes.cpp | 173 --- tests/src/texture/hipTextureRef2D.cpp | 99 -- tests/src/texture/simpleTexture2DLayered.cpp | 113 -- tests/src/texture/simpleTexture3D.cpp | 140 -- tests/src/timer.cpp | 116 -- tests/src/timer.h | 28 - tests/unit/test_common.cpp | 180 --- tests/unit/test_common.h | 473 ------- 369 files changed, 91 insertions(+), 62655 deletions(-) delete mode 100644 tests/README.md delete mode 100644 tests/Tests.cmake delete mode 100755 tests/hit/HIT.cmake delete mode 100755 tests/hit/parser delete mode 100644 tests/src/.gitignore delete mode 100644 tests/src/Functional/device/hipFuncDeviceSynchronize.cpp delete mode 100644 tests/src/Functional/device/hipFuncGetDevice.cpp delete mode 100644 tests/src/Functional/device/hipFuncSetDevice.cpp delete mode 100644 tests/src/Functional/device/hipFuncSetDeviceFlags.cpp delete mode 100644 tests/src/Functional/host/hipFloat16.cpp delete mode 100644 tests/src/Makefile delete mode 100644 tests/src/Negative/Device/hipDeviceGetAttribute.cpp delete mode 100644 tests/src/Negative/Device/hipDeviceUtil.h delete mode 100644 tests/src/Negative/Device/hipGetDevice.cpp delete mode 100644 tests/src/Negative/Device/hipGetDeviceCount.cpp delete mode 100644 tests/src/Negative/Device/hipGetDeviceProperties.cpp delete mode 100644 tests/src/Negative/Device/hipSetDevice.cpp delete mode 100644 tests/src/Negative/memory/hipMemcpyFromSymbol.cpp delete mode 100644 tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp delete mode 100644 tests/src/Negative/memory/hipMemcpyToSymbol.cpp delete mode 100644 tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp delete mode 100644 tests/src/Negative/memory/hipMemory.cpp delete mode 100644 tests/src/Negative/stream/hipStreamCreateWithFlags.cpp delete mode 100644 
tests/src/clara/clara.hpp delete mode 100644 tests/src/compiler/hipClassKernel.cpp delete mode 100644 tests/src/compiler/hipClassKernel.h delete mode 100644 tests/src/context/hipCtx_simple.cpp delete mode 100644 tests/src/context/hipDrvGetPCIBusId.cpp delete mode 100644 tests/src/context/hipDrvMemcpy.cpp delete mode 100644 tests/src/context/hipMemsetD8.cpp delete mode 100644 tests/src/cppstd/hipInvocable11.cpp delete mode 100644 tests/src/cppstd/hipInvocable11Fallback.cpp delete mode 100644 tests/src/cppstd/hipInvocable14.cpp delete mode 100644 tests/src/cppstd/hipInvocable17.cpp delete mode 100644 tests/src/cppstd/is_callable_test.hpp delete mode 100644 tests/src/cudaRegister.cu delete mode 100644 tests/src/deviceLib/hipBfloat16.cpp delete mode 100644 tests/src/deviceLib/hipComplex.cpp delete mode 100644 tests/src/deviceLib/hipDeviceMalloc.cpp delete mode 100644 tests/src/deviceLib/hipDeviceMemcpy.cpp delete mode 100644 tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp delete mode 100644 tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp delete mode 100644 tests/src/deviceLib/hipDoublePrecisionMathHost.cpp delete mode 100644 tests/src/deviceLib/hipFloatMath.cpp delete mode 100644 tests/src/deviceLib/hipFloatMathPrecise.cpp delete mode 100644 tests/src/deviceLib/hipHalf2Comparision.cpp delete mode 100644 tests/src/deviceLib/hipIntegerIntrinsics.cpp delete mode 100644 tests/src/deviceLib/hipLaunchKernelFunc.cpp delete mode 100644 tests/src/deviceLib/hipMathFunctions.cpp delete mode 100644 tests/src/deviceLib/hipSimpleAtomicsTest.cpp delete mode 100644 tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp delete mode 100644 tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp delete mode 100644 tests/src/deviceLib/hipSinglePrecisionMathHost.cpp delete mode 100644 tests/src/deviceLib/hipStdComplex.cpp delete mode 100644 tests/src/deviceLib/hipTestAtomicAdd.cpp delete mode 100644 tests/src/deviceLib/hipTestClock.cpp delete mode 100644 
tests/src/deviceLib/hipTestDevice.cpp delete mode 100644 tests/src/deviceLib/hipTestDeviceDouble.cpp delete mode 100644 tests/src/deviceLib/hipTestDeviceLimit.cpp delete mode 100644 tests/src/deviceLib/hipTestDeviceSymbol.cpp delete mode 100644 tests/src/deviceLib/hipTestDotFunctions.cpp delete mode 100644 tests/src/deviceLib/hipTestFMA.cpp delete mode 100644 tests/src/deviceLib/hipTestHalf.cpp delete mode 100644 tests/src/deviceLib/hipTestHost.cpp delete mode 100644 tests/src/deviceLib/hipTestIncludeMath.cpp delete mode 100644 tests/src/deviceLib/hipTestNativeHalf.cpp delete mode 100644 tests/src/deviceLib/hipTestNew.cpp delete mode 100644 tests/src/deviceLib/hipThreadFence.cpp delete mode 100644 tests/src/deviceLib/hipVectorTypes.cpp delete mode 100644 tests/src/deviceLib/hipVectorTypesDevice.cpp delete mode 100644 tests/src/deviceLib/hipVersion.cpp delete mode 100644 tests/src/deviceLib/hip_anyall.cpp delete mode 100644 tests/src/deviceLib/hip_ballot.cpp delete mode 100644 tests/src/deviceLib/hip_bitextract.cpp delete mode 100644 tests/src/deviceLib/hip_bitinsert.cpp delete mode 100644 tests/src/deviceLib/hip_brev.cpp delete mode 100644 tests/src/deviceLib/hip_clz.cpp delete mode 100644 tests/src/deviceLib/hip_ffs.cpp delete mode 100644 tests/src/deviceLib/hip_floatnTM.cpp delete mode 100644 tests/src/deviceLib/hip_funnelshift.cpp delete mode 100644 tests/src/deviceLib/hip_mbcnt.cpp delete mode 100644 tests/src/deviceLib/hip_popc.cpp delete mode 100644 tests/src/deviceLib/hip_test_ldg.cpp delete mode 100644 tests/src/deviceLib/hip_test_make_type.cpp delete mode 100644 tests/src/deviceLib/hip_test_syncthreads_and.cpp delete mode 100644 tests/src/deviceLib/hip_test_syncthreads_count.cpp delete mode 100644 tests/src/deviceLib/hip_test_syncthreads_or.cpp delete mode 100644 tests/src/deviceLib/hip_threadfence_system.cpp delete mode 100644 tests/src/deviceLib/hip_trig.cpp delete mode 100644 tests/src/deviceLib/vector_test_common.h delete mode 100644 
tests/src/dynamicLoading/bit_extract_kernel.cpp delete mode 100644 tests/src/dynamicLoading/complex_loading_behavior.cpp delete mode 100644 tests/src/dynamicLoading/hipApiDynamicLoad.cpp delete mode 100644 tests/src/experimental/xcompile/gApi.c delete mode 100644 tests/src/experimental/xcompile/gHipApi.c delete mode 100644 tests/src/experimental/xcompile/gHipApi.h delete mode 100755 tests/src/experimental/xcompile/gapi.sh delete mode 100755 tests/src/experimental/xcompile/ghipapi.sh delete mode 100644 tests/src/experimental/xcompile/gxxApi.cpp delete mode 100644 tests/src/experimental/xcompile/gxxApi1.cpp delete mode 100644 tests/src/experimental/xcompile/gxxApi1.h delete mode 100644 tests/src/experimental/xcompile/gxxHipApi.cpp delete mode 100644 tests/src/experimental/xcompile/gxxHipApi.h delete mode 100755 tests/src/experimental/xcompile/gxxapi.sh delete mode 100755 tests/src/experimental/xcompile/gxxhipapi.sh delete mode 100644 tests/src/experimental/xcompile/hHip.c delete mode 100644 tests/src/experimental/xcompile/hHipApi.c delete mode 100755 tests/src/experimental/xcompile/hipapig.sh delete mode 100755 tests/src/experimental/xcompile/hipapigxx.sh delete mode 100755 tests/src/experimental/xcompile/hipg.sh delete mode 100755 tests/src/experimental/xcompile/hipgapi.sh delete mode 100755 tests/src/experimental/xcompile/hipgxx.sh delete mode 100755 tests/src/experimental/xcompile/hipgxxapi.sh delete mode 100644 tests/src/experimental/xcompile/hipxxKer.cpp delete mode 100644 tests/src/experimental/xcompile/hxxHip.cpp delete mode 100644 tests/src/experimental/xcompile/hxxHipApi.cpp delete mode 100644 tests/src/g++/hipMalloc.cpp delete mode 100644 tests/src/gcc/LaunchKernel.c delete mode 100644 tests/src/gcc/LaunchKernel.h delete mode 100644 tests/src/gcc/gpu.cpp delete mode 100644 tests/src/gcc/hipMalloc.c delete mode 100644 tests/src/hipC.cpp delete mode 100644 tests/src/hipCKernel.c delete mode 100644 tests/src/hipEnvVar.cpp delete mode 100644 
tests/src/hipEnvVarDriver.cpp delete mode 100644 tests/src/hiprtc/hiprtcGetLoweredName.cpp delete mode 100644 tests/src/hiprtc/saxpy.cpp delete mode 100644 tests/src/hostcall/hipHostcallFuncCall.cpp delete mode 100644 tests/src/hostcall/hipHostcallPrintThings.cpp delete mode 100755 tests/src/ipc/MultiProcess.h delete mode 100644 tests/src/ipc/hipMultiProcIpcEvent.cpp delete mode 100644 tests/src/ipc/hipMultiProcIpcMem.cpp delete mode 100644 tests/src/kernel/hipDynamicShared.cpp delete mode 100644 tests/src/kernel/hipDynamicShared2.cpp delete mode 100644 tests/src/kernel/hipEmptyKernel.cpp delete mode 100644 tests/src/kernel/hipExtLaunchKernelGGL.cpp delete mode 100644 tests/src/kernel/hipGridLaunch.cpp delete mode 100644 tests/src/kernel/hipLanguageExtensions.cpp delete mode 100644 tests/src/kernel/hipLaunchParm.cpp delete mode 100644 tests/src/kernel/hipLaunchParmFunctor.cpp delete mode 100644 tests/src/kernel/hipPrintfKernel.cpp delete mode 100644 tests/src/kernel/hipShflTests.cpp delete mode 100644 tests/src/kernel/hipShflUpDownTest.cpp delete mode 100644 tests/src/kernel/hipTestConstant.cpp delete mode 100644 tests/src/kernel/hipTestGlobalVariable.cpp delete mode 100644 tests/src/kernel/hipTestMallocKernel.cpp delete mode 100644 tests/src/kernel/hipTestMemKernel.cpp delete mode 100644 tests/src/kernel/inline_asm_vadd.cpp delete mode 100644 tests/src/kernel/inline_asm_vmac.cpp delete mode 100644 tests/src/kernel/launch_bounds.cpp delete mode 100644 tests/src/nvcc/Device/hipChooseDevice.cpp delete mode 100644 tests/src/nvcc/Device/hipDeviceGetAttribute.cpp delete mode 100644 tests/src/nvcc/Device/hipDeviceGetCacheConfig.cpp delete mode 100644 tests/src/nvcc/Device/hipDeviceGetLimit.cpp delete mode 100644 tests/src/nvcc/Device/hipDeviceGetSharedMemConfig.cpp delete mode 100644 tests/src/nvcc/Device/hipGetDevice.cpp delete mode 100644 tests/src/nvcc/Device/hipGetDeviceCount.cpp delete mode 100644 tests/src/nvcc/Device/hipGetDeviceProperties.cpp delete mode 100644 
tests/src/nvcc/Device/hipSetDevice.cpp delete mode 100644 tests/src/nvcc/Device/hipSetDeviceFlags.cpp delete mode 100644 tests/src/p2p/hipPeerToPeer_simple.cpp delete mode 100644 tests/src/printf/hipPrintfAltForms.cpp delete mode 100644 tests/src/printf/hipPrintfBasic.cpp delete mode 100644 tests/src/printf/hipPrintfFlags.cpp delete mode 100644 tests/src/printf/hipPrintfManyDevices.cpp delete mode 100644 tests/src/printf/hipPrintfManyTypes.cpp delete mode 100644 tests/src/printf/hipPrintfManyWaves.cpp delete mode 100644 tests/src/printf/hipPrintfSpecifiers.cpp delete mode 100644 tests/src/printf/hipPrintfStar.cpp delete mode 100644 tests/src/printf/hipPrintfUtil.h delete mode 100644 tests/src/printf/hipPrintfWidthPrecision.cpp delete mode 100644 tests/src/printf/printf_common.h delete mode 100644 tests/src/runtimeApi/cooperativeGrps/api_failure_tests.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_down.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_up.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/cooperative_streams.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/grid_group_data_sharing.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupType.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaBaseType.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaPublicApi.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/hipLaunchCoopMultiKernel.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/hipLaunchCooperativeKernel.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/multi_gpu_api_failure_tests.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/multi_gpu_streams.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/multi_grid_group_all_gpus.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/simple_coalesced_groups.cpp delete mode 100644 
tests/src/runtimeApi/cooperativeGrps/simple_grid_group_barrier.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/simple_multi_grid_group_barrier.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/simple_tiled_partition.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/thread_block_tile_shfl_ops.cpp delete mode 100644 tests/src/runtimeApi/cooperativeGrps/thread_block_tiled_shfl_up.cpp delete mode 100644 tests/src/runtimeApi/device/hipChooseDevice.cpp delete mode 100644 tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp delete mode 100644 tests/src/runtimeApi/device/hipDeviceGetByPCIBusId.cpp delete mode 100644 tests/src/runtimeApi/device/hipDeviceGetName.cpp delete mode 100644 tests/src/runtimeApi/device/hipDeviceGetPCIBusId.cpp delete mode 100644 tests/src/runtimeApi/device/hipDeviceSynchronize.cpp delete mode 100644 tests/src/runtimeApi/device/hipDeviceTotalMem.cpp delete mode 100644 tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp delete mode 100644 tests/src/runtimeApi/device/hipGetDeviceProperties.cpp delete mode 100644 tests/src/runtimeApi/device/hipRuntimeGetVersion.cpp delete mode 100644 tests/src/runtimeApi/device/hipSetCachceConfig.cpp delete mode 100644 tests/src/runtimeApi/device/hipSetDeviceFlags.cpp delete mode 100644 tests/src/runtimeApi/device/hipSetGetDevice.cpp delete mode 100644 tests/src/runtimeApi/error/hipPeekAtLastError.cpp delete mode 100644 tests/src/runtimeApi/event/hipEvent.cpp delete mode 100644 tests/src/runtimeApi/event/hipEventElapsedTime.cpp delete mode 100644 tests/src/runtimeApi/event/hipEventIpc.cpp delete mode 100644 tests/src/runtimeApi/event/hipEventMultiThreaded.cpp delete mode 100644 tests/src/runtimeApi/event/hipEventRecord.cpp delete mode 100644 tests/src/runtimeApi/event/record_event.cpp delete mode 100644 tests/src/runtimeApi/graph/hipChildGraph.cpp delete mode 100644 tests/src/runtimeApi/graph/hipGraph.cpp delete mode 100644 tests/src/runtimeApi/graph/hipSimpleGraphWithKernel.cpp 
delete mode 100644 tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp delete mode 100644 tests/src/runtimeApi/memory/hipArray.cpp delete mode 100644 tests/src/runtimeApi/memory/hipHostGetFlags.cpp delete mode 100644 tests/src/runtimeApi/memory/hipHostMalloc.cpp delete mode 100644 tests/src/runtimeApi/memory/hipHostMallocTests.cpp delete mode 100644 tests/src/runtimeApi/memory/hipHostMallocTests.h delete mode 100644 tests/src/runtimeApi/memory/hipHostRegister.cpp delete mode 100644 tests/src/runtimeApi/memory/hipIpcMemAccessTest.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMallocConcurrency.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMallocManaged.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMallocManaged_MultiScenario.cpp delete mode 100644 tests/src/runtimeApi/memory/hipManagedKeyword.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy2D.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy2DFromArray.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy2DFromArrayAsync.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy2D_simple.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy3D.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyAll.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyAsync.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyAsync2.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyAtoH.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyHtoA.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyNegativeMThrdMSize.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp 
delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyPeer.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyPeerAsync.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyWithStream.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemcpy_simple.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemset.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemset2D.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemset3D.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemset3DFunctional.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemset3DNegative.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemset3DRegressMultiThread.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemsetAsyncAndKernel.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemsetAsyncMultiThread.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMemsetInvalidPtr.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMultiMemcpyMultiThrdMultiStrm.cpp delete mode 100644 tests/src/runtimeApi/memory/hipMultiMemcpyMultiThread.cpp delete mode 100644 tests/src/runtimeApi/memory/hipPointerAttributes.cpp delete mode 100644 tests/src/runtimeApi/memory/hipRandomMemcpyAsync.cpp delete mode 100644 tests/src/runtimeApi/memory/hipTestMemcpyPin.cpp delete mode 100644 tests/src/runtimeApi/memory/p2p_copy_coherency.cpp delete mode 100644 tests/src/runtimeApi/module/empty_kernel.cpp delete mode 100644 tests/src/runtimeApi/module/global_kernel.cpp delete mode 100644 tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp delete mode 100644 tests/src/runtimeApi/module/hipExtLaunchMultiKernelMultiDevice.cpp delete mode 100644 
tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp delete mode 100644 tests/src/runtimeApi/module/hipFuncGetAttributes.cpp delete mode 100644 tests/src/runtimeApi/module/hipFuncSetAttribute.cpp delete mode 100644 tests/src/runtimeApi/module/hipFuncSetCacheConfig.cpp delete mode 100644 tests/src/runtimeApi/module/hipFuncSetSharedMemConfig.cpp delete mode 100644 tests/src/runtimeApi/module/hipManagedKeyword.cpp delete mode 100644 tests/src/runtimeApi/module/hipModule.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleGetGlobal.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleLaunchKernel.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleLoadData.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleLoadMultProcessOnMultGPU.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleLoadMultiThreaded.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleLoadUnloadStress.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleNegative.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp delete mode 100644 tests/src/runtimeApi/module/hipModuleUnload.cpp delete mode 100644 tests/src/runtimeApi/module/hipOpenCLCOTest.cpp delete mode 100644 tests/src/runtimeApi/module/kernel_composite_test.cpp delete mode 100644 tests/src/runtimeApi/module/managed_kernel.cpp delete mode 100644 tests/src/runtimeApi/module/matmul.cpp delete mode 100644 tests/src/runtimeApi/module/opencl_add.cpp delete mode 100644 tests/src/runtimeApi/module/tex2d_kernel.cpp delete mode 100644 tests/src/runtimeApi/module/vcpy_kernel.code delete mode 100644 tests/src/runtimeApi/module/vcpy_kernel.cpp delete mode 100644 
tests/src/runtimeApi/multiThread/hipMultiThreadDevice.cpp delete mode 100644 tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp delete mode 100644 tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp delete mode 100644 tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp delete mode 100644 tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp delete mode 100644 tests/src/runtimeApi/p2p/hipDeviceGetP2PAttribute.cpp delete mode 100644 tests/src/runtimeApi/p2p/hipP2pLinkTypeAndHopFunc.cpp delete mode 100644 tests/src/runtimeApi/p2p/hipP2pLinkTypeAndHopFunc.h delete mode 100644 tests/src/runtimeApi/stream/StreamAddCallback.cpp delete mode 100644 tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp delete mode 100644 tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp delete mode 100644 tests/src/runtimeApi/stream/hipMultiStreams.cpp delete mode 100644 tests/src/runtimeApi/stream/hipNullStream.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStream.h delete mode 100644 tests/src/runtimeApi/stream/hipStreamACb_AltEnqueue.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamACb_MStrm_Mgpu.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamACb_MultiCalls.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamACb_StrmSyncTiming.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamACb_ThrdBehaviour.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamACb_order.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamAddCallback.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamCreateWithPriority.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamGetCUMask.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamGetFlags.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamGetPriority.cpp delete mode 
100644 tests/src/runtimeApi/stream/hipStreamL5.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamSync2.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp delete mode 100644 tests/src/runtimeApi/stream/hipStreamWithCUMask.cpp delete mode 100644 tests/src/runtimeApi/streamOperations/hipstream_operations.cpp delete mode 100644 tests/src/runtimeApi/synchronization/cache_coherency_cpu_gpu.cpp delete mode 100644 tests/src/runtimeApi/synchronization/cache_coherency_gpu_gpu.cpp delete mode 100644 tests/src/runtimeApi/synchronization/copy_coherency.cpp delete mode 100644 tests/src/runtimeApi/synchronization/memcpyInt.device.cpp delete mode 100644 tests/src/specialFunc.cu delete mode 100644 tests/src/stress/README.md delete mode 100644 tests/src/stress/hipStressAsync.cpp delete mode 100644 tests/src/stress/hipStressChain.cpp delete mode 100644 tests/src/stress/hipStressKernel.cpp delete mode 100644 tests/src/stress/hipStressMemcpy.cpp delete mode 100644 tests/src/stress/hipStressSync.cpp delete mode 100644 tests/src/surface/hipSurfaceObj2D.cpp delete mode 100755 tests/src/testAPIStream.sh delete mode 100644 tests/src/test_common.cpp delete mode 100644 tests/src/test_common.h delete mode 100644 tests/src/texture/hipBindTex2DPitch.cpp delete mode 100644 tests/src/texture/hipBindTexRef1DFetch.cpp delete mode 100644 tests/src/texture/hipGetChanDesc.cpp delete mode 100644 tests/src/texture/hipNormalizedFloatValueTex.cpp delete mode 100644 tests/src/texture/hipTex1DFetchCheckModes.cpp delete mode 100644 tests/src/texture/hipTexObjPitch.cpp delete mode 100644 tests/src/texture/hipTextureHelper.hpp delete mode 100644 tests/src/texture/hipTextureMipmapObj2D.cpp delete mode 100644 tests/src/texture/hipTextureObj1DCheckModes.cpp delete mode 100644 tests/src/texture/hipTextureObj1DFetch.cpp delete mode 100644 tests/src/texture/hipTextureObj2D.cpp delete mode 100644 tests/src/texture/hipTextureObj2DCheckModes.cpp delete mode 100644 
tests/src/texture/hipTextureObj3DCheckModes.cpp delete mode 100644 tests/src/texture/hipTextureRef2D.cpp delete mode 100644 tests/src/texture/simpleTexture2DLayered.cpp delete mode 100644 tests/src/texture/simpleTexture3D.cpp delete mode 100644 tests/src/timer.cpp delete mode 100644 tests/src/timer.h delete mode 100644 tests/unit/test_common.cpp delete mode 100644 tests/unit/test_common.h diff --git a/docs/developer_guide/build.md b/docs/developer_guide/build.md index 50efbc21a2..dd7607f451 100755 --- a/docs/developer_guide/build.md +++ b/docs/developer_guide/build.md @@ -123,47 +123,14 @@ hip_prof_gen.py -v -p -t --priv /include/hip/hip_runtime_api.h \ ### Build HIP tests -#### Build HIP directed tests -Developers can build HIP directed tests right after build HIP commands, - -```shell -sudo make install -make -j$(nproc) build_tests -``` -By default, all HIP directed tests will be built and generated under the folder `$CLR_DIR/build/hipamd`directed_tests. -Take HIP directed device APIs tests, as an example, all available test applications will have executable files generated under, -`$CLR_DIR/build/hipamd/directed_tests/runtimeApi/device`. - -Run all HIP directed_tests, use the command, - -```shell -ctest -``` -Or -```shell -make test -``` - -Build and run a single directed test, use the follow command as an example, - -```shell -make directed_tests.texture.hipTexObjPitch -cd $CLR_DIR/build/hipamd/directed_tests/texcture -./hipTexObjPitch -``` -Please note, the integrated HIP directed tests, will be deprecated in future release. - - -##### Build HIP catch tests - HIP catch tests, with new architectured Catch2, are official seperated from HIP project, exist in HIP tests repository, can be built via the following instructions. 
-##### Get HIP tests source code +#### Get HIP tests source code ```shell git clone -b "$ROCM_BRANCH" https://github.com/ROCm-Developer-Tools/hip-tests.git ``` -##### Build HIP tests from source +#### Build HIP tests from source ```shell export HIPTESTS_DIR="$(readlink -f hip-tests)" @@ -183,7 +150,7 @@ cd $HIPTESTS_DIR/build/catch_tests/unit/texture ./TextureTest ``` -##### Build HIP Catch2 standalone test +#### Build HIP Catch2 standalone test HIP Catch2 supports build a standalone test, for example, diff --git a/docs/developer_guide/contributing.md b/docs/developer_guide/contributing.md index 1d9288fbb1..7229f7bf28 100644 --- a/docs/developer_guide/contributing.md +++ b/docs/developer_guide/contributing.md @@ -59,16 +59,16 @@ Selected multilib: .;@m64 ## Unit Testing Environment -HIP includes unit tests in the tests/src directory. -When adding a new HIP feature, add a new unit test as well. -See [tests/README.md](README.md) for more information. +Tests for HIP are hosted at [ROCm-Developer-Tools/hip-tests](https://github.com/ROCm-Developer-Tools/hip-tests). + +To run `hip-tests` please go to the repo and follow the steps. ## Development Flow -Directed tests provide a great place to develop new features alongside the associated test. +`hip-tests` provide a great place to develop new features alongside the associated test. -For applications and benchmarks outside the directed test environment, developments should use a two-step development flow: -- #1. Compile, link, and install HIP/ROCclr. See [Installation](README.md#Installation) notes. +For applications and benchmarks outside the hip-tests environment, developments should use a two-step development flow: +- #1. Compile, link, and install HIP/ROCclr. See [Installation](README.md#Installation) notes. - #2. Relink the target application to include changes in HIP runtime file. 
## Environment Variables @@ -124,13 +124,7 @@ Differences or limitations of HIP APIs as compared to CUDA APIs should be clearl ### Presubmit Testing: -Before checking in or submitting a pull request, run all directed tests (see tests/README.md) and all Rodinia tests. -Ensure pass results match starting point: - -```console -> cd examples/ -> ./run_all.sh -``` +Before checking in or submitting a pull request, run all hip-tests (see [ROCm-Developer-Tools/hip-tests](https://github.com/ROCm-Developer-Tools/hip-tests)). ### Checkin messages diff --git a/docs/how_to_guides/debugging.md b/docs/how_to_guides/debugging.md index 94e9c8814e..25a833844f 100644 --- a/docs/how_to_guides/debugging.md +++ b/docs/how_to_guides/debugging.md @@ -98,16 +98,16 @@ Find the GDB manual and other documentation resources online at: ... Reading symbols from ./hipTexObjPitch... (gdb) break main -Breakpoint 1 at 0x4013d1: file /home/test/hip/tests/src/texture/hipTexObjPitch.cpp, line 98. +Breakpoint 1 at 0x4013d1: file /home/<user>/hip-tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp, line 56. (gdb) run -Starting program: /home/test/hip/build/directed_tests/texture/hipTexObjPitch +Starting program: MatrixTranspose [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Breakpoint 1, main () - at /home/test/hip/tests/src/texture/hipTexObjPitch.cpp:98 -98 texture2Dtest(); -(gdb)c + at MatrixTranspose.cpp:56 +56 int main() { +(gdb) c ``` @@ -116,66 +116,96 @@ There are also other debugging tools available online developers can google and ## Debugging HIP Applications -Below is an example on Linux to show how to get useful information from the debugger while running a simple memory copy test, which caused an issue of segmentation fault. +Below is an example on Linux to show how to get useful information from the debugger while running a simple HIP application, which caused an issue of segmentation fault. 
+ +Simple HIP Program: + +```cpp +#include <hip/hip_runtime.h> +#include <iostream> +#include <vector> + +__global__ void kernel_add(int* a, int b) { + int i = threadIdx.x; + a[i] += b; +} + +int main() { + constexpr size_t size = 1024; + int* ptr; + hipMalloc(&ptr, sizeof(int) * size); + hipMemset(ptr, 0, sizeof(int) * size); + std::vector<int> input(size, 0); + size_t i = 100; + std::for_each(input.begin(), input.end(), [&](int& a) { a = i; }); + hipMemcpy(ptr, input.data(), sizeof(int) * size, hipMemcpyHostToDevice); + kernel_add<<<1, size>>>(ptr, 10); + std::vector<int> output = input; + hipMemcpy(output.data(), ptr, sizeof(int) * size, hipMemcpyDeviceToHost); + std::cout << ((std::all_of(output.begin(), output.end(), [&](int a) { return a == (i + 10); })) + ? "passed" + : "failed") + << std::endl; + hipFree(ptr); +} +``` + +Compile and run command: ```console -test: simpleTest2 numElements=4194304 sizeElements=4194304 bytes -Segmentation fault (core dumped) +hipcc app.cpp -ggdb -o app +rocgdb ./app +``` + +```console +(gdb) b main +Breakpoint 1 at 0x21275e: file app.cpp, line 14. (gdb) run -Starting program: /home/test/hipamd/build/directed_tests/runtimeApi/memory/hipMemcpy_simple +Starting program: /home/<user>/app +warning: os_agent_id 31475: `Device 1002:164e' architecture not supported. [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". -Breakpoint 1, main (argc=1, argv=0x7fffffffdea8) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:147 -147 int main(int argc, char* argv[]) { -(gdb) c -Continuing. -[New Thread 0x7ffff64c4700 (LWP 146066)] +Breakpoint 1, hipMalloc<int> (devPtr=0x7fffffffe098, size=4096) at /opt/rocm/include/hip/hip_runtime_api.h:8487 +8487 return hipMalloc((void**)devPtr, size); -Thread 1 "hipMemcpy_simpl" received signal SIGSEGV, Segmentation fault. 
-0x000000000020f78e in simpleTest2 (numElements=4194304, usePinnedHost=true) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:104 -104 A_h1[i] = 3.14f + 1000 * i; (gdb) bt -#0 0x000000000020f78e in simpleTest2 (numElements=4194304, usePinnedHost=true) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:104 -#1 0x000000000020e96c in main (argc=<optimized out>, argv=<optimized out>) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:163 -(gdb) info thread - Id Target Id Frame -* 1 Thread 0x7ffff64c5880 (LWP 146060) "hipMemcpy_simpl" 0x000000000020f78e in simpleTest2 (numElements=4194304, usePinnedHost=true) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:104 - 2 Thread 0x7ffff64c4700 (LWP 146066) "hipMemcpy_simpl" 0x00007ffff6b0850b in ioctl - () from /lib/x86_64-linux-gnu/libc.so.6 +#0 hipMalloc<int> (devPtr=0x7fffffffe098, size=4096) at /opt/rocm/include/hip/hip_runtime_api.h:8487 +#1 main () at app.cpp:14 + +(gdb) n +[New Thread 0x7fffeb7ff640 (LWP 1524879)] +[New Thread 0x7fffeaffe640 (LWP 1524880)] +[Thread 0x7fffeaffe640 (LWP 1524880) exited] +main () at app.cpp:15 +15 hipMemset(ptr, 0, sizeof(int) * size); + +(gdb) info threads + Id Target Id Frame +* 1 Thread 0x7ffff7e6ba80 (LWP 1524135) "app" main () at app.cpp:15 + 2 Thread 0x7fffeb7ff640 (LWP 1524879) "app" __GI___ioctl (fd=3, request=3222817548) at ../sysdeps/unix/sysv/linux/ioctl.c:36 + (gdb) thread 2 -[Switching to thread 2 (Thread 0x7ffff64c4700 (LWP 146066))] -#0 0x00007ffff6b0850b in ioctl () from /lib/x86_64-linux-gnu/libc.so.6 +[Switching to thread 2 (Thread 0x7fffeb7ff640 (LWP 1524879))] +#0 __GI___ioctl (fd=3, request=3222817548) at ../sysdeps/unix/sysv/linux/ioctl.c:36 +36 ../sysdeps/unix/sysv/linux/ioctl.c: No such file or directory. + (gdb) bt -#0 0x00007ffff6b0850b in ioctl () from /lib/x86_64-linux-gnu/libc.so.6 -#1 0x00007ffff6604568 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 -#2 0x00007ffff65fe73a in ?? 
() from /opt/rocm/lib/libhsa-runtime64.so.1 -#3 0x00007ffff659e4d6 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 -#4 0x00007ffff65807de in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 -#5 0x00007ffff65932a2 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 -#6 0x00007ffff654f547 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 -#7 0x00007ffff7f76609 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 -#8 0x00007ffff6b13293 in clone () from /lib/x86_64-linux-gnu/libc.so.6 -(gdb) thread 1 -[Switching to thread 1 (Thread 0x7ffff64c5880 (LWP 146060))] -#0 0x000000000020f78e in simpleTest2 (numElements=4194304, usePinnedHost=true) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:104 -104 A_h1[i] = 3.14f + 1000 * i; -(gdb) bt -#0 0x000000000020f78e in simpleTest2 (numElements=4194304, usePinnedHost=true) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:104 -#1 0x000000000020e96c in main (argc=<optimized out>, argv=<optimized out>) - at /home/test/hip/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp:163 -(gdb) +#0 __GI___ioctl (fd=3, request=3222817548) at ../sysdeps/unix/sysv/linux/ioctl.c:36 +#1 0x00007fffeb8fda80 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 +#2 0x00007fffeb8f6912 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 +#3 0x00007fffeb883021 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 +#4 0x00007fffeb85e026 in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 +#5 0x00007fffeb874b6a in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 +#6 0x00007fffeb828fdb in ?? () from /opt/rocm/lib/libhsa-runtime64.so.1 +#7 0x00007ffff5c94b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442 +#8 0x00007ffff5d26a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 ... ``` +A complete guide to `rocgdb` can be found [here](https://rocm.docs.amd.com/projects/ROCgdb/en/latest/). 
+ On Windows, debugging HIP applications on IDE like Microsoft Visual Studio tools, are more informative and visible to debug codes, inspect variables, watch multiple details and examine the call stacks. ## Useful Environment Variables diff --git a/docs/user_guide/programming_manual.md b/docs/user_guide/programming_manual.md index b954dec9c8..34e6cdfdd7 100644 --- a/docs/user_guide/programming_manual.md +++ b/docs/user_guide/programming_manual.md @@ -93,7 +93,6 @@ else { } ``` Please note, the managed memory capability check may not be necessary, but if HMM is not supported, then managed malloc will fall back to using system memory and other managed memory API calls will have undefined behavior. -For more details on managed memory APIs, please refer to the documentation HIP-API.pdf, and the application at (https://github.com/ROCm-Developer-Tools/HIP/blob/rocm-4.5.x/tests/src/runtimeApi/memory/hipMallocManaged.cpp) is a sample usage. Note, managed memory management is implemented on Linux, not supported on Windows yet. @@ -139,8 +138,6 @@ HIP graph is supported. For more details, refer to the HIP API Guide. HIP-Clang now supports device-side malloc and free. This implementation does not require the use of `hipDeviceSetLimit(hipLimitMallocHeapSize,value)` nor respects any setting. The heap is fully dynamic and can grow until the available free memory on the device is consumed. -The test codes in the link (https://github.com/ROCm-Developer-Tools/HIP/blob/develop/tests/src/deviceLib/hipDeviceMalloc.cpp) show how to implement application using malloc and free functions in device kernels. - ## Use of Per-thread default stream The per-thread default stream is supported in HIP. It is an implicit stream local to both the thread and the current device. This means that the command issued to the per-thread default stream by the thread does not implicitly synchronize with other streams (like explicitly created streams), or default per-thread stream on other threads. 
diff --git a/include/hip/hip_ext.h b/include/hip/hip_ext.h index 09e9cf8b95..2af464ba18 100644 --- a/include/hip/hip_ext.h +++ b/include/hip/hip_ext.h @@ -133,9 +133,6 @@ extern "C" hipError_t hipExtLaunchKernel(const void* function_address, dim3 numB * launched in any order. * @param [in] args templated kernel arguments. * - * - * Please refer to the application for sample usage at, - * (https://github.com/ROCm-Developer-Tools/HIP/blob/rocm-4.5.x/tests/src/kernel/hipExtLaunchKernelGGL.cpp). */ template inline void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, diff --git a/tests/README.md b/tests/README.md deleted file mode 100644 index e03e399104..0000000000 --- a/tests/README.md +++ /dev/null @@ -1,185 +0,0 @@ -# HIP testing environment. - -This document explains how to use the HIP CMAKE testing environment. -We make use of the HIT Integrated Tester (HIT) framework to automatically find and add test cases to the CMAKE testing environment. - -### Quickstart - -HIP unit tests are integrated into the top-level cmake project. The tests depend upon the installed version of HIP. -Typical usage (paths relative to top of the HIP repo): -``` -$ mkdir build -$ cd build -$ cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/install -$ make -$ make install -$ make build_tests -$ make test -``` - -### How to add a new test - -The test infrastructure use a hierarchy of folders. So add the new test to the appropriate folder. -The tests/src/runtimeApi/memory/hipMemset.cpp file contains a simple unit test and is a good starting point for other tests. -Copy this to a new test name and modify it. - - -### HIP Integrated Tester (HIT) - -The HIT framework automatically finds and adds test cases to the CMAKE testing environment. It achives this by parsing all files in the tests/src folder. -The parser looks for a code block similar to the one below. 
-``` -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * //Small copy - * TEST: %t -N 10 --memsetval 0x42 - * // Oddball size - * TEST: %t -N 10013 --memsetval 0x5a - * // Big copy - * TEST: %t -N 256M --memsetval 0xa6 - * HIT_END - */ -``` -In the above, BUILD commands provide instructions on how to build the test case while TEST commands provide instructions on how to execute the test case. - -#### BUILD command - -The supported syntax for the BUILD command is: -``` -BUILD: %t %s HIPCC_OPTIONS CLANG_OPTIONS NVCC_OPTIONS EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER DEPENDS EXCLUDE_HIP_LIB_TYPE -``` -%s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path). -%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified. -HIPCC_OPTIONS: All options specified after this delimiter are passed to hipcc on both amd and nvidia platforms. -CLANG_OPTIONS: All options specified after this delimiter are passed to hipcc on HIP-Clang compiler only. -NVCC_OPTIONS: All options specified after this delimiter are passed to hipcc on nvidia platform only. -EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from amd, nvidia or both platforms. -EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from rocclr runtime. -EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from clang compiler. -EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test case from particular runtime and compiler. -EXCLUDE_HIP_LIB_TYPE: This can be used to exclude a test case from static or shared libs. -DEPENDS: This can be used to specify dependencies that need to be built before building the current target. 
- - -#### BUILD_CMD command - -The supported syntax for the BUILD_CMD command is: -``` -BUILD_CMD: EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER EXCLUDE_HIP_LIB_TYPE DEPENDS -``` -%s: refers to current source file name. Additional source files needed for the test can be specified by name (including relative path). -%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified. -%hc: refers to hipcc pointed to by $CMAKE_INSTALL_PREFIX/bin/hipcc. -%hip-path: refers to hip installed location pointed to by $CMAKE_INSTALL_PREFIX -%cc: refers to system c compiler pointed to by /usr/bin/cc. -%cxx: refers to system c compiler pointed to by /usr/bin/c++. -%S: refers to path to current source file. -%T: refers to path to current build target. -EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from amd, nvidia or both platforms. -EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from rocclr runtime. -EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from clang compiler. -EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test from particular runtime and compiler. -EXCLUDE_HIP_LIB_TYPE: This can be used to exclude a test case from static or shared libs. -DEPENDS: This can be used to specify dependencies that need to be built before building the current target. - - -#### TEST command - -The supported syntax for the TEST command is: -``` -TEST: %t EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER EXCLUDE_HIP_LIB_TYPE -``` -%t: refers to target executable named derived by removing the extension from the current source file. Alternatively a target executable name can be specified. -EXCLUDE_HIP_PLATFORM: This can be used to exclude a test case from amd, nvidia or both platforms. -EXCLUDE_HIP_RUNTIME: This can be used to exclude a test case from rocclr runtime. 
-EXCLUDE_HIP_COMPILER: This can be used to exclude a test case from clang compiler. -EXCLUDE_HIP_RUNTIME AND EXCLUDE_HIP_COMPILER: when both options are specified it excludes test from particular runtime and compiler. -EXCLUDE_HIP_LIB_TYPE: This can be used to exclude a test case from static or shared libs. - -Note that if the test has been excluded for a specific platform/runtime/compiler in the BUILD command, it is automatically excluded from the TEST command as well for the sameplatform. - -#### TEST_NAMED command - -When using the TEST command, HIT will squash and append the arguments specified to the test executable name to generate the CMAKE test name. Sometimes we might want to specify a more descriptive name. The TEST_NAMED command is used for that. The supported syntax for the TEST_NAMED command is: -``` -TEST: %t CMAKE_TEST_NAME EXCLUDE_HIP_PLATFORM EXCLUDE_HIP_RUNTIME EXCLUDE_HIP_COMPILER EXCLUDE_HIP_LIB_TYPE -``` - - -### Running tests: -``` -ctest -``` - -### Run subsets of all tests: -``` -# Run one test on the commandline -./directed_tests/runtime/memory/hipMemset - -# Run all the hipMemcpy tests: -ctest -R Memcpy - -# Run all tests in a specific folder: -ctest -R memory -``` - -### Performance tests: -``` -Above tests are direct tests which are majorly used for function verification. -We also provide performance tests under tests/performance folder. - -# Build all performance tests after running "make install" under build folder: -make build_perf - -Then all performance test applications will be built into ./performance_tests folder. - -# Run all performance tests: -make perf - -# Run individual performance test: -For example, -performance_tests/memory/hipPerfMemMallocCpyFree - -# Run a specific test set: -For example, -/usr/bin/ctest -C performance -R performance_tests/perfDispatch --verbose -Here "-C performance" indicate the "performance" configuration of ctest. 
-``` - -### RTC Testing - -To enable RTC testing, cmake needs to be passed the `-DRTC_TESTING=1` option. - -When this option is passed, all tests that support this functionality will be run using HIP RTC to compile and run. - -To enable HIP RTC support for a specific test: - -1. Move all its kernels to `tests/catch/kernels` (one file per kernel): - 1. Kernel **functions** should use the file extension `.cpp` and include `kernels.hh` - 2. Kernel **templates** should use the file extension `.inl` -2. Update `tests/catch/kernels/CMakeLists.txt` (i.e. add the new kernel **functions** to `TEST_SRC`) -3. Update `tests/catch/include/kernels.hh`: - 1. Declare the new kernel **functions** - 2. Include the new .inl files that contain kernel **templates** - 3. Call the `FUNCTION_WRAPPER` and `TEMPLATE_WRAPPER` macros for each new function and template respectively. -4. Update `tests/catch/include/kernel_mapping.hh` with the mapping between the new files and respective function / template names. -5. Include `kernels.hh` -6. Call the `hipTest::launchKernel()` function instead of `hipLaunchKernelGGL()` - -**Note:** HIP RTC does not do implicit casting of kernel parameters. This **requires** the test writer to explicitly do all the casting before running the kernel. There is a `static_assert` inside `hipTest::launchKernel()` that checks that this was done correctly. However, due to limitations, the assertion is only performed when `-DRTC_TESTING` option is **disabled**. This means that runtime errors can occur if the casts are not performed correctly and `-DRTC_TESTING` is enabled. - -### If a test fails - how to debug a test - -Find the test and commandline that fail: - -(From the build directory, perhaps hip/build) -grep -IR hipMemcpy-modes -IR ../tests/ -../tests/src/runtimeApi/memory/hipMemcpy.cpp: * TEST_NAMED: %t hipMemcpy-modes --tests 0x1 - -# Guidelines for adding new tests - -- Prefer to enhance an existing test as opposed to writing a new one. 
Tests have overhead to start and many small tests spend precious test time on startup and initialization issues. -- Make the test run standalone without requirement for command-line arguments. THis makes it easier to debug since the name of the test is shown in the test report and if you know the name of the test you can the run the test. -- For long-running tests or tests with multiple phases, consider using the --tests option as an optional mechanism to allow debuggers to start with the failing subset of the test. - diff --git a/tests/Tests.cmake b/tests/Tests.cmake deleted file mode 100644 index 7b8a355824..0000000000 --- a/tests/Tests.cmake +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -############################################################################### -# Tests.cmake -############################################################################### - -# Add tests -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) -include_directories(${CMAKE_CURRENT_LIST_DIR}/src) -hit_add_directory_recursive(${HIP_CTEST_CONFIG_DEFAULT} ${CMAKE_CURRENT_LIST_DIR}/src "directed_tests") - -# Add unit tests -include_directories(${CMAKE_CURRENT_LIST_DIR}/unit) -hit_add_directory_recursive(${HIP_CTEST_CONFIG_DEFAULT} ${CMAKE_CURRENT_LIST_DIR}/unit "unit_tests") - -# Add performance tests -include_directories(${CMAKE_CURRENT_LIST_DIR}/performance) -hit_add_directory_recursive(${HIP_CTEST_CONFIG_PERFORMANCE} ${CMAKE_CURRENT_LIST_DIR}/performance "performance_tests") - -# Add top-level tests to build_tests -add_custom_target(build_tests DEPENDS directed_tests unit_tests) - -# Add top-level tests to build performance_tests. -# To build performance tests, just run "make build_perf" -add_custom_target(build_perf DEPENDS performance_tests) - -# Add custom target: perf. -# To run performance tests, just run "make perf" -add_custom_target(perf COMMAND "${CMAKE_CTEST_COMMAND}" -C "${HIP_CTEST_CONFIG_PERFORMANCE}" -R "performance_tests/" --verbose) - -# Add custom target: check -add_custom_target(check COMMAND "${CMAKE_COMMAND}" --build . --target test DEPENDS build_tests) - -# vim: ts=4:sw=4:expandtab:smartindent \ No newline at end of file diff --git a/tests/hit/HIT.cmake b/tests/hit/HIT.cmake deleted file mode 100755 index e1c8fcbc4b..0000000000 --- a/tests/hit/HIT.cmake +++ /dev/null @@ -1,506 +0,0 @@ -# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -include(CTest) -find_package(HIP REQUIRED) - -set(HIP_CTEST_CONFIG_DEFAULT "default") -set(HIP_CTEST_CONFIG_PERFORMANCE "performance") -set(HIP_LIB_TYPE "shared") -if (NOT ${BUILD_SHARED_LIBS}) - set(HIP_LIB_TYPE "static") -endif() -message(STATUS "HIP runtime lib type - ${HIP_LIB_TYPE}") -message(STATUS "CMAKE_TESTING_TOOL: ${CMAKE_TESTING_TOOL}") - -# Turn off CMAKE_HIP_ARCHITECTURES Feature if cmake version is 3.21+ -if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.21.0) - set(CMAKE_HIP_ARCHITECTURES OFF) -endif() -message(STATUS "CMAKE HIP ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}") - -# Identify the GPU Targets. 
-# This is done due to limitation of rocm_agent_enumerator -# While building test parallelly, rocm_agent_enumerator can fail and give out an empty target -# That results in hipcc building the test for gfx803 (the default target) -if(NOT DEFINED OFFLOAD_ARCH_STR AND EXISTS "${ROCM_PATH}/bin/rocm_agent_enumerator" - AND HIP_PLATFORM STREQUAL "amd" AND UNIX) - execute_process(COMMAND ${ROCM_PATH}/bin/rocm_agent_enumerator OUTPUT_VARIABLE HIP_GPU_ARCH - RESULT_VARIABLE ROCM_AGENT_ENUM_RESULT) - message(STATUS "ROCm Agent Enumurator Result: ${ROCM_AGENT_ENUM_RESULT}") - # Trim out gfx000 - string(REPLACE "gfx000\n" "" HIP_GPU_ARCH ${HIP_GPU_ARCH}) - if (NOT HIP_GPU_ARCH STREQUAL "") - string(LENGTH ${HIP_GPU_ARCH} HIP_GPU_ARCH_LEN) - # If string has more gfx target except gfx000 - if(${HIP_GPU_ARCH_LEN} GREATER_EQUAL 1) - string(REGEX REPLACE "\n" ";" HIP_GPU_ARCH_LIST "${HIP_GPU_ARCH}") - set(OFFLOAD_ARCH_STR "") - foreach(_hip_gpu_arch ${HIP_GPU_ARCH_LIST}) - set(OFFLOAD_ARCH_STR " ${OFFLOAD_ARCH_STR} --offload-arch=${_hip_gpu_arch} ") - endforeach() - message(STATUS "Using offload arch string: ${OFFLOAD_ARCH_STR}") - endif() - else() - message(STATUS "ROCm Agent Enumurator found no valid architectures") - endif() -else() - message(STATUS "ROCm Agent Enumurator Not Found") -endif() - -#------------------------------------------------------------------------------- -# Helper macro to parse BUILD instructions -macro(PARSE_BUILD_COMMAND _target _sources _hipcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends _dir) - set(${_target}) - set(${_sources}) - if(DEFINED OFFLOAD_ARCH_STR) - set(${_hipcc_options} "${OFFLOAD_ARCH_STR}") - else() - set(${_hipcc_options}) - endif() - set(${_clang_options}) - set(${_nvcc_options}) - set(${_link_options}) - set(${_exclude_platforms}) - set(${_exclude_runtime}) - set(${_exclude_compiler}) - set(${_exclude_lib_type}) - set(${_depends}) - - 
set(_target_found FALSE) - set(_flag "") - - foreach(arg ${ARGN}) - if(NOT _target_found) - set(_target_found TRUE) - set(${_target} ${arg}) - elseif("x${arg}" STREQUAL "xHIPCC_OPTIONS" - OR "x${arg}" STREQUAL "xCLANG_OPTIONS" - OR "x${arg}" STREQUAL "xNVCC_OPTIONS" - OR "x${arg}" STREQUAL "xLINK_OPTIONS" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_PLATFORM" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_RUNTIME" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_COMPILER" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE" - OR "x${arg}" STREQUAL "xDEPENDS") - set(_flag ${arg}) - elseif("x${_flag}" STREQUAL "xHIPCC_OPTIONS") - list(APPEND ${_hipcc_options} ${arg}) - elseif("x${_flag}" STREQUAL "xCLANG_OPTIONS") - list(APPEND ${_clang_options} ${arg}) - elseif("x${_flag}" STREQUAL "xNVCC_OPTIONS") - list(APPEND ${_nvcc_options} ${arg}) - elseif("x${_flag}" STREQUAL "xLINK_OPTIONS") - list(APPEND ${_link_options} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_PLATFORM") - list(APPEND ${_exclude_platforms} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_RUNTIME") - list(APPEND ${_exclude_runtime} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_COMPILER") - list(APPEND ${_exclude_compiler} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE") - list(APPEND ${_exclude_lib_type} ${arg}) - elseif("x${_flag}" STREQUAL "xDEPENDS") - list(APPEND ${_depends} ${arg}) - else() - list(APPEND ${_sources} "${_dir}/${arg}") - endif() - endforeach() -endmacro() - -# Helper macro to parse CUSTOM BUILD instructions -macro(PARSE_CUSTOMBUILD_COMMAND _target _buildcmd _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends) - set(${_target}) - set(${_buildcmd}) - set(${_exclude_platforms}) - set(${_exclude_runtime}) - set(${_exclude_compiler}) - set(${_exclude_lib_type}) - set(${_depends}) - - set(_target_found FALSE) - set(_flag "") - - foreach(arg ${ARGN}) - if(NOT _target_found) - set(_target_found TRUE) - set(${_target} ${arg}) - elseif("x${arg}" STREQUAL 
"xEXCLUDE_HIP_PLATFORM" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_RUNTIME" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_COMPILER" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE" - OR "x${arg}" STREQUAL "xDEPENDS") - set(_flag ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_PLATFORM") - list(APPEND ${_exclude_platforms} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_RUNTIME") - list(APPEND ${_exclude_runtime} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_COMPILER") - list(APPEND ${_exclude_compiler} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE") - list(APPEND ${_exclude_lib_type} ${arg}) - elseif("x${_flag}" STREQUAL "xDEPENDS") - list(APPEND ${_depends} ${arg}) - else() - list(APPEND ${_buildcmd} ${arg}) # always before exclude lists - endif() - endforeach() -endmacro() - -# Helper macro to parse command part of CUSTOM BUILD instructions -macro(PARSE_CUSTOMBUILD_COMMAND_PART _compiler _target _target_type _sources _options) - set(${_compiler}) - set(${_target}) - set(${_target_type} "EXECUTABLE") - set(${_sources}) - set(${_options}) - set(_compiler_found FALSE) - set(_target_found FALSE) - - foreach(arg ${ARGN}) - if(NOT _compiler_found) - set(_compiler_found TRUE) - set(${_compiler} ${arg}) - elseif("x${arg}" STREQUAL "x-o") - set(_target_found TRUE) - elseif(_target_found) - set(${_target} ${arg}) - set(_target_found FALSE) - elseif("x${arg}" STREQUAL "x-c" OR "x${arg}" STREQUAL "x--genco") - set(${_target_type} "OBJECT") - list(APPEND ${_options} ${arg}) - elseif("x${arg}" STREQUAL "x-shared") - # Note: Currently all directed_tests are linux based. 
- set(${_target_type} "SHARED") - list(APPEND ${_options} ${arg}) - elseif("x${arg}" MATCHES "^x-I") - # -I - list(APPEND ${_options} ${arg}) - elseif("x${arg}" MATCHES "^x.*\.cpp$") - # cpp file - list(APPEND ${_sources} ${arg}) - elseif("x${arg}" MATCHES "^x.*\.c$") - # c file - list(APPEND ${_sources} ${arg}) - else() - list(APPEND ${_options} ${arg}) - endif() - endforeach() -endmacro() - -# Helper macro to parse TEST instructions -macro(PARSE_TEST_COMMAND _target _arguments _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type) - set(${_target}) - set(${_arguments} " ") - set(${_exclude_platforms}) - set(${_exclude_runtime}) - set(${_exclude_compiler}) - set(${_exclude_lib_type}) - - set(_target_found FALSE) - set(_flag "") - - foreach(arg ${ARGN}) - if(NOT _target_found) - set(_target_found TRUE) - set(${_target} ${arg}) - elseif("x${arg}" STREQUAL "xEXCLUDE_HIP_PLATFORM" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_RUNTIME" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_COMPILER" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE") - set(_flag ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_PLATFORM") - list(APPEND ${_exclude_platforms} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_RUNTIME") - list(APPEND ${_exclude_runtime} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_COMPILER") - list(APPEND ${_exclude_compiler} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE") - list(APPEND ${_exclude_lib_type} ${arg}) - else() - list(APPEND ${_arguments} ${arg}) # always before exclude lists - endif() - endforeach() -endmacro() - -# Helper macro to parse TEST_NAMED instructions -macro(PARSE_TEST_NAMED_COMMAND _target _testname _arguments _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type) - set(${_target}) - set(${_arguments} " ") - set(${_exclude_platforms}) - set(${_exclude_runtime}) - set(${_exclude_compiler}) - set(${_exclude_lib_type}) - - set(_target_found FALSE) - set(_testname_found FALSE) - set(_flag "") - - 
foreach(arg ${ARGN}) - if(NOT _target_found) - set(_target_found TRUE) - set(${_target} ${arg}) - elseif(NOT _testname_found) - set(_testname_found TRUE) - set(${_testname} ${arg}) - elseif("x${arg}" STREQUAL "xEXCLUDE_HIP_PLATFORM" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_RUNTIME" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_COMPILER" - OR "x${arg}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE") - set(_flag ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_PLATFORM") - list(APPEND ${_exclude_platforms} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_RUNTIME") - list(APPEND ${_exclude_runtime} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_COMPILER") - list(APPEND ${_exclude_compiler} ${arg}) - elseif("x${_flag}" STREQUAL "xEXCLUDE_HIP_LIB_TYPE") - list(APPEND ${_exclude_lib_type} ${arg}) - else() - list(APPEND ${_arguments} ${arg}) # always before exclude lists - endif() - endforeach() -endmacro() - -# Helper macro to insert key/value pair into "hashmap" -macro(INSERT_INTO_MAP _map _key _value) - set("${_map}_${_key}" "${_value}") -endmacro() - -# Helper macro to read key/value pair from "hashmap" -macro(READ_FROM_MAP _map _key _value) - set(${_value} "${${_map}_${_key}}") -endmacro() - - -# Helper macro to generate a test -macro(GENERATE_TEST _config testname cmdline) - set(TEST_CMD_LINE ${cmdline} ${ARGN}) - if(${_config} STREQUAL ${HIP_CTEST_CONFIG_DEFAULT}) - add_test(NAME ${testname} COMMAND ${TEST_CMD_LINE}) - else() - add_test(NAME ${testname} CONFIGURATIONS ${_config} COMMAND ${TEST_CMD_LINE}) - endif() - set_tests_properties(${testname} PROPERTIES PASS_REGULAR_EXPRESSION "PASSED" ENVIRONMENT HIP_PATH=${HIP_ROOT_DIR}) - set_tests_properties(${testname} PROPERTIES SKIP_RETURN_CODE 127 ENVIRONMENT HIP_PATH=${HIP_ROOT_DIR}) -endmacro() - -# Helper macro to create a test -macro(MAKE_NAMED_TEST _config exe testname) - # to generate hip original test - set(TEST_CMD_LINE ${PROJECT_BINARY_DIR}/${exe} ${ARGN}) - generate_test(${_config} ${testname} ${TEST_CMD_LINE}) - - # to 
generate test with tool enabled - if(DEFINED CMAKE_TESTING_TOOL) - # arguments passing to the testing tool - # , , , - set(TOOL_CMD_LINE ${CMAKE_TESTING_TOOL} ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${TEST_CMD_LINE}) - generate_test(${_config} ${testname}.prof ${TOOL_CMD_LINE}) - endif() -endmacro() - -# Helper macro to create a test with default name -macro(MAKE_TEST _config exe) - string(REPLACE " " "" smush_args ${ARGN}) - set(testname ${exe}${smush_args}.tst) - make_named_test(${_config} ${exe} ${testname} ${ARGN}) -endmacro() -#------------------------------------------------------------------------------- - -# Macro: HIT_ADD_FILES used to scan+add multiple files for testing. -file(GLOB HIP_LIB_FILES ${HIP_PATH}/lib/*) -macro(HIT_ADD_FILES _config _dir _label _parent) - foreach (file ${ARGN}) - # Build tests - execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/hit/parser --buildCMDs ${file} - OUTPUT_VARIABLE _contents - ERROR_QUIET - WORKING_DIRECTORY ${_dir} - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX REPLACE "\n" ";" _contents "${_contents}") - foreach(_cmd ${_contents}) - string(REGEX REPLACE " " ";" _cmd "${_cmd}") - parse_build_command(_target _sources _hipcc_options _clang_options _nvcc_options _link_options _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends ${_dir} ${_cmd}) - string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) - insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) - insert_into_map("_exclude" "${target}" TRUE) - elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - insert_into_map("_exclude" "${target}" TRUE) - elseif(${HIP_LIB_TYPE} IN_LIST _exclude_lib_type) - insert_into_map("_exclude" "${target}" TRUE) - else() - set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) - hip_reset_flags() - hip_add_executable(${target} ${_sources} HIPCC_OPTIONS ${_hipcc_options} CLANG_OPTIONS ${_clang_options} NVCC_OPTIONS ${_nvcc_options} EXCLUDE_FROM_ALL) - target_link_options(${target} PRIVATE ${_link_options}) - set_target_properties(${target} PROPERTIES OUTPUT_NAME ${_target} RUNTIME_OUTPUT_DIRECTORY ${_label} LINK_DEPENDS "${HIP_LIB_FILES}") - add_dependencies(${_parent} ${target}) - foreach(_dependency ${_depends}) - string(REGEX REPLACE "/" "." 
_dependency ${_label}/${_dependency}) - add_dependencies(${target} ${_dependency}) - endforeach() - endif() - endforeach() - - # Custom build commands - execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/hit/parser --customBuildCMDs ${file} - OUTPUT_VARIABLE _contents - ERROR_QUIET - WORKING_DIRECTORY ${_dir} - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX REPLACE "\n" ";" _contents "${_contents}") - string(REGEX REPLACE "%hc" "${HIP_HIPCC_EXECUTABLE}" _contents "${_contents}") - string(REGEX REPLACE "%hip-path" "${HIP_ROOT_DIR}" _contents "${_contents}") - string(REGEX REPLACE "%rocm-path" "${ROCM_PATH}" _contents "${_contents}") - string(REGEX REPLACE "%cc" "cc" _contents "${_contents}") - string(REGEX REPLACE "%cxx" "c++" _contents "${_contents}") - string(REGEX REPLACE "%S" ${_dir} _contents "${_contents}") - string(REGEX REPLACE "%T" ${_label} _contents "${_contents}") - foreach(_cmd ${_contents}) - string(REGEX REPLACE " " ";" _cmd "${_cmd}") - parse_custombuild_command(_target _buildcmd _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type _depends ${_cmd}) - string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) - insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - insert_into_map("_exclude" "${target}" TRUE) - elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) - insert_into_map("_exclude" "${target}" TRUE) - elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - insert_into_map("_exclude" "${target}" TRUE) - elseif(${HIP_LIB_TYPE} IN_LIST _exclude_lib_type) - insert_into_map("_exclude" "${target}" TRUE) - else() - parse_custombuild_command_part(_compiler _target_r _target_type _sources _options ${_buildcmd}) - string(REGEX REPLACE ";" " " _buildcmd "${_buildcmd}") - string(REGEX REPLACE ";" " " _options "${_options}") - - set(CHOICE_FLAG "${HIP_LIB_TYPE}" STREQUAL "static" AND "${_compiler}" MATCHES "hipcc$" - AND "${HIP_RUNTIME}" STREQUAL "rocclr" AND "${HIP_COMPILER}" STREQUAL "clang") - if (${CHOICE_FLAG} AND "${_target_type}" STREQUAL "EXECUTABLE") - # message(STATUS "hip_add_executable*:_target_r= ${_target_r} --- target= ${target} --- _sources=${_sources} --- _options=${_options}") - set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) - hip_reset_flags() - hip_add_executable(${target} ${_sources} HIPCC_OPTIONS ${_options} EXCLUDE_FROM_ALL) - set_target_properties(${target} PROPERTIES OUTPUT_NAME ${_target_r} RUNTIME_OUTPUT_DIRECTORY "." 
LINK_DEPENDS "${HIP_LIB_FILES}") - elseif(${CHOICE_FLAG} AND "${_target_type}" STREQUAL "SHARED") - # message(STATUS "hip_add_library*:_target_r= ${_target_r} --- target= ${target} --- _sources=${_sources} --- _options=${_options}") - set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) - hip_reset_flags() - hip_add_library(${target} ${_sources} HIPCC_OPTIONS ${_options} EXCLUDE_FROM_ALL ${_target_type}) - set_target_properties(${target} PROPERTIES OUTPUT_NAME ${_target_r} RUNTIME_OUTPUT_DIRECTORY "." LINK_DEPENDS "${HIP_LIB_FILES}" PREFIX "" SUFFIX "") - else() - # message(STATUS "add_custom_target*: target= ${target} _buildcmd= ${_buildcmd}") - add_custom_target(${target} COMMAND sh -c "${_buildcmd} -L${CMAKE_CURRENT_SOURCE_DIR}/build/lib -isystem ${CMAKE_CURRENT_SOURCE_DIR}/include") - endif() - add_dependencies(${_parent} ${target}) - foreach(_dependency ${_depends}) - string(REGEX REPLACE "/" "." _dependency ${_label}/${_dependency}) - add_dependencies(${target} ${_dependency}) - endforeach() - endif() - endforeach() - - # Add tests - execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/hit/parser --testCMDs ${file} - OUTPUT_VARIABLE _contents - ERROR_QUIET - WORKING_DIRECTORY ${_dir} - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX REPLACE "\n" ";" _contents "${_contents}") - foreach(_cmd ${_contents}) - string(REGEX REPLACE " " ";" _cmd "${_cmd}") - parse_test_command(_target _arguments _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type ${_cmd}) - string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - read_from_map("_exclude" "${target}" _exclude_test_from_build) - if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) - elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) - elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - elseif(${HIP_LIB_TYPE} IN_LIST _exclude_lib_type) - elseif(_exclude_test_from_build STREQUAL TRUE) - else() - make_test(${_config} ${_label}/${_target} ${_arguments}) - endif() - endforeach() - - # Add named tests - execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/hit/parser --testNamedCMDs ${file} - OUTPUT_VARIABLE _contents - ERROR_QUIET - WORKING_DIRECTORY ${_dir} - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX REPLACE "\n" ";" _contents "${_contents}") - foreach(_cmd ${_contents}) - string(REGEX REPLACE " " ";" _cmd "${_cmd}") - parse_test_named_command(_target _testname _arguments _exclude_platforms _exclude_runtime _exclude_compiler _exclude_lib_type ${_cmd}) - string(REGEX REPLACE "/" "." 
target ${_label}/${_target}) - read_from_map("_exclude" "${target}" _exclude_test_from_build) - if("all" IN_LIST _exclude_platforms OR ${HIP_PLATFORM} IN_LIST _exclude_platforms) - elseif(NOT _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - elseif(NOT _exclude_compiler AND ${HIP_RUNTIME} IN_LIST _exclude_runtime) - elseif(${HIP_RUNTIME} IN_LIST _exclude_runtime AND ${HIP_COMPILER} IN_LIST _exclude_compiler) - elseif(${HIP_LIB_TYPE} IN_LIST _exclude_lib_type) - elseif(_exclude_test_from_build STREQUAL TRUE) - else() - make_named_test(${_config} ${_label}/${_target} ${_label}/${_testname}.tst ${_arguments}) - endif() - endforeach() - endforeach() -endmacro() - -# Macro: HIT_ADD_DIRECTORY to scan+add all files in a directory for testing -macro(HIT_ADD_DIRECTORY _dir _label) - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${_label} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) - string(REGEX REPLACE "/" "." _parent ${_label}) - add_custom_target(${_parent}) - file(GLOB files "${_dir}/*.c*") - hit_add_files(${HIP_CTEST_CONFIG_DEFAULT} ${_dir} ${_label} ${parent} ${files}) -endmacro() - -# Macro: HIT_ADD_DIRECTORY_RECURSIVE to scan+add all files in a directory+subdirectories for testing -macro(HIT_ADD_DIRECTORY_RECURSIVE _config _dir _label) - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${_label} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) - string(REGEX REPLACE "/" "." _parent ${_label}) - add_custom_target(${_parent}) - if(${ARGC} EQUAL 4) - add_dependencies(${ARGV3} ${_parent}) - endif() - file(GLOB children RELATIVE ${_dir} ${_dir}/*) - set(dirlist "") - foreach(child ${children}) - if(IS_DIRECTORY ${_dir}/${child}) - list(APPEND dirlist ${child}) - else() - hit_add_files(${_config} ${_dir} ${_label} ${_parent} ${child}) - endif() - endforeach() - foreach(child ${dirlist}) - string(REGEX REPLACE "/" "." 
_parent ${_label}) - hit_add_directory_recursive(${_config} ${_dir}/${child} ${_label}/${child} ${_parent}) - endforeach() -endmacro() - -# vim: ts=4:sw=4:expandtab:smartindent diff --git a/tests/hit/parser b/tests/hit/parser deleted file mode 100755 index b493e4f6ec..0000000000 --- a/tests/hit/parser +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env perl -# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -use 5.006; use v5.10.1; -use warnings; -use File::Basename; -use File::Spec; - -my $patBUILD = "^".quotemeta(" * BUILD:"); -my $patTEST = "^".quotemeta(" * TEST:"); -my $patTEST_NAMED = "^".quotemeta(" * TEST_NAMED:"); -my $patBUILD_CMD = "^".quotemeta(" * BUILD_CMD:"); - -# Scan input file for HIT information -sub parse_file { - my $file = shift; - (my $exe = $file) =~ s/\.[^.]+$//g; - my (@buildCMDs, @testCMDs, @testNamedCMDs, @customBuildCMDs); - if (open (SOURCE, '<:encoding(UTF-8)', "$file")) { - while () { - my $line=$_; - # Look for BUILD instructions - if ($line =~ /$patBUILD/) { - $line =~ s/^ \* BUILD: //g; # Remove " * BUILD: " - $line =~ s/%s/$file/g; # Substitute %s -> filename - $line =~ s/%t/$exe/g; # Substitute %t -> targetname - $line =~ s/\R//g; # Remove line endings - push @buildCMDs, $line; - } - # Look for TEST instructions - if ($line =~ /$patTEST/) { - $line =~ s/^ \* TEST: //g; # Remove " * TEST: " - $line =~ s/%s/$file/g; # Substitute %s -> filename - $line =~ s/%t/$exe/g; # Subsitute %t -> targetname - $line =~ s/\R//g; # Remove line endings - push @testCMDs, $line; - } - # Look for TEST_NAMED instructions - if ($line =~ /$patTEST_NAMED/) { - $line =~ s/^ \* TEST_NAMED: //g;# Remove " * TEST_NAMED: " - $line =~ s/%s/$file/g; # Substitute %s -> filename - $line =~ s/%t/$exe/g; # Subsitute %t -> targetname - $line =~ s/\R//g; # Remove line endings - push @testNamedCMDs, $line; - } - # Look for BUILD_CMD instructions - if ($line =~ /$patBUILD_CMD/) { - $line =~ s/^ \* BUILD_CMD: //g; # Remove " * BUILD_CMD: " - $line =~ s/%s/$file/g; # Substitute %s -> filename - $line =~ s/%t/$exe/g; # Substitute %t -> targetname - # Substitute %hc -> /path/to/hipcc and %hip-path -> /path/to/hip happens in cmake - # Substitute %cc -> cc and %cxx -> c++ happens in cmake - # Substitute %S -> src dir and %T -> target build dir happens in cmake - $line =~ s/\R//g; # Remove line endings - push @customBuildCMDs, $line; - } - } - close(SOURCE); - } - return 
(\@buildCMDs, \@testCMDs, \@testNamedCMDs, \@customBuildCMDs); -} - -# Exit if no arguments specified -if(scalar @ARGV == 0){ - print "No Arguments passed, exiting ...\n"; - exit(-1); -} - -# Parse command -my @options = (); -my $retBuildCMDs = 0; -my $retTestCMDs = 0; -my $retTestNamedCMDs = 0; -my $retCustomBuildCMDs = 0; -foreach $arg (@ARGV) { - if ($retBuildCMDs or $retTestCMDs or $retTestNamedCMDs or $retCustomBuildCMDs) { - push (@options, $arg); - } - if ($arg eq '--buildCMDs') { - $retBuildCMDs = 1; - } - if ($arg eq '--testCMDs') { - $retTestCMDs = 1; - } - if ($arg eq '--testNamedCMDs') { - $retTestNamedCMDs = 1; - } - if ($arg eq '--customBuildCMDs') { - $retCustomBuildCMDs = 1; - } -} - -# Atleast one command needs to be specified -if (($retBuildCMDs eq 0) and ($retTestCMDs eq 0) and ($retTestNamedCMDs eq 0) and($retCustomBuildCMDs eq 0)) { - die "Usage: $0 <--buildCMDs|--testCMDs|--testNamedCMDs|--customBuildCMDs> FILENAMEs\n"; -} - -# Iterate over input files -foreach $file (@options) { - # Convert absolute path to path relative to working directory - my $relfile = File::Spec->abs2rel($file); - my ($buildCMDs, $testCMDs, $testNamedCMDs, $customBuildCMDs) = parse_file("$relfile"); - if ($retBuildCMDs) { - # print "BuildCMDs:\n"; - print "$_\n" for @$buildCMDs; - } - if ($retTestCMDs) { - # print "TestCMDs:\n"; - print "$_\n" for @$testCMDs; - } - if ($retTestNamedCMDs) { - # print "TestNamedCMDs:\n"; - print "$_\n" for @$testNamedCMDs; - } - if ($retCustomBuildCMDs) { - # print "CustomBuildCMDs:\n"; - print "$_\n" for @$customBuildCMDs; - } -} - -# vim: ts=4:sw=4:expandtab:smartindent diff --git a/tests/src/.gitignore b/tests/src/.gitignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp b/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp deleted file mode 100644 index f6eb78d3bc..0000000000 --- a/tests/src/Functional/device/hipFuncDeviceSynchronize.cpp +++ /dev/null @@ 
-1,76 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Test for checking the functionality of - * hipError_t hipDeviceSynchronize(); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -#define _SIZE sizeof(int) * 1024 * 1024 -#define NUM_STREAMS 2 - -__global__ void Iter(int* Ad, int num) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - // Kernel loop designed to execute very slowly... ... ... 
so we can test timing-related - // behavior below - if (tx == 0) { - for (int i = 0; i < num; i++) { - Ad[tx] += 1; - } - } -} - -int main() { - int* A[NUM_STREAMS]; - int* Ad[NUM_STREAMS]; - hipStream_t stream[NUM_STREAMS]; - for (int i = 0; i < NUM_STREAMS; i++) { - HIPCHECK(hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault)); - A[i][0] = 1; - HIPCHECK(hipMalloc((void**)&Ad[i], _SIZE)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - for (int i = 0; i < NUM_STREAMS; i++) { - HIPCHECK(hipMemcpyAsync(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice, stream[i])); - } - for (int i = 0; i < NUM_STREAMS; i++) { - hipLaunchKernelGGL(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, stream[i], Ad[i], 1 << 30); - } - for (int i = 0; i < NUM_STREAMS; i++) { - HIPCHECK(hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost, stream[i])); - } - - - // This first check but relies on the kernel running for so long that the D2H async memcopy has - // not started yet. This will be true in an optimal asynchronous implementation. Conservative - // implementations which synchronize the hipMemcpyAsync will fail, ie if - // HIP_LAUNCH_BLOCKING=true - HIPASSERT(1 << 30 != A[NUM_STREAMS - 1][0] - 1); - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(1 << 30 == A[NUM_STREAMS - 1][0] - 1); - passed(); -} diff --git a/tests/src/Functional/device/hipFuncGetDevice.cpp b/tests/src/Functional/device/hipFuncGetDevice.cpp deleted file mode 100644 index ebe06c25d6..0000000000 --- a/tests/src/Functional/device/hipFuncGetDevice.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipGetDevice(int *device); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - int numDevices = 0; - int device; - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipGetDevice(&device)); - HIPASSERT(device == i); - } - passed(); -} diff --git a/tests/src/Functional/device/hipFuncSetDevice.cpp b/tests/src/Functional/device/hipFuncSetDevice.cpp deleted file mode 100644 index c1c009e80f..0000000000 --- a/tests/src/Functional/device/hipFuncSetDevice.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t EXCLUDE_HIP_PLATFORM - * HIT_END - */ - -#include "test_common.h" - -int main() { - int numDevices = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipSetDevice(i)); - } - HIPASSERT(hipErrorInvalidDevice == hipSetDevice(numDevices)); - passed(); -} diff --git a/tests/src/Functional/device/hipFuncSetDeviceFlags.cpp b/tests/src/Functional/device/hipFuncSetDeviceFlags.cpp deleted file mode 100644 index 26ef49861a..0000000000 --- a/tests/src/Functional/device/hipFuncSetDeviceFlags.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - unsigned flag = 0; - HIPCHECK(hipDeviceReset()); - - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - - for (int j = 0; j < deviceCount; j++) { - HIPCHECK(hipSetDevice(j)); - - for (int i = 0; i < 4; i++) { - flag = 1 << i; - printf("Flag=%x\n", flag); - HIPCHECK(hipSetDeviceFlags(flag)); - // HIPCHECK_API(hipSetDeviceFlags(flag), hipErrorInvalidValue); - } - - flag = 0; - } - - passed(); -} diff --git a/tests/src/Functional/host/hipFloat16.cpp b/tests/src/Functional/host/hipFloat16.cpp deleted file mode 100644 index 1d8e84977a..0000000000 --- a/tests/src/Functional/host/hipFloat16.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t 1.2 2.3 - * HIT_END - */ - -#include -#include -#include "test_common.h" - -int main(int argc, char* argv[]) { - // Testing that the compiler supports host _Float16 conversions - float init_value = atof(argv[1]); - _Float16 value_float16 = static_cast<_Float16>(init_value); - float result_value = static_cast(value_float16); - - if(std::abs(result_value - init_value) >= 0.01){ - printf("init: %f\n", init_value); - printf("result: %f\n", result_value); - printf("diff: %f\n", std::abs(result_value - init_value)); - failed("Failed host _Float16 test."); - } - - // Testing that the compiler supports host __fp16 conversions - init_value = atof(argv[2]); - __fp16 value_fp16 = static_cast<__fp16>(init_value); - result_value = static_cast(value_fp16); - - if(std::abs(result_value - init_value) >= 0.01){ - printf("init: %f\n", init_value); - printf("result: %f\n", result_value); - printf("diff: %f\n", std::abs(result_value - init_value)); - failed("Failed host __fp16 test."); - } - - passed(); -} diff --git a/tests/src/Makefile b/tests/src/Makefile deleted file mode 100644 index 829d45a08c..0000000000 --- a/tests/src/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -HIP_PATH=.. 
- -TARGET=hcc -include $(HIP_PATH)/examples/common/hip.prologue.make - -SOURCES = hipMemset.cpp -OBJECTS=$(SOURCES:.cpp=.o) - -EXECUTABLE=hipMemset - -$(EXECUTABLE): $(HIP_DEPS) $(OBJECTS) - $(HCC) $(HLDFLAGS) $(OBJECTS) -o $@ - -.cpp.o: - $(HCC) $(HCFLAGS) -c $< -o $@ - @$(CC) -MM -MT $@ $(CFLAGS) -c $< > $(@:.o=.d) - -clean: hip_clean - rm -rf $(EXECUTABLE) $(OBJECTS) - -include $(HIP_PATH)/examples/common/hip.epilogue.make diff --git a/tests/src/Negative/Device/hipDeviceGetAttribute.cpp b/tests/src/Negative/Device/hipDeviceGetAttribute.cpp deleted file mode 100644 index c3909f1982..0000000000 --- a/tests/src/Negative/Device/hipDeviceGetAttribute.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hipDeviceUtil.h" - -int main() { - int pi; - int attr = 0; - // hipDeviceAttribute_t attr = hipDeviceAttributeMaxThreadsPerBlock; - HIP_CHECK(hipDeviceGetAttribute(NULL, hipDeviceAttribute_t(attr), 0), hipDeviceGetAttribute); - HIP_CHECK(hipDeviceGetAttribute(&pi, hipDeviceAttribute_t(attr), 0), hipDeviceGetAttribute); - attr = -1; - HIP_CHECK(hipDeviceGetAttribute(NULL, hipDeviceAttribute_t(attr), 0), hipDeviceGetAttribute); - HIP_CHECK(hipDeviceGetAttribute(&pi, hipDeviceAttribute_t(attr), 0), hipDeviceGetAttribute); - attr = 0; - HIP_CHECK(hipDeviceGetAttribute(NULL, hipDeviceAttribute_t(attr), -1), hipDeviceGetAttribute); - HIP_CHECK(hipDeviceGetAttribute(&pi, hipDeviceAttribute_t(attr), -1), hipDeviceGetAttribute); - attr = -1; - HIP_CHECK(hipDeviceGetAttribute(NULL, hipDeviceAttribute_t(attr), -1), hipDeviceGetAttribute); - HIP_CHECK(hipDeviceGetAttribute(&pi, hipDeviceAttribute_t(attr), -1), hipDeviceGetAttribute); -} diff --git a/tests/src/Negative/Device/hipDeviceUtil.h b/tests/src/Negative/Device/hipDeviceUtil.h deleted file mode 100644 index 46b3ea7c1f..0000000000 --- a/tests/src/Negative/Device/hipDeviceUtil.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef HIPDEVICEUTIL_H -#define HIPDEVICEUTIL_H - -#include 
"hip/hip_runtime_api.h" -#include - -#define HIP_CHECK(status, func) \ - std::cout << #func << " returned " << hipGetErrorString(status) << " in " << __func__ \ - << " at " << __LINE__ << " in file " << __FILE__ << std::endl; - -#endif diff --git a/tests/src/Negative/Device/hipGetDevice.cpp b/tests/src/Negative/Device/hipGetDevice.cpp deleted file mode 100644 index 8826ff508e..0000000000 --- a/tests/src/Negative/Device/hipGetDevice.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hipDeviceUtil.h" - -int main() { - int device; - HIP_CHECK(hipGetDevice(NULL), hipGetDevice); - HIP_CHECK(hipGetDevice(&device), hipGetDevice); -} diff --git a/tests/src/Negative/Device/hipGetDeviceCount.cpp b/tests/src/Negative/Device/hipGetDeviceCount.cpp deleted file mode 100644 index 59509c41fc..0000000000 --- a/tests/src/Negative/Device/hipGetDeviceCount.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hipDeviceUtil.h" - -int main() { - int deviceCnt; - HIP_CHECK(hipGetDeviceCount(&deviceCnt), hipGetDeviceCount); - HIP_CHECK(hipGetDeviceCount(0), hipGetDeviceCount); -} diff --git a/tests/src/Negative/Device/hipGetDeviceProperties.cpp b/tests/src/Negative/Device/hipGetDeviceProperties.cpp deleted file mode 100644 index 1da9b70bf3..0000000000 --- a/tests/src/Negative/Device/hipGetDeviceProperties.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hipDeviceUtil.h" - -int main() { - hipDeviceProp_t props; - HIP_CHECK(hipGetDeviceProperties(&props, 0), hipGetDeviceProperties); - HIP_CHECK(hipGetDeviceProperties(NULL, 0), hipGetDeviceProperties); - HIP_CHECK(hipGetDeviceProperties(NULL, -1), hipGetDeviceProperties); - HIP_CHECK(hipGetDeviceProperties(&props, -1), hipGetDeviceProperties); - HIP_CHECK(hipGetDeviceProperties(NULL, 1024), hipGetDeviceProperties); - HIP_CHECK(hipGetDeviceProperties(&props, 1024), 
hipGetDeviceProperties); -} diff --git a/tests/src/Negative/Device/hipSetDevice.cpp b/tests/src/Negative/Device/hipSetDevice.cpp deleted file mode 100644 index ec0549e760..0000000000 --- a/tests/src/Negative/Device/hipSetDevice.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "test_common.h" - -int main() { - int numDevices = 0; - - HIPCHECK_API(hipGetDeviceCount(&numDevices), hipSuccess); - if (numDevices > 0) { - for (int deviceId = 0; deviceId < numDevices; deviceId++) { - HIPCHECK_API(hipSetDevice(deviceId), hipSuccess); - } - HIPCHECK_API(hipSetDevice(numDevices), hipErrorInvalidDevice); - HIPCHECK_API(hipSetDevice(-1), hipErrorInvalidDevice); - } - else { - failed("Error: failed to find any compatible devices."); - } - - passed(); -} diff --git a/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp deleted file mode 100644 index 6a96db06e6..0000000000 --- a/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define SIZE 1024 - -int main(){ - - void *Sd; - hipError_t e; - char S[SIZE]="This is not a device symbol"; - - HIPCHECK(hipMalloc(&Sd,SIZE)); - - e = hipMemcpyFromSymbol(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost); - HIPASSERT(e==hipErrorInvalidSymbol); - - e = hipMemcpyFromSymbol(S, NULL, SIZE, 0, hipMemcpyDeviceToHost); - HIPASSERT(e==hipErrorInvalidSymbol); - - HIPCHECK(hipFree(Sd)); - - passed(); -} diff --git a/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp deleted file mode 100644 index e9b64a16a8..0000000000 --- a/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define SIZE 1024 - -int main(){ - - void *Sd; - hipError_t e; - char S[SIZE]="This is not a device symbol"; - - HIPCHECK(hipMalloc(&Sd,SIZE)); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - e = hipMemcpyFromSymbolAsync(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost, stream); - HIPASSERT(e==hipErrorInvalidSymbol); - - e = hipMemcpyFromSymbolAsync(S, NULL, SIZE, 0, hipMemcpyDeviceToHost, stream); - HIPASSERT(e==hipErrorInvalidSymbol); - - HIPCHECK(hipFree(Sd)); - - passed(); -} diff --git a/tests/src/Negative/memory/hipMemcpyToSymbol.cpp b/tests/src/Negative/memory/hipMemcpyToSymbol.cpp deleted file mode 100644 index 3a9e5dfdb9..0000000000 --- a/tests/src/Negative/memory/hipMemcpyToSymbol.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define SIZE 1024 - -int main(){ - - void *Sd; - hipError_t e; - char S[SIZE]="This is not a device symbol"; - - HIPCHECK(hipMalloc(&Sd,SIZE)); - - e = hipMemcpyToSymbol(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice); - HIPASSERT(e==hipErrorInvalidSymbol); - - e = hipMemcpyToSymbol(NULL, S, SIZE, 0, hipMemcpyHostToDevice); - HIPASSERT(e==hipErrorInvalidSymbol); - - HIPCHECK(hipFree(Sd)); - - passed(); -} diff --git a/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp b/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp deleted file mode 100644 index 8b7d479dc8..0000000000 --- a/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define SIZE 100 - -int main(){ - - void *Sd; - hipError_t e; - char S[SIZE]="This is not a device symbol"; - - HIPCHECK(hipMalloc(&Sd,SIZE)); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - e = hipMemcpyToSymbolAsync(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice, stream); - HIPASSERT(e==hipErrorInvalidSymbol); - - e = hipMemcpyToSymbolAsync(NULL, S, SIZE, 0, hipMemcpyHostToDevice, stream); - HIPASSERT(e==hipErrorInvalidSymbol); - - HIPCHECK(hipFree(Sd)); - - passed(); -} diff --git a/tests/src/Negative/memory/hipMemory.cpp b/tests/src/Negative/memory/hipMemory.cpp deleted file mode 100644 index 34825d1e5b..0000000000 --- a/tests/src/Negative/memory/hipMemory.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_RUNTIME rocclr - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define SIZE 100 - -int main(){ - hipError_t e; - char str[SIZE]="Hi, I am Ellesemere. What is ur name?"; - - e = hipMemcpy(0, str, SIZE, hipMemcpyHostToDevice); - HIPASSERT(e==hipErrorInvalidValue); - - e = hipMemcpy(NULL, str, SIZE, hipMemcpyHostToDevice); - HIPASSERT(e==hipErrorInvalidValue); - - e = hipMemset(0,99,80); - HIPASSERT(e==hipErrorInvalidValue); - - passed(); -} diff --git a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp deleted file mode 100644 index 4d22cee103..0000000000 --- a/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_RUNTIME rocclr - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main(){ - - hipError_t e; - hipStream_t stream; - - e = hipStreamCreateWithFlags(&stream, -1); - HIPASSERT(e==hipErrorInvalidValue); - - e = hipStreamCreateWithFlags(&stream, 2); - HIPASSERT(e==hipErrorInvalidValue); - - passed(); -} diff --git a/tests/src/clara/clara.hpp b/tests/src/clara/clara.hpp deleted file mode 100644 index 10b70da644..0000000000 --- a/tests/src/clara/clara.hpp +++ /dev/null @@ -1,1165 +0,0 @@ -// v1.0-develop.2 -// See https://github.com/philsquared/Clara - -#ifndef CLARA_HPP_INCLUDED -#define CLARA_HPP_INCLUDED - -#ifndef CLARA_CONFIG_CONSOLE_WIDTH -#define CLARA_CONFIG_CONSOLE_WIDTH 80 -#endif - -#ifndef CLARA_TEXTFLOW_CONFIG_CONSOLE_WIDTH -#define CLARA_TEXTFLOW_CONFIG_CONSOLE_WIDTH CLARA_CONFIG_CONSOLE_WIDTH -#endif - -// ----------- #included from clara_textflow.hpp ----------- - -// TextFlowCpp -// -// A single-header library for wrapping and laying out basic text, by Phil Nash -// -// This work is licensed under the BSD 2-Clause license. 
-// See the accompanying LICENSE file, or the one at https://opensource.org/licenses/BSD-2-Clause -// -// This project is hosted at https://github.com/philsquared/textflowcpp - -#ifndef CLARA_TEXTFLOW_HPP_INCLUDED -#define CLARA_TEXTFLOW_HPP_INCLUDED - -#include -#include -#include -#include - -#ifndef CLARA_TEXTFLOW_CONFIG_CONSOLE_WIDTH -#define CLARA_TEXTFLOW_CONFIG_CONSOLE_WIDTH 80 -#endif - - -namespace clara { -namespace TextFlow { - -inline auto isWhitespace(char c) -> bool { - static std::string chars = " \t\n\r"; - return chars.find(c) != std::string::npos; -} -inline auto isBreakableBefore(char c) -> bool { - static std::string chars = "[({<|"; - return chars.find(c) != std::string::npos; -} -inline auto isBreakableAfter(char c) -> bool { - static std::string chars = "])}>.,:;*+-=&/\\"; - return chars.find(c) != std::string::npos; -} - -class Columns; - -class Column { - std::vector m_strings; - size_t m_width = CLARA_TEXTFLOW_CONFIG_CONSOLE_WIDTH; - size_t m_indent = 0; - size_t m_initialIndent = std::string::npos; - - public: - class iterator { - friend Column; - - Column const& m_column; - size_t m_stringIndex = 0; - size_t m_pos = 0; - - size_t m_len = 0; - size_t m_end = 0; - bool m_suffix = false; - - iterator(Column const& column, size_t stringIndex) - : m_column(column), m_stringIndex(stringIndex) {} - - auto line() const -> std::string const& { return m_column.m_strings[m_stringIndex]; } - - auto isBoundary(size_t at) const -> bool { - assert(at > 0); - assert(at <= line().size()); - - return at == line().size() || - (isWhitespace(line()[at]) && !isWhitespace(line()[at - 1])) || - isBreakableBefore(line()[at]) || isBreakableAfter(line()[at - 1]); - } - - void calcLength() { - assert(m_stringIndex < m_column.m_strings.size()); - - m_suffix = false; - auto width = m_column.m_width - indent(); - m_end = m_pos; - while (m_end < line().size() && line()[m_end] != '\n') ++m_end; - - if (m_end < m_pos + width) { - m_len = m_end - m_pos; - } else { - 
size_t len = width; - while (len > 0 && !isBoundary(m_pos + len)) --len; - while (len > 0 && isWhitespace(line()[m_pos + len - 1])) --len; - - if (len > 0) { - m_len = len; - } else { - m_suffix = true; - m_len = width - 1; - } - } - } - - auto indent() const -> size_t { - auto initial = - m_pos == 0 && m_stringIndex == 0 ? m_column.m_initialIndent : std::string::npos; - return initial == std::string::npos ? m_column.m_indent : initial; - } - - auto addIndentAndSuffix(std::string const& plain) const -> std::string { - return std::string(indent(), ' ') + (m_suffix ? plain + "-" : plain); - } - - public: - explicit iterator(Column const& column) : m_column(column) { - assert(m_column.m_width > m_column.m_indent); - assert(m_column.m_initialIndent == std::string::npos || - m_column.m_width > m_column.m_initialIndent); - calcLength(); - if (m_len == 0) m_stringIndex++; // Empty string - } - - auto operator*() const -> std::string { - assert(m_stringIndex < m_column.m_strings.size()); - assert(m_pos <= m_end); - if (m_pos + m_column.m_width < m_end) - return addIndentAndSuffix(line().substr(m_pos, m_len)); - else - return addIndentAndSuffix(line().substr(m_pos, m_end - m_pos)); - } - - auto operator++() -> iterator& { - m_pos += m_len; - if (m_pos < line().size() && line()[m_pos] == '\n') - m_pos += 1; - else - while (m_pos < line().size() && isWhitespace(line()[m_pos])) ++m_pos; - - if (m_pos == line().size()) { - m_pos = 0; - ++m_stringIndex; - } - if (m_stringIndex < m_column.m_strings.size()) calcLength(); - return *this; - } - auto operator++(int) -> iterator { - iterator prev(*this); - operator++(); - return prev; - } - - auto operator==(iterator const& other) const -> bool { - return m_pos == other.m_pos && m_stringIndex == other.m_stringIndex && - &m_column == &other.m_column; - } - auto operator!=(iterator const& other) const -> bool { return !operator==(other); } - }; - using const_iterator = iterator; - - explicit Column(std::string const& text) { 
m_strings.push_back(text); } - - auto width(size_t newWidth) -> Column& { - assert(newWidth > 0); - m_width = newWidth; - return *this; - } - auto indent(size_t newIndent) -> Column& { - m_indent = newIndent; - return *this; - } - auto initialIndent(size_t newIndent) -> Column& { - m_initialIndent = newIndent; - return *this; - } - - auto width() const -> size_t { return m_width; } - auto begin() const -> iterator { return iterator(*this); } - auto end() const -> iterator { return {*this, m_strings.size()}; } - - inline friend std::ostream& operator<<(std::ostream& os, Column const& col) { - bool first = true; - for (auto line : col) { - if (first) - first = false; - else - os << "\n"; - os << line; - } - return os; - } - - auto operator+(Column const& other) -> Columns; - - auto toString() const -> std::string { - std::ostringstream oss; - oss << *this; - return oss.str(); - } -}; - -class Spacer : public Column { - public: - explicit Spacer(size_t spaceWidth) : Column("") { width(spaceWidth); } -}; - -class Columns { - std::vector m_columns; - - public: - class iterator { - friend Columns; - struct EndTag {}; - - std::vector const& m_columns; - std::vector m_iterators; - size_t m_activeIterators; - - iterator(Columns const& columns, EndTag) - : m_columns(columns.m_columns), m_activeIterators(0) { - m_iterators.reserve(m_columns.size()); - - for (auto const& col : m_columns) m_iterators.push_back(col.end()); - } - - public: - explicit iterator(Columns const& columns) - : m_columns(columns.m_columns), m_activeIterators(m_columns.size()) { - m_iterators.reserve(m_columns.size()); - - for (auto const& col : m_columns) m_iterators.push_back(col.begin()); - } - - auto operator==(iterator const& other) const -> bool { - return m_iterators == other.m_iterators; - } - auto operator!=(iterator const& other) const -> bool { - return m_iterators != other.m_iterators; - } - auto operator*() const -> std::string { - std::string row, padding; - - for (size_t i = 0; i < 
m_columns.size(); ++i) { - auto width = m_columns[i].width(); - if (m_iterators[i] != m_columns[i].end()) { - std::string col = *m_iterators[i]; - row += padding + col; - if (col.size() < width) - padding = std::string(width - col.size(), ' '); - else - padding = ""; - } else { - padding += std::string(width, ' '); - } - } - return row; - } - auto operator++() -> iterator& { - for (size_t i = 0; i < m_columns.size(); ++i) { - if (m_iterators[i] != m_columns[i].end()) ++m_iterators[i]; - } - return *this; - } - auto operator++(int) -> iterator { - iterator prev(*this); - operator++(); - return prev; - } - }; - using const_iterator = iterator; - - auto begin() const -> iterator { return iterator(*this); } - auto end() const -> iterator { return {*this, iterator::EndTag()}; } - - auto operator+=(Column const& col) -> Columns& { - m_columns.push_back(col); - return *this; - } - auto operator+(Column const& col) -> Columns { - Columns combined = *this; - combined += col; - return combined; - } - - inline friend std::ostream& operator<<(std::ostream& os, Columns const& cols) { - bool first = true; - for (auto line : cols) { - if (first) - first = false; - else - os << "\n"; - os << line; - } - return os; - } - - auto toString() const -> std::string { - std::ostringstream oss; - oss << *this; - return oss.str(); - } -}; - -inline auto Column::operator+(Column const& other) -> Columns { - Columns cols; - cols += *this; - cols += other; - return cols; -} -} // namespace TextFlow -} // namespace clara - -#endif // CLARA_TEXTFLOW_HPP_INCLUDED - -// ----------- end of #include from clara_textflow.hpp ----------- -// ........... 
back in clara.hpp - - -#include -#include -#include - -#if !defined(CLARA_PLATFORM_WINDOWS) && \ - (defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)) -#define CLARA_PLATFORM_WINDOWS -#endif - -namespace clara { -namespace detail { - -// Traits for extracting arg and return type of lambdas (for single argument lambdas) -template -struct UnaryLambdaTraits : UnaryLambdaTraits {}; - -template -struct UnaryLambdaTraits { - static const bool isValid = false; -}; - -template -struct UnaryLambdaTraits { - static const bool isValid = true; - using ArgType = typename std::remove_const::type>::type; - ; - using ReturnType = ReturnT; -}; - -class TokenStream; - -// Transport for raw args (copied from main args, or supplied via init list for testing) -class Args { - friend TokenStream; - std::string m_exeName; - std::vector m_args; - - public: - Args(int argc, char* argv[]) { - m_exeName = argv[0]; - for (int i = 1; i < argc; ++i) m_args.push_back(argv[i]); - } - - Args(std::initializer_list args) - : m_exeName(*args.begin()), m_args(args.begin() + 1, args.end()) {} - - auto exeName() const -> std::string { return m_exeName; } -}; - -// Wraps a token coming from a token stream. 
These may not directly correspond to strings as a -// single string may encode an option + its argument if the : or = form is used -enum class TokenType { Option, Argument }; -struct Token { - TokenType type; - std::string token; -}; - -inline auto isOptPrefix(char c) -> bool { - return c == '-' -#ifdef CLARA_PLATFORM_WINDOWS - || c == '/' -#endif - ; -} - -// Abstracts iterators into args as a stream of tokens, with option arguments uniformly handled -class TokenStream { - using Iterator = std::vector::const_iterator; - Iterator it; - Iterator itEnd; - std::vector m_tokenBuffer; - - void loadBuffer() { - m_tokenBuffer.resize(0); - - // Skip any empty strings - while (it != itEnd && it->empty()) ++it; - - if (it != itEnd) { - auto const& next = *it; - if (isOptPrefix(next[0])) { - auto delimiterPos = next.find_first_of(" :="); - if (delimiterPos != std::string::npos) { - m_tokenBuffer.push_back({TokenType::Option, next.substr(0, delimiterPos)}); - m_tokenBuffer.push_back({TokenType::Argument, next.substr(delimiterPos + 1)}); - } else { - if (next[1] != '-' && next.size() > 2) { - std::string opt = "- "; - for (size_t i = 1; i < next.size(); ++i) { - opt[1] = next[i]; - m_tokenBuffer.push_back({TokenType::Option, opt}); - } - } else { - m_tokenBuffer.push_back({TokenType::Option, next}); - } - } - } else { - m_tokenBuffer.push_back({TokenType::Argument, next}); - } - } - } - - public: - explicit TokenStream(Args const& args) : TokenStream(args.m_args.begin(), args.m_args.end()) {} - - TokenStream(Iterator it, Iterator itEnd) : it(it), itEnd(itEnd) { loadBuffer(); } - - explicit operator bool() const { return !m_tokenBuffer.empty() || it != itEnd; } - - auto count() const -> size_t { return m_tokenBuffer.size() + (itEnd - it); } - - auto operator*() const -> Token { - assert(!m_tokenBuffer.empty()); - return m_tokenBuffer.front(); - } - - auto operator-> () const -> Token const* { - assert(!m_tokenBuffer.empty()); - return &m_tokenBuffer.front(); - } - - auto 
operator++() -> TokenStream& { - if (m_tokenBuffer.size() >= 2) { - m_tokenBuffer.erase(m_tokenBuffer.begin()); - } else { - if (it != itEnd) ++it; - loadBuffer(); - } - return *this; - } -}; - - -class ResultBase { - public: - enum Type { Ok, LogicError, RuntimeError }; - - protected: - ResultBase(Type type) : m_type(type) {} - virtual ~ResultBase() = default; - - virtual void enforceOk() const = 0; - - Type m_type; -}; - -template -class ResultValueBase : public ResultBase { - public: - auto value() const -> T const& { - enforceOk(); - return m_value; - } - - protected: - ResultValueBase(Type type) : ResultBase(type) {} - - ResultValueBase(ResultValueBase const& other) : ResultBase(other) { - if (m_type == ResultBase::Ok) new (&m_value) T(other.m_value); - } - - ResultValueBase(Type, T const& value) : ResultBase(Ok) { new (&m_value) T(value); } - - auto operator=(ResultValueBase const& other) -> ResultValueBase& { - if (m_type == ResultBase::Ok) m_value.~T(); - ResultBase::operator=(other); - if (m_type == ResultBase::Ok) new (&m_value) T(other.m_value); - return *this; - } - - ~ResultValueBase() { - if (m_type == Ok) m_value.~T(); - } - - union { - T m_value; - }; -}; - -template <> -class ResultValueBase : public ResultBase { - protected: - using ResultBase::ResultBase; -}; - -template -class BasicResult : public ResultValueBase { - public: - template - explicit BasicResult(BasicResult const& other) - : ResultValueBase(other.type()), m_errorMessage(other.errorMessage()) { - assert(type() != ResultBase::Ok); - } - - template - static auto ok(U const& value) -> BasicResult { - return {ResultBase::Ok, value}; - } - static auto ok() -> BasicResult { return {ResultBase::Ok}; } - static auto logicError(std::string const& message) -> BasicResult { - return {ResultBase::LogicError, message}; - } - static auto runtimeError(std::string const& message) -> BasicResult { - return {ResultBase::RuntimeError, message}; - } - - explicit operator bool() const { return m_type == 
ResultBase::Ok; } - auto type() const -> ResultBase::Type { return m_type; } - auto errorMessage() const -> std::string { return m_errorMessage; } - - protected: - virtual void enforceOk() const { - // !TBD: If no exceptions, std::terminate here or something - switch (m_type) { - case ResultBase::LogicError: - throw std::logic_error(m_errorMessage); - case ResultBase::RuntimeError: - throw std::runtime_error(m_errorMessage); - case ResultBase::Ok: - break; - } - } - - std::string m_errorMessage; // Only populated if resultType is an error - - BasicResult(ResultBase::Type type, std::string const& message) - : ResultValueBase(type), m_errorMessage(message) { - assert(m_type != ResultBase::Ok); - } - - using ResultValueBase::ResultValueBase; - using ResultBase::m_type; -}; - -enum class ParseResultType { Matched, NoMatch, ShortCircuitAll, ShortCircuitSame }; - -class ParseState { - public: - ParseState(ParseResultType type, TokenStream const& remainingTokens) - : m_type(type), m_remainingTokens(remainingTokens) {} - - auto type() const -> ParseResultType { return m_type; } - auto remainingTokens() const -> TokenStream { return m_remainingTokens; } - - private: - ParseResultType m_type; - TokenStream m_remainingTokens; -}; - -using Result = BasicResult; -using ParserResult = BasicResult; -using InternalParseResult = BasicResult; - -struct HelpColumns { - std::string left; - std::string right; -}; - -template -inline auto convertInto(std::string const& source, T& target) -> ParserResult { - std::stringstream ss; - ss << source; - ss >> target; - if (ss.fail()) - return ParserResult::runtimeError("Unable to convert '" + source + "' to destination type"); - else - return ParserResult::ok(ParseResultType::Matched); -} -inline auto convertInto(std::string const& source, std::string& target) -> ParserResult { - target = source; - return ParserResult::ok(ParseResultType::Matched); -} -inline auto convertInto(std::string const& source, bool& target) -> ParserResult { - 
std::string srcLC = source; - std::transform(srcLC.begin(), srcLC.end(), srcLC.begin(), - [](char c) { return static_cast(::tolower(c)); }); - if (srcLC == "y" || srcLC == "1" || srcLC == "true" || srcLC == "yes" || srcLC == "on") - target = true; - else if (srcLC == "n" || srcLC == "0" || srcLC == "false" || srcLC == "no" || srcLC == "off") - target = false; - else - return ParserResult::runtimeError("Expected a boolean value but did not recognise: '" + - source + "'"); - return ParserResult::ok(ParseResultType::Matched); -} - -struct BoundRefBase { - BoundRefBase() = default; - BoundRefBase(BoundRefBase const&) = delete; - BoundRefBase(BoundRefBase&&) = delete; - BoundRefBase& operator=(BoundRefBase const&) = delete; - BoundRefBase& operator=(BoundRefBase&&) = delete; - - virtual ~BoundRefBase() = default; - - virtual auto isFlag() const -> bool = 0; - virtual auto isContainer() const -> bool { return false; } - virtual auto setValue(std::string const& arg) -> ParserResult = 0; - virtual auto setFlag(bool flag) -> ParserResult = 0; -}; - -struct BoundValueRefBase : BoundRefBase { - auto isFlag() const -> bool override { return false; } - - auto setFlag(bool) -> ParserResult override { - return ParserResult::logicError("Flags can only be set on boolean fields"); - } -}; - -struct BoundFlagRefBase : BoundRefBase { - auto isFlag() const -> bool override { return true; } - - auto setValue(std::string const& arg) -> ParserResult override { - bool flag; - auto result = convertInto(arg, flag); - if (result) setFlag(flag); - return result; - } -}; - -template -struct BoundRef : BoundValueRefBase { - T& m_ref; - - explicit BoundRef(T& ref) : m_ref(ref) {} - - auto setValue(std::string const& arg) -> ParserResult override { - return convertInto(arg, m_ref); - } -}; - -template -struct BoundRef> : BoundValueRefBase { - std::vector& m_ref; - - explicit BoundRef(std::vector& ref) : m_ref(ref) {} - - auto isContainer() const -> bool override { return true; } - - auto 
setValue(std::string const& arg) -> ParserResult override { - T temp; - auto result = convertInto(arg, temp); - if (result) m_ref.push_back(temp); - return result; - } -}; - -struct BoundFlagRef : BoundFlagRefBase { - bool& m_ref; - - explicit BoundFlagRef(bool& ref) : m_ref(ref) {} - - auto setFlag(bool flag) -> ParserResult override { - m_ref = flag; - return ParserResult::ok(ParseResultType::Matched); - } -}; - -template -struct LambdaInvoker { - static_assert(std::is_same::value, - "Lambda must return void or clara::ParserResult"); - - template - static auto invoke(L const& lambda, ArgType const& arg) -> ParserResult { - return lambda(arg); - } -}; - -template <> -struct LambdaInvoker { - template - static auto invoke(L const& lambda, ArgType const& arg) -> ParserResult { - lambda(arg); - return ParserResult::ok(ParseResultType::Matched); - } -}; - -template -inline auto invokeLambda(L const& lambda, std::string const& arg) -> ParserResult { - ArgType temp; - auto result = convertInto(arg, temp); - return !result ? 
result - : LambdaInvoker::ReturnType>::invoke(lambda, temp); -}; - - -template -struct BoundLambda : BoundValueRefBase { - L m_lambda; - - static_assert(UnaryLambdaTraits::isValid, "Supplied lambda must take exactly one argument"); - explicit BoundLambda(L const& lambda) : m_lambda(lambda) {} - - auto setValue(std::string const& arg) -> ParserResult override { - return invokeLambda::ArgType>(m_lambda, arg); - } -}; - -template -struct BoundFlagLambda : BoundFlagRefBase { - L m_lambda; - - static_assert(UnaryLambdaTraits::isValid, "Supplied lambda must take exactly one argument"); - static_assert(std::is_same::ArgType, bool>::value, - "flags must be boolean"); - - explicit BoundFlagLambda(L const& lambda) : m_lambda(lambda) {} - - auto setFlag(bool flag) -> ParserResult override { - return LambdaInvoker::ReturnType>::invoke(m_lambda, flag); - } -}; - -enum class Optionality { Optional, Required }; - -struct Parser; - -class ParserBase { - public: - virtual ~ParserBase() = default; - virtual auto validate() const -> Result { return Result::ok(); } - virtual auto parse(std::string const& exeName, TokenStream const& tokens) const - -> InternalParseResult = 0; - virtual auto cardinality() const -> size_t { return 1; } - - auto parse(Args const& args) const -> InternalParseResult { - return parse(args.exeName(), TokenStream(args)); - } -}; - -template -class ComposableParserImpl : public ParserBase { - public: - template - auto operator|(T const& other) const -> Parser; -}; - -// Common code and state for Args and Opts -template -class ParserRefImpl : public ComposableParserImpl { - protected: - Optionality m_optionality = Optionality::Optional; - std::shared_ptr m_ref; - std::string m_hint; - std::string m_description; - - explicit ParserRefImpl(std::shared_ptr const& ref) : m_ref(ref) {} - - public: - template - ParserRefImpl(T& ref, std::string const& hint) - : m_ref(std::make_shared>(ref)), m_hint(hint) {} - - template - ParserRefImpl(LambdaT const& ref, std::string 
const& hint) - : m_ref(std::make_shared>(ref)), m_hint(hint) {} - - auto operator()(std::string const& description) -> DerivedT& { - m_description = description; - return static_cast(*this); - } - - auto optional() -> DerivedT& { - m_optionality = Optionality::Optional; - return static_cast(*this); - }; - - auto required() -> DerivedT& { - m_optionality = Optionality::Required; - return static_cast(*this); - }; - - auto isOptional() const -> bool { return m_optionality == Optionality::Optional; } - - auto cardinality() const -> size_t override { - if (m_ref->isContainer()) - return 0; - else - return 1; - } - - auto hint() const -> std::string { return m_hint; } -}; - -class ExeName : public ComposableParserImpl { - std::shared_ptr m_name; - std::shared_ptr m_ref; - - template - static auto makeRef(LambdaT const& lambda) -> std::shared_ptr { - return std::make_shared>(lambda); - } - - public: - ExeName() : m_name(std::make_shared("")) {} - - explicit ExeName(std::string& ref) : ExeName() { - m_ref = std::make_shared>(ref); - } - - template - explicit ExeName(LambdaT const& lambda) : ExeName() { - m_ref = std::make_shared>(lambda); - } - - // The exe name is not parsed out of the normal tokens, but is handled specially - auto parse(std::string const&, TokenStream const& tokens) const - -> InternalParseResult override { - return InternalParseResult::ok(ParseState(ParseResultType::NoMatch, tokens)); - } - - auto name() const -> std::string { return *m_name; } - auto set(std::string const& newName) -> ParserResult { - auto lastSlash = newName.find_last_of("\\/"); - auto filename = (lastSlash == std::string::npos) ? 
newName : newName.substr(lastSlash + 1); - - *m_name = filename; - if (m_ref) - return m_ref->setValue(filename); - else - return ParserResult::ok(ParseResultType::Matched); - } -}; - -class Arg : public ParserRefImpl { - public: - using ParserRefImpl::ParserRefImpl; - - auto parse(std::string const&, TokenStream const& tokens) const - -> InternalParseResult override { - auto validationResult = validate(); - if (!validationResult) return InternalParseResult(validationResult); - - auto remainingTokens = tokens; - auto const& token = *remainingTokens; - if (token.type != TokenType::Argument) - return InternalParseResult::ok(ParseState(ParseResultType::NoMatch, remainingTokens)); - - auto result = m_ref->setValue(remainingTokens->token); - if (!result) - return InternalParseResult(result); - else - return InternalParseResult::ok(ParseState(ParseResultType::Matched, ++remainingTokens)); - } -}; - -inline auto normaliseOpt(std::string const& optName) -> std::string { -#ifdef CLARA_PLATFORM_WINDOWS - if (optName[0] == '/') - return "-" + optName.substr(1); - else -#endif - return optName; -} - -class Opt : public ParserRefImpl { - protected: - std::vector m_optNames; - - public: - template - explicit Opt(LambdaT const& ref) - : ParserRefImpl(std::make_shared>(ref)) {} - - explicit Opt(bool& ref) : ParserRefImpl(std::make_shared(ref)) {} - - template - Opt(LambdaT const& ref, std::string const& hint) : ParserRefImpl(ref, hint) {} - - template - Opt(T& ref, std::string const& hint) : ParserRefImpl(ref, hint) {} - - auto operator[](std::string const& optName) -> Opt& { - m_optNames.push_back(optName); - return *this; - } - - auto getHelpColumns() const -> std::vector { - std::ostringstream oss; - bool first = true; - for (auto const& opt : m_optNames) { - if (first) - first = false; - else - oss << ", "; - oss << opt; - } - if (!m_hint.empty()) oss << " <" << m_hint << ">"; - return {{oss.str(), m_description}}; - } - - auto isMatch(std::string const& optToken) const -> 
bool { - auto normalisedToken = normaliseOpt(optToken); - for (auto const& name : m_optNames) { - if (normaliseOpt(name) == normalisedToken) return true; - } - return false; - } - - using ParserBase::parse; - - auto parse(std::string const&, TokenStream const& tokens) const - -> InternalParseResult override { - auto validationResult = validate(); - if (!validationResult) return InternalParseResult(validationResult); - - auto remainingTokens = tokens; - if (remainingTokens && remainingTokens->type == TokenType::Option) { - auto const& token = *remainingTokens; - if (isMatch(token.token)) { - if (m_ref->isFlag()) { - auto result = m_ref->setFlag(true); - if (!result) return InternalParseResult(result); - if (result.value() == ParseResultType::ShortCircuitAll) - return InternalParseResult::ok(ParseState(result.value(), remainingTokens)); - } else { - ++remainingTokens; - if (!remainingTokens) - return InternalParseResult::runtimeError("Expected argument following " + - token.token); - auto const& argToken = *remainingTokens; - if (argToken.type != TokenType::Argument) - return InternalParseResult::runtimeError("Expected argument following " + - token.token); - auto result = m_ref->setValue(argToken.token); - if (!result) return InternalParseResult(result); - if (result.value() == ParseResultType::ShortCircuitAll) - return InternalParseResult::ok(ParseState(result.value(), remainingTokens)); - } - return InternalParseResult::ok( - ParseState(ParseResultType::Matched, ++remainingTokens)); - } - } - return InternalParseResult::ok(ParseState(ParseResultType::NoMatch, remainingTokens)); - } - - auto validate() const -> Result override { - if (m_optNames.empty()) return Result::logicError("No options supplied to Opt"); - for (auto const& name : m_optNames) { - if (name.empty()) return Result::logicError("Option name cannot be empty"); -#ifdef CLARA_PLATFORM_WINDOWS - if (name[0] != '-' && name[0] != '/') - return Result::logicError("Option name must begin with '-' or '/'"); 
-#else - if (name[0] != '-') return Result::logicError("Option name must begin with '-'"); -#endif - } - return ParserRefImpl::validate(); - } -}; - -struct Help : Opt { - Help(bool& showHelpFlag) - : Opt([&](bool flag) { - showHelpFlag = flag; - return ParserResult::ok(ParseResultType::ShortCircuitAll); - }) { - static_cast (*this)("display usage information")["-?"]["-h"]["--help"].optional(); - } -}; - - -struct Parser : ParserBase { - mutable ExeName m_exeName; - std::vector m_options; - std::vector m_args; - - auto operator|=(ExeName const& exeName) -> Parser& { - m_exeName = exeName; - return *this; - } - - auto operator|=(Arg const& arg) -> Parser& { - m_args.push_back(arg); - return *this; - } - - auto operator|=(Opt const& opt) -> Parser& { - m_options.push_back(opt); - return *this; - } - - auto operator|=(Parser const& other) -> Parser& { - m_options.insert(m_options.end(), other.m_options.begin(), other.m_options.end()); - m_args.insert(m_args.end(), other.m_args.begin(), other.m_args.end()); - return *this; - } - - template - auto operator|(T const& other) const -> Parser { - return Parser(*this) |= other; - } - - auto getHelpColumns() const -> std::vector { - std::vector cols; - for (auto const& o : m_options) { - auto childCols = o.getHelpColumns(); - cols.insert(cols.end(), childCols.begin(), childCols.end()); - } - return cols; - } - - void writeToStream(std::ostream& os) const { - if (!m_exeName.name().empty()) { - os << "usage:\n" - << " " << m_exeName.name() << " "; - bool required = true, first = true; - for (auto const& arg : m_args) { - if (first) - first = false; - else - os << " "; - if (arg.isOptional() && required) { - os << "["; - required = false; - } - os << "<" << arg.hint() << ">"; - if (arg.cardinality() == 0) os << " ... 
"; - } - if (!required) os << "]"; - if (!m_options.empty()) os << " options"; - os << "\n\nwhere options are:" << std::endl; - } - - auto rows = getHelpColumns(); - size_t consoleWidth = CLARA_CONFIG_CONSOLE_WIDTH; - size_t optWidth = 0; - for (auto const& cols : rows) optWidth = (std::max)(optWidth, cols.left.size() + 2); - - for (auto const& cols : rows) { - auto row = TextFlow::Column(cols.left).width(optWidth).indent(2) + TextFlow::Spacer(4) + - TextFlow::Column(cols.right).width(consoleWidth - 7 - optWidth); - os << row << std::endl; - } - } - - friend auto operator<<(std::ostream& os, Parser const& parser) -> std::ostream& { - parser.writeToStream(os); - return os; - } - - auto validate() const -> Result override { - for (auto const& opt : m_options) { - auto result = opt.validate(); - if (!result) return result; - } - for (auto const& arg : m_args) { - auto result = arg.validate(); - if (!result) return result; - } - return Result::ok(); - } - - using ParserBase::parse; - - auto parse(std::string const& exeName, TokenStream const& tokens) const - -> InternalParseResult override { - struct ParserInfo { - ParserBase const* parser = nullptr; - size_t count = 0; - }; - const size_t totalParsers = m_options.size() + m_args.size(); - assert(totalParsers < 512); - // ParserInfo parseInfos[totalParsers]; // <-- this is what we really want to do - ParserInfo parseInfos[512]; - - { - size_t i = 0; - for (auto const& opt : m_options) parseInfos[i++].parser = &opt; - for (auto const& arg : m_args) parseInfos[i++].parser = &arg; - } - - m_exeName.set(exeName); - - auto result = InternalParseResult::ok(ParseState(ParseResultType::NoMatch, tokens)); - while (result.value().remainingTokens()) { - bool tokenParsed = false; - - for (size_t i = 0; i < totalParsers; ++i) { - auto& parseInfo = parseInfos[i]; - if (parseInfo.parser->cardinality() == 0 || - parseInfo.count < parseInfo.parser->cardinality()) { - result = parseInfo.parser->parse(exeName, 
result.value().remainingTokens()); - if (!result) return result; - if (result.value().type() != ParseResultType::NoMatch) { - tokenParsed = true; - ++parseInfo.count; - break; - } - } - } - - if (result.value().type() == ParseResultType::ShortCircuitAll) return result; - if (!tokenParsed) - return InternalParseResult::runtimeError("Unrecognised token: " + - result.value().remainingTokens()->token); - } - // !TBD Check missing required options - return result; - } -}; - -template -template -auto ComposableParserImpl::operator|(T const& other) const -> Parser { - return Parser() | static_cast(*this) | other; -} -} // namespace detail - - -// A Combined parser -using detail::Parser; - -// A parser for options -using detail::Opt; - -// A parser for arguments -using detail::Arg; - -// Wrapper for argc, argv from main() -using detail::Args; - -// Specifies the name of the executable -using detail::ExeName; - -// Convenience wrapper for option parser that specifies the help option -using detail::Help; - -// enum of result types from a parse -using detail::ParseResultType; - -// Result type for parser operation -using detail::ParserResult; - - -} // namespace clara - -#endif // CLARA_HPP_INCLUDED diff --git a/tests/src/compiler/hipClassKernel.cpp b/tests/src/compiler/hipClassKernel.cpp deleted file mode 100644 index 5e368dd3b2..0000000000 --- a/tests/src/compiler/hipClassKernel.cpp +++ /dev/null @@ -1,355 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include "hipClassKernel.h" - -#ifdef ENABLE_OVERLOAD_OVERRIDE_TESTS -__global__ void -ovrdClassKernel(bool* result_ecd){ - int tid = threadIdx.x + blockIdx.x * blockDim.x; - testOvrD tobj1; - result_ecd[tid] = (tobj1.ovrdFunc1() == 30); -} - -void HipClassTests::TestForOverride(void){ - bool *result_ecd, *result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - - hipLaunchKernelGGL(ovrdClassKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); -} - - -__global__ void -ovldClassKernel(bool* result_ecd){ - int tid = threadIdx.x + blockIdx.x * blockDim.x; - testFuncOvld tfo1; - result_ecd[tid] = (tfo1.func1(10) == 20) - && (tfo1.func1(10,10) == 30); -} - -void HipClassTests::TestForOverload(void){ - bool *result_ecd, *result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - - hipLaunchKernelGGL(ovldClassKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); -} -#endif - -#ifdef ENABLE_FRIEND_TEST -// check for friend -__global__ void -friendClassKernel(bool* result_ecd){ - int tid = threadIdx.x + blockIdx.x * blockDim.x; - testFrndB tfb1; - result_ecd[tid] = (tfb1.showA() == 10); -} -#endif - -// check sizeof empty class is 1 -__global__ void -emptyClassKernel(bool* result_ecd) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - testClassEmpty ob1,ob2; - result_ecd[tid] = (sizeof(testClassEmpty) == 1) - && (&ob1 != &ob2); -} - -void HipClassTests::TestForEmptyClass(void){ - bool *result_ecd, *result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); 
- - hipLaunchKernelGGL(emptyClassKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); -} - -// tests for classes >8 bytes -__global__ void - sizeClassBKernel(bool* result_ecd) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - result_ecd[tid] = (sizeof(testSizeB) == 12) - && (sizeof(testSizeC) == 16) - && (sizeof(testSizeP1) == 6) - && (sizeof(testSizeP2) == 13) - && (sizeof(testSizeP3) == 8); -} - -void HipClassTests::TestForClassBSize(void){ - bool *result_ecd, *result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - - hipLaunchKernelGGL(sizeClassBKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); -} - -__global__ void -sizeClassKernel(bool* result_ecd) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - result_ecd[tid] = (sizeof(testSizeA) == 16) - && (sizeof(testSizeDerived) == 24) - && (sizeof(testSizeDerived2) == 20); -} - -void HipClassTests::TestForClassSize(void){ - bool *result_ecd, *result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - - hipLaunchKernelGGL(sizeClassKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); -} - -#ifdef ENABLE_VIRTUAL_TESTS -__global__ void - sizeVirtualClassKernel(bool* result_ecd) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - result_ecd[tid] = (sizeof(testSizeDV) == 16) - && (sizeof(testSizeDerivedDV) == 16) - && (sizeof(testSizeVirtDerPack) == 24) - && (sizeof(testSizeVirtDer) == 24) - && (sizeof(testSizeDerMulti) == 48) ; - } - -void HipClassTests::TestForVirtualClassSize(void){ - bool *result_ecd, 
*result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - - hipLaunchKernelGGL(sizeVirtualClassKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); -} -#endif - -// check pass by value -__global__ void -passByValueKernel(testPassByValue obj, bool* result_ecd) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - result_ecd[tid] = (obj.exI == 10) - && (obj.exC == 'C'); -} - -void HipClassTests::TestForPassByValue(void){ - bool *result_ecd,*result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - - testPassByValue exObj; - exObj.exI = 10; - exObj.exC = 'C'; - hipLaunchKernelGGL(passByValueKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - exObj, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); -} - - // check obj created with hipMalloc -__global__ void -mallocObjKernel(testPassByValue *obj, bool* result_ecd) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - result_ecd[tid] = (obj->exI == 100) - && (obj->exC == 'C'); -} - -void HipClassTests::TestForMallocPassByValue(void){ - bool *result_ecd,*result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - - - testPassByValue *exObjM; - HIPCHECK(hipMalloc(&exObjM, sizeof(testPassByValue))); - exObjM->exI = 100; - exObjM->exC = 'C'; - hipLaunchKernelGGL(mallocObjKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - exObjM, - result_ecd); - - HipClassTests::VerifyResult(result_ech,result_ecd); - HipClassTests::FreeMem(result_ech,result_ecd); - -} - -// check if constr and destr are accessible from kernel -#ifdef ENABLE_DESTRUCTOR_TEST -__global__ void -testDeviceClassKernel() { - int tid = threadIdx.x + blockIdx.x * 
blockDim.x; - testDeviceClass ob1; - testDeviceClass ob2; - ob2.iVar = 10; -} - -void HipClassTests::TestForConsrtDesrt(){ - testDeviceClass tDC; - hipLaunchKernelGGL(testDeviceClassKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0); -} -#endif - -#ifdef ENABLE_FRIEND_TEST -void HipClassTests::TestForFriend(void){ - bool *result_ecd, *result_ech; - result_ech = HipClassTests::AllocateHostMemory(); - result_ecd = HipClassTests::AllocateDeviceMemory(); - hipLaunchKernelGGL(friendClassKernel, - dim3(BLOCKS), - dim3(THREADS_PER_BLOCK), - 0, - 0, - result_ecd); -} -#endif - -bool* HipClassTests::AllocateHostMemory(void){ - bool *result_ech; - HIPCHECK(hipHostMalloc(&result_ech, - NBOOL, - hipHostMallocDefault)); - return result_ech; -} - -bool* HipClassTests::AllocateDeviceMemory(void){ - bool* result_ecd; - HIPCHECK(hipMalloc(&result_ecd, - NBOOL)); - HIPCHECK(hipMemset(result_ecd, - false, - NBOOL)); - return result_ecd; -} - -void HipClassTests::VerifyResult(bool* result_ech, bool* result_ecd){ - HIPCHECK(hipMemcpy(result_ech, - result_ecd, - BLOCKS*sizeof(bool), - hipMemcpyDeviceToHost)); - // validation on host side - for (int i = 0; i < BLOCKS; i++) { - HIPASSERT(result_ech[i] == true); - } -} - -void HipClassTests::FreeMem(bool* result_ech, bool* result_ecd){ - HIPCHECK(hipHostFree(result_ech)); - HIPCHECK(hipFree(result_ecd)); -} - -int main(){ - HipClassTests classTests; - classTests.TestForEmptyClass(); - test_passed(TestForEmptyClass); - classTests.TestForClassBSize(); - test_passed(TestForClassBSize); - classTests.TestForClassSize(); - test_passed(TestForClassSize); - classTests.TestForPassByValue(); - test_passed(TestForPassByValue); - -#ifdef ENABLE_OVERLOAD_OVERRIDE_TESTS - classTests.TestForOverload(); - test_passed(TestForOverload); - classTests.TestForOverride(); - test_passed(TestForOverride); -#endif - -#ifdef ENABLE_FRIEND_TEST - classTests.TestForFriend(); - test_passed(TestForFriend); -#endif - -// classTests.TestForMallocPassByValue(); 
- // test_passed(TestForMallocPassByValue); #this test is crashing - -#ifdef ENABLE_VIRTUAL_TESTS - classTests.TestForVirtualClassSize(); - test_passed(TestForVirtualClassSize); -#endif - -#ifdef ENABLE_DESTRUCTOR_TEST - classTests.TestForConsrtDesrt(); - test_passed(TestForConsrtDesrt); -#endif -} diff --git a/tests/src/compiler/hipClassKernel.h b/tests/src/compiler/hipClassKernel.h deleted file mode 100644 index c4ffe04cb0..0000000000 --- a/tests/src/compiler/hipClassKernel.h +++ /dev/null @@ -1,239 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#ifndef _COMPILER_HIPCLASSKERNEL_H_ -#define _COMPILER_HIPCLASSKERNEL_H_ - -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -static const int BLOCKS = 512; -static const int THREADS_PER_BLOCK = 1; -static const int ENABLE_DESTRUCTOR_TEST = 0; -static const int ENABLE_VIRTUAL_TESTS = 0; -static const int ENABLE_FRIEND_TEST = 0; -static const int ENABLE_OVERLAD_OVERRIDE_TESTS = 0; -size_t NBOOL = BLOCKS * sizeof(bool); - -#define test_passed(test_name) printf("%s %s PASSED!%s\n", KGRN, #test_name, KNRM); - -#ifdef ENABLE_OVERLOAD_OVERRIDE_TESTS -class testFuncOvld{ - public: - int __host__ __device__ func1(int a){ - return a + 10; - } - - int __host__ __device__ func1(int a , int b){ - return a + b + 10; - } - -}; - - -class testOvrB{ - public: - int __host__ __device__ ovrdFunc1(){ - return 10; - } - - -}; - - -class testOvrD: public testOvrB{ - public: - int __host__ __device__ ovrdFunc1(){ - int x = testOvrB::ovrdFunc1(); - return x + 20; - } - -}; -#endif - -#ifdef ENABLE_FRIEND_TEST -class testFrndA{ - private: - int fa = 10; - public: - friend class testFrndB; -}; - -class testFrndB{ - public: - __host__ __device__ int showA(){ - testFrndA x; - return x.fa; - } -}; -#endif - -class testClassEmpty {}; - -class testPassByValue{ - public: - int exI; - char exC; -}; - -class testSizeA { - public: - float xa; - int ia; - double da; - static char ca; -}; - -class testSizeDerived : testSizeA { - public: - float fd; -}; - -#pragma pack(push,4) -class testSizeDerived2 : testSizeA { - public: - float fd; -}; -#pragma pack(pop) - -class testSizeB { - public: - char ab; - int ib; - char cb; -}; - -#ifdef ENBABLE_VIRTUAL_TESTS -class testSizeVirtDer : public virtual testSizeB { - public: - int ivd; -}; - -class testSizeVirtDer1 : public virtual testSizeB { - public: - int ivd1; -}; - -class testSizeDerMulti : public testSizeVirtDer, public testSizeVirtDer1 { - public: - int ivd2; -}; - -#pragma pack(push,4) -class testSizeVirtDerPack : 
public virtual testSizeB { - public: - int ivd; -}; -#pragma pack(pop) -#endif - -class testSizeC { - public: - char ac; - int ic; - int bc[2]; -}; - -#ifdef ENABLE_VIRTUAL_TESTS -class testSizeDV { - public: - virtual void __host__ __device__ func1(); - private: - int iDV; - -}; - -class testSizeDerivedDV : testSizeDV { - public: - virtual void __host__ __device__ funcD1(); - private: - int iDDV; -}; -#endif - -#pragma pack(push, 1) -class testSizeP1 { - public: - char ap; - int ip; - char cp; -}; -#pragma pack(pop) - -#pragma pack(push, 1) -class testSizeP2 { - public: - char ap1; - int ip1; - int bp1[2]; -}; -#pragma pack(pop) - -#pragma pack(push, 2) -class testSizeP3 { - public: - char ap2; - int ip2; - char cp2; -}; -#pragma pack(pop) - -#ifdef ENABLE_DESTRUCTOR_TEST -class testDeviceClass { - public: - int iVar; - __host__ __device__ testDeviceClass(); - __host__ __device__ testDeviceClass(int a); - __host__ __device__ ~testDeviceClass(); -}; - -__host__ __device__ -testDeviceClass::testDeviceClass() { - iVar = 5; -} - -__host__ __device__ -testDeviceClass::testDeviceClass(int a) { - iVar = a; -} -#endif - -#endif // _HIPCLASSKERNEL_H_ - -class HipClassTests{ - public: - void TestForEmptyClass(void); - void TestForClassBSize(void); - void TestForClassSize(void); - void TestForVirtualClassSize(void); - void TestForPassByValue(void); - void TestForMallocPassByValue(void); - void TestForConsrtDesrt(void); - void TestForOverload(void); - void TestForOverride(void); - - bool* AllocateHostMemory(void); - bool* AllocateDeviceMemory(void); - void VerifyResult(bool* result_ech, bool* result_ecd); - void FreeMem(bool* result_ech, bool* result_ecd); -}; diff --git a/tests/src/context/hipCtx_simple.cpp b/tests/src/context/hipCtx_simple.cpp deleted file mode 100644 index deeac8bfc0..0000000000 --- a/tests/src/context/hipCtx_simple.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp
- * TEST: %t
- * HIT_END
- */
-
-#include "hip/hip_runtime.h"
-#include "test_common.h"
-// Smoke test of the context API on device 0: create a context, query it, pop it, destroy it.
-int main(int argc, char* argv[]) {
-    HipTest::parseStandardArguments(argc, argv, true);
-
-    HIPCHECK(hipInit(0));
-
-    hipDevice_t device;
-    hipDevice_t device1;
-    hipCtx_t ctx;
-    hipCtx_t ctx1;
-    // Every call is wrapped in HIPCHECK; the values written to ctx1 and device1 are never compared with ctx or device.
-    HIPCHECK(hipDeviceGet(&device, 0));
-    HIPCHECK(hipCtxCreate(&ctx, 0, device));
-    HIPCHECK(hipCtxGetCurrent(&ctx1));
-    HIPCHECK(hipCtxGetDevice(&device1));
-    HIPCHECK(hipCtxPopCurrent(&ctx1));  // ctx1 is overwritten by the pop
-    HIPCHECK(hipCtxGetCurrent(&ctx1));  // queried again after the pop; the result is not inspected
-
-    HIPCHECK(hipCtxDestroy(ctx));
-
-    passed();
-};  // the ';' after the closing brace is an empty declaration
diff --git a/tests/src/context/hipDrvGetPCIBusId.cpp b/tests/src/context/hipDrvGetPCIBusId.cpp
deleted file mode 100644
index 087301a371..0000000000
--- a/tests/src/context/hipDrvGetPCIBusId.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE.
-*/ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include -#include -#include - -int main() { - hipInit(0); - hipDevice_t device; - hipDeviceGet(&device, 0); - char pciBusId[10]; - memset(pciBusId, 0, 10); - hipDeviceGetPCIBusId(pciBusId, 100, device); - printf("PCI Bus ID= %s\n", pciBusId); - return 0; -} diff --git a/tests/src/context/hipDrvMemcpy.cpp b/tests/src/context/hipDrvMemcpy.cpp deleted file mode 100644 index 3bf08923b6..0000000000 --- a/tests/src/context/hipDrvMemcpy.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" - -#define LEN 1024 -#define SIZE LEN << 2 - -int main() { - int *A, *B; - hipDeviceptr_t Ad, Bd; - A = new int[LEN]; - B = new int[LEN]; - - for (int i = 0; i < LEN; i++) { - A[i] = i; - } - - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - - hipMemcpyHtoD(Ad, A, SIZE); - hipMemcpyDtoD(Bd, Ad, SIZE); - hipMemcpyDtoH(B, Bd, SIZE); - - for (int i = 0; i < 16; i++) { - std::cout << A[i] << " " << B[i] << std::endl; - } - - int *Ah, *Bh; - hipHostMalloc(&Ah, SIZE, 0); - hipHostMalloc(&Bh, SIZE, 0); - memcpy(Ah, A, SIZE); - hipStream_t stream; - hipStreamCreate(&stream); - - hipMemcpyHtoDAsync(Ad, Ah, SIZE, stream); - hipStreamSynchronize(stream); - hipMemcpyDtoDAsync(Bd, Ad, SIZE, stream); - hipStreamSynchronize(stream); - hipMemcpyDtoHAsync(Bh, Bd, SIZE, stream); - hipStreamSynchronize(stream); - - std::cout << Ah[10] << " " << Bh[10] << std::endl; -} diff --git a/tests/src/context/hipMemsetD8.cpp b/tests/src/context/hipMemsetD8.cpp deleted file mode 100644 index 0712ec5bbd..0000000000 --- a/tests/src/context/hipMemsetD8.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for hipMemsetD8. -// Also serves as a template for other tests. 
-
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp
- * TEST: %t
- * //Small copy
- * TEST: %t -N 10 --memsetval 0x42
- * // Oddball size
- * TEST: %t -N 10013 --memsetval 0x5a
- * // Big copy
- * TEST: %t -N 256M --memsetval 0xa6
- * HIT_END
- */
-
-#include "hip/hip_runtime.h"
-#include "test_common.h"
-// Fills an N-byte device buffer with memsetval via hipMemsetD8, copies it back and compares every byte on the host.
-int main(int argc, char* argv[]) {
-    HipTest::parseStandardArguments(argc, argv, true);
-    size_t Nbytes = N * sizeof(char);  // N and memsetval are not declared in this file; presumably set from -N / --memsetval (see the TEST lines above) - confirm in test_common.h
-    char* A_h;
-    hipDeviceptr_t A_d;
-    A_h = new char[Nbytes];
-
-    HIPCHECK(hipMalloc((void**)&A_d, Nbytes));
-
-    printf("Size=%zu memsetval=%2x \n", Nbytes, memsetval);
-    HIPCHECK(hipMemsetD8(A_d, memsetval, Nbytes));
-
-    HIPCHECK(hipMemcpy(A_h, (void*)A_d, Nbytes, hipMemcpyDeviceToHost));
-
-    for (int i = 0; i < N; i++) {  // NOTE(review): int index compared against N, whose type is not visible here - confirm there is no signed/unsigned mismatch
-        if (A_h[i] != memsetval) {
-            failed("mismatch at index:%d computed:%02x, memsetval:%02x\n", i, (int)A_h[i],
-                   (int)memsetval);
-        }
-    }
-
-    hipFree((void*)A_d);  // return value not checked, unlike the calls above
-    delete[] A_h;
-    passed();
-}
diff --git a/tests/src/cppstd/hipInvocable11.cpp b/tests/src/cppstd/hipInvocable11.cpp
deleted file mode 100644
index fbcc3c1365..0000000000
--- a/tests/src/cppstd/hipInvocable11.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp HIPCC_OPTIONS -std=c++11
- * TEST: %t
- * HIT_END
- */
-
-#include "is_callable_test.hpp"
-#include
-// Built with -std=c++11 (see BUILD above). main only reports success; the checks in is_callable_test.hpp are static_asserts evaluated at compile time.
-int main() { passed(); }
diff --git a/tests/src/cppstd/hipInvocable11Fallback.cpp b/tests/src/cppstd/hipInvocable11Fallback.cpp
deleted file mode 100644
index 501752a5b2..0000000000
--- a/tests/src/cppstd/hipInvocable11Fallback.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp HIPCC_OPTIONS -std=c++11
- * TEST: %t
- * HIT_END
- */
-
-// Ensure fallback path is chosen
-#define HIP_HAS_RESULT_OF_SFINAE 0  // must be defined before the include below to have any effect; presumably read by the HIP headers it pulls in - confirm
-#define HIP_HAS_INVOCABLE 0
-#include "is_callable_test.hpp"
-#include
-// main only reports success; the checks in is_callable_test.hpp are static_asserts evaluated at compile time.
-int main() { passed(); }
diff --git a/tests/src/cppstd/hipInvocable14.cpp b/tests/src/cppstd/hipInvocable14.cpp
deleted file mode 100644
index c412b9671d..0000000000
--- a/tests/src/cppstd/hipInvocable14.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp HIPCC_OPTIONS -std=c++14
- * TEST: %t
- * HIT_END
- */
-
-#include "is_callable_test.hpp"
-#include
-// Built with -std=c++14 (see BUILD above). main only reports success; the checks in is_callable_test.hpp are static_asserts evaluated at compile time.
-int main() { passed(); }
diff --git a/tests/src/cppstd/hipInvocable17.cpp b/tests/src/cppstd/hipInvocable17.cpp
deleted file mode 100644
index 9ff4a49304..0000000000
--- a/tests/src/cppstd/hipInvocable17.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp HIPCC_OPTIONS -std=c++17
- * TEST: %t
- * HIT_END
- */
-
-#include "is_callable_test.hpp"
-#include
-// Built with -std=c++17 (see BUILD above). main only reports success; the checks in is_callable_test.hpp are static_asserts evaluated at compile time.
-int main() { passed(); }
diff --git a/tests/src/cppstd/is_callable_test.hpp b/tests/src/cppstd/is_callable_test.hpp
deleted file mode 100644
index 01db88dff3..0000000000
--- a/tests/src/cppstd/is_callable_test.hpp
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
-Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc.
All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include - -using hip_impl::is_callable; - -template -struct callable_rank : callable_rank -{}; - -template<> -struct callable_rank<0> -{}; - -struct test1 -{ - struct is_callable_class - { - void operator()(int) const - { - } - }; - struct callable_test_param {}; - - void is_callable_function(int) - { - } - - struct is_callable_rank_class - { - void operator()(int, callable_rank<3>) const - { - } - - void operator()(int, callable_rank<4>) const - { - } - }; - - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(not is_callable::value, "callable failed"); - static_assert(not is_callable::value, "callable failed"); - static_assert(not is_callable::value, "callable failed"); - - typedef void (*is_callable_function_pointer)(int); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(is_callable::value, "Not callable"); - static_assert(not is_callable::value, "callable failed"); - static_assert(not is_callable::value, "callable failed"); - static_assert(not is_callable::value, "callable failed"); - - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - 
static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - static_assert(is_callable)>::value, "Not callable"); - - static_assert(not is_callable)>::value, "callable failed"); - static_assert(not is_callable)>::value, "callable failed"); - static_assert(not is_callable)>::value, "callable failed"); - static_assert(not is_callable)>::value, "callable failed"); - static_assert(not is_callable)>::value, "callable failed"); - static_assert(not is_callable)>::value, "callable failed"); - - static_assert(not is_callable::value, "callable failed"); - static_assert(not is_callable, callable_test_param)>::value, "callable failed"); - static_assert(not is_callable, callable_test_param)>::value, "callable failed"); - static_assert(not is_callable)>::value, "callable failed"); - static_assert(not is_callable)>::value, "callable failed"); - static_assert(not is_callable::value, "callable failed"); - static_assert(not is_callable::value, "callable failed"); -}; - -struct test2 -{ - typedef int(callable_rank<0>::*fn)(int); - - static_assert(is_callable&, int)>::value, "Failed"); - static_assert(is_callable&, int)>::value, "Failed"); - static_assert(not is_callable&)>::value, "Failed"); - static_assert(not is_callable const&, 
int)>::value, "Failed"); -}; - -struct test3 -{ - typedef int(callable_rank<0>::*fn)(int); - - typedef callable_rank<0>* T; - typedef callable_rank<1>* DT; - typedef const callable_rank<0>* CT; - typedef std::unique_ptr> ST; - - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(not is_callable::value, "Failed"); - -}; - -struct test4 -{ - typedef int(callable_rank<0>::*fn); - - static_assert(not is_callable::value, "Failed"); -}; - -struct test5 -{ - typedef int(callable_rank<0>::*fn); - - static_assert(is_callable&)>::value, "Failed"); - static_assert(is_callable&&)>::value, "Failed"); - static_assert(is_callable&)>::value, "Failed"); - static_assert(is_callable&)>::value, "Failed"); -}; - -struct test6 -{ - typedef int(callable_rank<0>::*fn); - - typedef callable_rank<0>* T; - typedef callable_rank<1>* DT; - typedef const callable_rank<0>* CT; - typedef std::unique_ptr> ST; - - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - static_assert(is_callable::value, "Failed"); - -}; - -struct test7 -{ - typedef void(*fp)(callable_rank<0>&, int); - - static_assert(is_callable&, int)>::value, "Failed"); - static_assert(is_callable&, int)>::value, "Failed"); - static_assert(not is_callable&, int)>::value, "Failed"); - static_assert(not is_callable::value, "Failed"); - static_assert(not is_callable&)>::value, "Failed"); -}; - -struct test8 -{ - typedef void(&fp)(callable_rank<0>&, int); - - static_assert(is_callable&, int)>::value, "Failed"); - static_assert(is_callable&, int)>::value, "Failed"); - static_assert(not is_callable&, int)>::value, "Failed"); - static_assert(not is_callable::value, 
"Failed"); - static_assert(not is_callable&)>::value, "Failed"); -}; diff --git a/tests/src/cudaRegister.cu b/tests/src/cudaRegister.cu deleted file mode 100644 index 4712171898..0000000000 --- a/tests/src/cudaRegister.cu +++ /dev/null @@ -1,90 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include -#include -#include - -#define LEN 1024 -#define SIZE LEN * sizeof(float) -#define ITER 1024*1024 - -#define check(msg, status){ \ -if(status != cudaSuccess) { \ - printf("%s failed. 
\n", #msg); \ -} \ -} - -__global__ void Inc1(float *Ad, float *Bd){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if(tx < 1 ){ - for(int i=0;i>>(Ad, Bd); - sleep(3); - A[0] = -(ITER*1.0f); - std::cout<<"Same cache line before completion: \t"<< A[0]<>>(Ad, Bd); - sleep(3); - A[0] = -(ITER*1.0f); - std::cout<<"Diff cache line before completion: \t"< -#include -#include -#include -#include - -#define SIZE 100 -using namespace std; - -static random_device dev; -static mt19937 rng(dev()); - -inline float getRandomFloat(long min = 10, long max = LONG_MAX) { - uniform_real_distribution gen(min, max); - return gen(rng); -} - -__host__ __device__ bool testRelativeAccuracy(float a, hip_bfloat16 b) { - float c = float(b); - // float relative error should be less than 1/(2^7) since bfloat16 - // has 7 bits mantissa. - if(fabs(c - a) / a <= 1.0 / 128){ - return true; - } - return false; -} - -__host__ __device__ void testOperations(float &fa, float &fb) { - - hip_bfloat16 bf_a(fa); - hip_bfloat16 bf_b(fb); - float fc = float(bf_a); - float fd = float(bf_b); - - assert(testRelativeAccuracy(fa, bf_a)); - assert(testRelativeAccuracy(fb, bf_b)); - - assert(testRelativeAccuracy(fc + fd, bf_a + bf_b)); - //when checked as above for add, operation sub fails on GPU - assert(hip_bfloat16(fc - fd) == (bf_a - bf_b)); - assert(testRelativeAccuracy(fc * fd, bf_a * bf_b)); - assert(testRelativeAccuracy(fc / fd, bf_a / bf_b)); - - hip_bfloat16 bf_opNegate = -bf_a; - assert(bf_opNegate == -bf_a); - - hip_bfloat16 bf_x; - bf_x = bf_a; - bf_x++; - bf_x--; - ++bf_x; - --bf_x; - //hip_bfloat16 is converted to float and then inc/decremented, hence check with reduced precision - assert(testRelativeAccuracy(bf_x,bf_a)); - - bf_x = bf_a; - bf_x += bf_b; - assert(bf_x == (bf_a + bf_b)); - bf_x = bf_a; - bf_x -= bf_b; - assert(bf_x == (bf_a - bf_b)); - bf_x = bf_a; - bf_x *= bf_b; - assert(bf_x == (bf_a * bf_b)); - bf_x = bf_a; - bf_x /= bf_b; - assert(bf_x == (bf_a / bf_b)); - - hip_bfloat16 
bf_rounded = hip_bfloat16::round_to_bfloat16(fa); - if (isnan(bf_rounded)) { - assert(isnan(bf_rounded) || isinf(bf_rounded)); - } -} - -__global__ void testOperationsGPU(float* d_a, float* d_b) -{ - int id = threadIdx.x; - if (id > SIZE) return; - float &a = d_a[id]; - float &b = d_b[id]; - testOperations(a, b); -} - -int main(){ - float *h_fa, *h_fb; - float *d_fa, *d_fb; - - h_fa = new float[SIZE]; - h_fb = new float[SIZE]; - for (int i = 0; i < SIZE; i++) { - h_fa[i] = getRandomFloat(); - h_fb[i] = getRandomFloat(); - testOperations(h_fa[i], h_fb[i]); - } - cout<<"Host bfloat16 Operations Successful!!"< -#include -#include -#include "test_common.h" -#include "hip/hip_complex.h" - -#define LEN 64 -/* Comparing 2 floating point/double variables using floating point -precision. The precision is set at compile time using EPSILON. */ -#define COMPARE_REALNUM(A, B, EPSILON) (fabs(A-B) < EPSILON) - -enum ComplexFuncType { - COMPLEX_ADD, - COMPLEX_SUB, - COMPLEX_MUL, - COMPLEX_DIV, - COMPLEX_CONJ, - COMPLEX_REAL, - COMPLEX_IMAG, - COMPLEX_SQABS, - COMPLEX_ABS -}; - -__global__ void testMakeComplexFunc(float* A, float* B, - hipFloatComplex* C) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - C[tx] = make_hipFloatComplex(A[tx], B[tx]); -} - -__global__ void testMakeComplexFunc(double* A, double* B, - hipDoubleComplex* C) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - C[tx] = make_hipDoubleComplex(A[tx], B[tx]); -} - -__global__ void testComplexMathFunc1(hipFloatComplex* A, - hipFloatComplex* B, - hipFloatComplex* C, - enum ComplexFuncType type) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - switch (type) { - case COMPLEX_ADD: - C[tx] = hipCaddf(A[tx], B[tx]); - break; - case COMPLEX_SUB: - C[tx] = hipCsubf(A[tx], B[tx]); - break; - case COMPLEX_MUL: - C[tx] = hipCmulf(A[tx], B[tx]); - break; - case COMPLEX_DIV: - C[tx] = hipCdivf(A[tx], B[tx]); - break; - case COMPLEX_CONJ: - C[tx] = hipConjf(A[tx]); - break; - } -} - -__global__ void 
testComplexMathFunc1(hipDoubleComplex* A, - hipDoubleComplex* B, - hipDoubleComplex* C, - enum ComplexFuncType type) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - switch (type) { - case COMPLEX_ADD: - C[tx] = hipCadd(A[tx], B[tx]); - break; - case COMPLEX_SUB: - C[tx] = hipCsub(A[tx], B[tx]); - break; - case COMPLEX_MUL: - C[tx] = hipCmul(A[tx], B[tx]); - break; - case COMPLEX_DIV: - C[tx] = hipCdiv(A[tx], B[tx]); - break; - case COMPLEX_CONJ: - C[tx] = hipConj(A[tx]); - break; - } -} - -__global__ void testComplexMathFunc2(hipFloatComplex* A, - float* B, - enum ComplexFuncType type) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - switch (type) { - case COMPLEX_REAL: - B[tx] = hipCrealf(A[tx]); - break; - case COMPLEX_IMAG: - B[tx] = hipCimagf(A[tx]); - break; - case COMPLEX_SQABS: - B[tx] = hipCsqabsf(A[tx]); - break; - case COMPLEX_ABS: - B[tx] = hipCabsf(A[tx]); - break; - } -} - -__global__ void testComplexMathFunc2(hipDoubleComplex* A, - double* B, - enum ComplexFuncType type) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - switch (type) { - case COMPLEX_REAL: - B[tx] = hipCreal(A[tx]); - break; - case COMPLEX_IMAG: - B[tx] = hipCimag(A[tx]); - break; - case COMPLEX_SQABS: - B[tx] = hipCsqabs(A[tx]); - break; - case COMPLEX_ABS: - B[tx] = hipCabs(A[tx]); - break; - } -} -/** - * Validates all hipComplex inline functions on device - * Functions validated are: make_hipDoubleComplex, make_hipFloatComplex - */ -template bool test_makehipComplex_dev() { - T2 *A, *Ad, *B, *Bd; - T1 *C, *Cd; - bool TestPassed = true; - A = new T2[LEN]; - B = new T2[LEN]; - C = new T1[LEN]; - for (uint32_t i = 0; i < LEN; i++) { - A[i] = 2*i*1.0; - B[i] = (2*i + 1)*1.0; - } - unsigned int size2 = LEN * sizeof(T2); - unsigned int size1 = LEN * sizeof(T1); - HIPCHECK(hipMalloc(reinterpret_cast(&Ad), size2)); - HIPCHECK(hipMalloc(reinterpret_cast(&Bd), size2)); - HIPCHECK(hipMalloc(reinterpret_cast(&Cd), size1)); - HIPCHECK(hipMemcpy(Ad, A, size2, 
hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd, B, size2, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(testMakeComplexFunc, dim3(1), dim3(LEN), - 0, 0, Ad, Bd, Cd); - HIPCHECK(hipMemcpy(C, Cd, size1, hipMemcpyDeviceToHost)); - // Validate the output of the kernel functions. - for (uint32_t i = 0; i < LEN; i++) { - if ((A[i] != C[i].x) || (B[i] != C[i].y)) { - TestPassed = false; - break; - } - } - HIPCHECK(hipFree(Cd)); - HIPCHECK(hipFree(Bd)); - HIPCHECK(hipFree(Ad)); - delete[] C; - delete[] B; - delete[] A; - return TestPassed; -} -/** - * Validates all hipComplex inline functions on device - * Functions validated are: hipCaddf, hipCsubf, hipCmulf and hipCdivf - * hipCadd, hipCsub, hipCmul, hipCdiv - */ -template -bool test_complexMathFunc1_dev(enum ComplexFuncType mathFuncType) { - T1 *A, *Ad, *B, *Bd; - T1 *C, *Cd; - bool TestPassed = true; - A = new T1[LEN]; - B = new T1[LEN]; - C = new T1[LEN]; - for (uint32_t i = 0; i < LEN; i++) { - A[i].x = 2*i*1.0; - A[i].y = (2*i + 1)*1.0; - B[i].x = 2*i*1.0 + 0.5; - B[i].y = (2*i + 1)*1.0 + 0.5; - } - unsigned int size = LEN * sizeof(T1); - HIPCHECK(hipMalloc(reinterpret_cast(&Ad), size)); - HIPCHECK(hipMalloc(reinterpret_cast(&Bd), size)); - HIPCHECK(hipMalloc(reinterpret_cast(&Cd), size)); - HIPCHECK(hipMemcpy(Ad, A, size, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd, B, size, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(testComplexMathFunc1, dim3(1), dim3(LEN), - 0, 0, Ad, Bd, Cd, mathFuncType); - HIPCHECK(hipMemcpy(C, Cd, size, hipMemcpyDeviceToHost)); - // Validate the output of the kernel functions. 
- T2 epsilon = 0.0001f; - T2 real, imag; - for (uint32_t i = 0; i < LEN; i++) { - if (mathFuncType == COMPLEX_ADD) { - real = (A[i].x + B[i].x); - imag = (A[i].y + B[i].y); - } else if (mathFuncType == COMPLEX_SUB) { - real = (A[i].x - B[i].x); - imag = (A[i].y - B[i].y); - } else if (mathFuncType == COMPLEX_MUL) { - real = (A[i].x*B[i].x - A[i].y*B[i].y); - imag = (A[i].y*B[i].x + A[i].x*B[i].y); - } else if (mathFuncType == COMPLEX_DIV) { - T2 sqabs = (B[i].x*B[i].x + B[i].y*B[i].y); - real = (A[i].x * B[i].x + A[i].y * B[i].y)/sqabs; - imag = (A[i].y * B[i].x - A[i].x * B[i].y)/sqabs; - } else if (mathFuncType == COMPLEX_CONJ) { - real = A[i].x; - imag = -A[i].y; - } - if (!COMPARE_REALNUM(real, C[i].x, epsilon) || - !COMPARE_REALNUM(imag, C[i].y, epsilon)) { - TestPassed = false; - break; - } - } - HIPCHECK(hipFree(Cd)); - HIPCHECK(hipFree(Bd)); - HIPCHECK(hipFree(Ad)); - delete[] C; - delete[] B; - delete[] A; - return TestPassed; -} -/** - * Validates all hipComplex inline functions on device - * Functions validated are: hipCrealf, hipCimagf, hipCsqabsf and hipCabsf - * hipCreal, hipCimag, hipCsqabs, hipCabs - */ -template -bool test_complexMathFunc2_dev(enum ComplexFuncType mathFuncType) { - T1 *A, *Ad; - T2 *B, *Bd; - bool TestPassed = true; - A = new T1[LEN]; - B = new T2[LEN]; - for (uint32_t i = 0; i < LEN; i++) { - A[i].x = 2*i*1.0; - A[i].y = (2*i + 1)*1.0; - } - unsigned int size1 = LEN * sizeof(T1); - unsigned int size2 = LEN * sizeof(T2); - HIPCHECK(hipMalloc(reinterpret_cast(&Ad), size1)); - HIPCHECK(hipMalloc(reinterpret_cast(&Bd), size2)); - HIPCHECK(hipMemcpy(Ad, A, size1, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(testComplexMathFunc2, dim3(1), dim3(LEN), - 0, 0, Ad, Bd, mathFuncType); - HIPCHECK(hipMemcpy(B, Bd, size2, hipMemcpyDeviceToHost)); - // Validate the output of the kernel functions. 
- T2 epsilon = 0.0001f; - if (mathFuncType == COMPLEX_REAL) { - for (uint32_t i = 0; i < LEN; i++) { - if (!COMPARE_REALNUM(A[i].x, B[i], epsilon)) { - TestPassed = false; - break; - } - } - } else if (mathFuncType == COMPLEX_IMAG) { - for (uint32_t i = 0; i < LEN; i++) { - if (!COMPARE_REALNUM(A[i].y, B[i], epsilon)) { - TestPassed = false; - break; - } - } - } else if (mathFuncType == COMPLEX_SQABS) { - for (uint32_t i = 0; i < LEN; i++) { - T2 sqabs = A[i].x * A[i].x + A[i].y * A[i].y; -#ifdef __HIP_PLATFORM_NVCC__ - /* Setting the Floating Point precision to 0.01 as this scenario - is failing on NVIDIA targets. */ - epsilon = 0.01f; -#endif - if (!COMPARE_REALNUM(sqabs, B[i], epsilon)) { - TestPassed = false; - break; - } - } - } else if (mathFuncType == COMPLEX_ABS) { - for (uint32_t i = 0; i < LEN; i++) { - T2 sqabs = A[i].x * A[i].x + A[i].y * A[i].y; - if (!COMPARE_REALNUM(sqrtf(sqabs), B[i], epsilon)) { - TestPassed = false; - break; - } - } - } - HIPCHECK(hipFree(Bd)); - HIPCHECK(hipFree(Ad)); - delete[] B; - delete[] A; - return TestPassed; -} -/** - * Validates all hipComplex inline functions on host - */ -bool test_allcomplexMathFunc_host() { - bool TestPassed = true; - float fa = 2.0, fb = 3.0; - hipFloatComplex fc = make_hipFloatComplex(fa, fb); - if ((fc.x != fa) || (fc.y != fb)) { - printf("make_hipFloatComplex test failed. \n"); - TestPassed &= false; - } - double da = 2.0, db = 3.0; - hipDoubleComplex dc = make_hipDoubleComplex(da, db); - if ((dc.x != da) || (dc.y != db)) { - printf("make_hipDoubleComplex test failed. \n"); - TestPassed &= false; - } - hipFloatComplex fp, fq, fx; - fp.x = 2.0; - fp.y = 3.0; - fq.x = 4.0; - fq.y = 5.0; - fx = hipCaddf(fp, fq); - if ((fx.x != (fp.x + fq.x)) || (fx.y != (fp.y + fq.y))) { - printf("hipCaddf test failed. \n"); - TestPassed &= false; - } - fx = hipCsubf(fp, fq); - if ((fx.x != (fp.x - fq.x)) || (fx.y != (fp.y - fq.y))) { - printf("hipCsubf test failed. 
\n"); - TestPassed &= false; - } - fx = hipCmulf(fp, fq); - if ((fx.x != (fp.x*fq.x - fp.y*fq.y)) || - (fx.y != (fp.y*fq.x + fp.x*fq.y))) { - printf("hipCmulf test failed. \n"); - TestPassed &= false; - } - fx = hipCdivf(fp, fq); - float fsqabs = fq.x*fq.x + fq.y*fq.y; - float epsilon = 0.0001f; - if ((!COMPARE_REALNUM(fx.x, (fp.x*fq.x + fp.y*fq.y)/fsqabs, epsilon)) || - (!COMPARE_REALNUM(fx.y, (fp.y*fq.x - fp.x*fq.y)/fsqabs, epsilon))) { - printf("hipCdivf test failed. \n"); - TestPassed &= false; - } - if ((fp.x != hipCrealf(fp)) || (fp.y != hipCimagf(fp))) { - printf("hipCrealf/hipCimagf test failed. \n"); - TestPassed &= false; - } - fx = hipConjf(fp); - if ((fx.x != fp.x) || (fx.y != -fp.y)) { - printf("hipConjf test failed. \n"); - TestPassed &= false; - } - if (!COMPARE_REALNUM((fp.x*fp.x + fp.y*fp.y), hipCsqabsf(fp), epsilon)) { - printf("hipCsqabsf test failed. \n"); - TestPassed &= false; - } - if (!COMPARE_REALNUM(sqrtf(fp.x*fp.x + fp.y*fp.y), hipCabsf(fp), epsilon)) { - printf("hipCabsf test failed. \n"); - TestPassed &= false; - } - hipDoubleComplex dp, dq, dx; - dp.x = 2.0; - dp.y = 3.0; - dq.x = 4.0; - dq.y = 5.0; - dx = hipCadd(dp, dq); - if ((dx.x != (dp.x + dq.x)) || (dx.y != (dp.y + dq.y))) { - printf("hipCadd test failed. \n"); - TestPassed &= false; - } - dx = hipCsub(dp, dq); - if ((dx.x != (dp.x - dq.x)) || (dx.y != (dp.y - dq.y))) { - printf("hipCsub test failed. \n"); - TestPassed &= false; - } - dx = hipCmul(dp, dq); - if ((dx.x != (dp.x*dq.x - dp.y*dq.y)) || - (dx.y != (dp.y*dq.x + dp.x*dq.y))) { - printf("hipCmul test failed. \n"); - TestPassed &= false; - } - dx = hipCdiv(dp, dq); - float dsqabs = dq.x*dq.x + dq.y*dq.y; - if ((!COMPARE_REALNUM(dx.x, (dp.x*dq.x + dp.y*dq.y)/dsqabs, epsilon)) || - (!COMPARE_REALNUM(dx.y, (dp.y*dq.x - dp.x*dq.y)/dsqabs, epsilon))) { - printf("hipCdiv test failed. \n"); - TestPassed &= false; - } - if ((dp.x != hipCreal(dp)) || (dp.y != hipCimag(dp))) { - printf("hipCreal/hipCimag test failed. 
\n"); - TestPassed &= false; - } - dx = hipConj(dp); - if ((dx.x != dp.x) || (dx.y != -dp.y)) { - printf("hipConj test failed. \n"); - TestPassed &= false; - } - if (!COMPARE_REALNUM((dp.x*dp.x + dp.y*dp.y), hipCsqabs(dp), epsilon)) { - printf("hipCsqabs test failed. \n"); - TestPassed &= false; - } - if (!COMPARE_REALNUM(sqrtf(dp.x*dp.x + dp.y*dp.y), hipCabs(dp), epsilon)) { - printf("hipCabs test failed. \n"); - TestPassed &= false; - } - return TestPassed; -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - if (p_tests == 0x01) { - TestPassed = test_makehipComplex_dev(); - } else if (p_tests == 0x02) { - TestPassed = test_makehipComplex_dev(); - } else if (p_tests == 0x03) { - TestPassed = test_makehipComplex_dev(); - } else if (p_tests == 0x04) { - TestPassed = test_makehipComplex_dev(); - } else if (p_tests == 0x05) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_ADD); - } else if (p_tests == 0x06) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_ADD); - } else if (p_tests == 0x07) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_SUB); - } else if (p_tests == 0x08) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_SUB); - } else if (p_tests == 0x09) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_MUL); - } else if (p_tests == 0x0A) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_MUL); - } else if (p_tests == 0x0B) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_DIV); - } else if (p_tests == 0x0C) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_DIV); - } else if (p_tests == 0x0D) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_CONJ); - } else if (p_tests == 0x0E) { - TestPassed = - test_complexMathFunc1_dev(COMPLEX_CONJ); - } else if (p_tests == 0x0F) { - TestPassed = - test_complexMathFunc2_dev(COMPLEX_REAL); - } else if (p_tests == 0x10) { - TestPassed = - test_complexMathFunc2_dev(COMPLEX_REAL); - } else if (p_tests == 0x11) { - TestPassed = - 
test_complexMathFunc2_dev(COMPLEX_IMAG); - } else if (p_tests == 0x12) { - TestPassed = - test_complexMathFunc2_dev(COMPLEX_IMAG); - } else if (p_tests == 0x13) { - TestPassed = - test_complexMathFunc2_dev(COMPLEX_SQABS); - } else if (p_tests == 0x14) { - TestPassed = - test_complexMathFunc2_dev(COMPLEX_SQABS); - } else if (p_tests == 0x15) { - TestPassed = - test_complexMathFunc2_dev(COMPLEX_ABS); - } else if (p_tests == 0x16) { - TestPassed = - test_complexMathFunc2_dev(COMPLEX_ABS); - } else if (p_tests == 0x17) { - TestPassed = test_allcomplexMathFunc_host(); - } else { - printf("Invalid Test Case \n"); - passed(); - } - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/deviceLib/hipDeviceMalloc.cpp b/tests/src/deviceLib/hipDeviceMalloc.cpp deleted file mode 100644 index 314d815a51..0000000000 --- a/tests/src/deviceLib/hipDeviceMalloc.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -/* HIT_START - * BUILD: %t %s ../../src/test_common.cpp - * TEST: %t - * HIT_END - */ -#include "test_common.h" -#include -#include - -// Tolerance for error -const double tolerance = 1e-6; -const bool verbose = false; - -#define BLKDIM_X 64 -#define BLKDIM_Y 1 -#define BLKDIM_Z 1 -#define NUM_BLK_X 1 -#define NUM_BLK_Y 1 -#define NUM_BLK_Z 1 - -#define LEN (BLKDIM_X * BLKDIM_Y * BLKDIM_Z * NUM_BLK_X * NUM_BLK_Y * NUM_BLK_Z) - -#define ALL_FUN \ - OP(add) \ - OP(sub) \ - OP(mul) \ - OP(div) - -#define OP(x) CK_##x, -enum CalcKind { - ALL_FUN -}; -#undef OP - -#define OP(x) case CK_##x: return #x; -std::string getName(enum CalcKind CK) { - switch(CK){ - ALL_FUN - } -} -#undef OP - -// Calculates function. -// If the function has one argument, B is ignored. -#define ONE_ARG(func) \ - case CK_##func: \ - return std::func(A); - -template -__device__ __host__ FloatT calc(FloatT A, FloatT B, enum CalcKind CK) { - switch (CK) { - case CK_add: - return A + B; - case CK_sub: - return A - B; - case CK_mul: - return A * B; - case CK_div: - return A / B; - } -} - -// Allocate memory in kernel and save the address to pA and pB. -// Copy value from A, B to allocated memory. -template -__global__ void kernel_alloc(FloatT* A, FloatT* B, FloatT** pA, FloatT** pB) { - int tx = threadIdx.x + blockDim.x * blockIdx.x - + (threadIdx.y + blockDim.y * blockIdx.y) * blockDim.x - + (threadIdx.z + blockDim.z * blockIdx.z) * blockDim.x - * blockDim.y; - if (tx == 0) { - *pA = (FloatT*)malloc(sizeof(FloatT) * LEN); - *pB = (FloatT*)malloc(sizeof(FloatT) * LEN); - for (int i = 0; i < LEN; i++) { - (*pA)[i] = A[i]; - (*pB)[i] = B[i]; - } - } -} - -// Do calculation using values saved in allocated memmory. 
pA, pB are buffers -// containing the address of the device-side allocated array. -template -__global__ void kernel_free(FloatT** pA, FloatT** pB, FloatT* C, enum CalcKind CK) { - int tx = threadIdx.x + blockDim.x * blockIdx.x - + (threadIdx.y + blockDim.y * blockIdx.y) * blockDim.x - + (threadIdx.z + blockDim.z * blockIdx.z) * blockDim.x - * blockDim.y; - C[tx] = calc((*pA)[tx], (*pB)[tx], CK); - if (tx == 0) { - free(*pA); - free(*pB); - } -} - -template -void test() { - FloatT *A, *Ad, *B, *Bd, *C, *Cd, *D; - A = new FloatT[LEN]; - B = new FloatT[LEN]; - C = new FloatT[LEN]; - D = new FloatT[LEN]; - hipMalloc((void**)&Ad, sizeof(FloatT) * LEN); - hipMalloc((void**)&Bd, sizeof(FloatT) * LEN); - hipMalloc((void**)&Cd, sizeof(FloatT) * LEN); - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = (i + 1) * 1.0f; - B[i] = A[i]; - C[i] = A[i]; - } - hipMemcpy(Ad, A, sizeof(FloatT) * LEN, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, sizeof(FloatT) * LEN, hipMemcpyHostToDevice); - - // Run kernel for a calculation kind and verify by comparing with host - // calculation result. Returns false if fails. - auto test_fun = [&](enum CalcKind CK) { - // kernel_alloc allocates memory on device side and initialize it. - // kernel_free uses allocated memory from kernel_alloc and does the - // calculation then free the memory. - // pA and pB are buffers to pass the device-side allocated memory address - // from kernel_alloc to kernel_free. 
- FloatT **pA, **pB; - hipMalloc((FloatT***)&pA, sizeof(FloatT*)); - hipMalloc((FloatT***)&pB, sizeof(FloatT*)); - dim3 blkDim(BLKDIM_X, BLKDIM_Y, BLKDIM_Z); - dim3 numBlk(NUM_BLK_X, NUM_BLK_Y, NUM_BLK_Z); - hipLaunchKernelGGL(kernel_alloc, numBlk, blkDim, 0, 0, - Ad, Bd, pA, pB); - hipDeviceSynchronize(); - hipLaunchKernelGGL(kernel_free, numBlk, blkDim, 0, 0, - pA, pB, Cd, CK); - hipMemcpy(C, Cd, sizeof(FloatT) * LEN, hipMemcpyDeviceToHost); - hipFree(pA); - hipFree(pB); - for (int i = 0; i < LEN; i++) { - FloatT Expected = calc(A[i], B[i], CK); - FloatT error = std::abs(C[i] - Expected); - if (std::abs(Expected) > tolerance) error /= std::abs(Expected); - bool pass = error < tolerance; - if (verbose || !pass) { - std::cout << "Function: " << getName(CK) << " Operands: " << A[i] << " " << B[i] - << " Result: " << C[i] << " Expected: " << Expected << " Error: " << error - << " Pass: " << pass << std::endl; - } - if (!pass) - return false; - } - return true; - }; - -#define OP(x) assert(test_fun(CK_##x)); - ALL_FUN -#undef OP - - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - delete[] A; - delete[] B; - delete[] C; - delete[] D; -} - -int main() { - test(); - test(); - passed(); - return 0; -} diff --git a/tests/src/deviceLib/hipDeviceMemcpy.cpp b/tests/src/deviceLib/hipDeviceMemcpy.cpp deleted file mode 100644 index 7033dc2113..0000000000 --- a/tests/src/deviceLib/hipDeviceMemcpy.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "../test_common.h" - - -#define LEN 1024 -#define SIZE LEN << 2 - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - - -__global__ void cpy(uint32_t* Out, uint32_t* In) { - int tx = threadIdx.x; - memcpy(Out + tx, In + tx, sizeof(uint32_t)); -} - -__global__ void set(uint32_t* ptr, uint8_t val, size_t size) { - int tx = threadIdx.x; - memset(ptr + tx, val, sizeof(uint32_t)); -} - -int main() { - uint32_t *A, *Ad, *B, *Bd; - uint32_t* Val; - A 
= new uint32_t[LEN]; - B = new uint32_t[LEN]; - Val = new uint32_t; - *Val = 0; - for (int i = 0; i < LEN; i++) { - A[i] = i; - B[i] = 0; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - - hipLaunchKernelGGL(cpy, dim3(1), dim3(LEN), 0, 0, Bd, Ad); - - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - for (int i = LEN - 16; i < LEN; i++) { - if (A[i] != B[i]) { - return 0; - } - } - hipLaunchKernelGGL(set, dim3(1), dim3(LEN), 0, 0, Bd, 0x1, LEN); - - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - for (int i = LEN - 16; i < LEN; i++) { - if (0x01010101 != B[i]) { - return 0; - } - } - - passed(); -} diff --git a/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp deleted file mode 100644 index 10cd58a8e3..0000000000 --- a/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void double_precision_intrinsics() { -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dadd_rd(0.0, 1.0); -#endif - __dadd_rn(0.0, 1.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dadd_ru(0.0, 1.0); - __dadd_rz(0.0, 1.0); - __ddiv_rd(0.0, 1.0); -#endif - __ddiv_rn(0.0, 1.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __ddiv_ru(0.0, 1.0); - __ddiv_rz(0.0, 1.0); - __dmul_rd(1.0, 2.0); -#endif - __dmul_rn(1.0, 2.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dmul_ru(1.0, 2.0); - __dmul_rz(1.0, 2.0); - __drcp_rd(2.0); -#endif - __drcp_rn(2.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __drcp_ru(2.0); - __drcp_rz(2.0); - __dsqrt_rd(4.0); -#endif - __dsqrt_rn(4.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dsqrt_ru(4.0); - __dsqrt_rz(4.0); - __dsub_rd(2.0, 1.0); -#endif - __dsub_rn(2.0, 1.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dsub_ru(2.0, 1.0); - __dsub_rz(2.0, 1.0); - __fma_rd(1.0, 2.0, 3.0); -#endif - __fma_rn(1.0, 2.0, 3.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fma_ru(1.0, 2.0, 3.0); - __fma_rz(1.0, 2.0, 3.0); -#endif -} - -__global__ void compileDoublePrecisionIntrinsics(int ignored) { - double_precision_intrinsics(); -} - -int main() { - hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, 1); - passed(); -} diff --git a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp b/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp deleted file mode 100644 index 657e0ba804..0000000000 --- 
a/tests/src/deviceLib/hipDoublePrecisionMathDevice.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void double_precision_math_functions() { - int iX; - double fX, fY; - - acos(1.0); - acosh(1.0); - asin(0.0); - asinh(0.0); - atan(0.0); - atan2(0.0, 1.0); - atanh(0.0); - cbrt(0.0); - ceil(0.0); - copysign(1.0, -2.0); - cos(0.0); - cosh(0.0); - cospi(0.0); - cyl_bessel_i0(0.0); - cyl_bessel_i1(0.0); - erf(0.0); - erfc(0.0); - erfcinv(2.0); - erfcx(0.0); - erfinv(1.0); - exp(0.0); - exp10(0.0); - exp2(0.0); - expm1(0.0); - fabs(1.0); - fdim(1.0, 0.0); - floor(0.0); - fma(1.0, 2.0, 3.0); - fmax(0.0, 0.0); - fmin(0.0, 0.0); - fmod(0.0, 1.0); - frexp(0.0, &iX); - hypot(1.0, 0.0); - ilogb(1.0); - isfinite(0.0); - isinf(0.0); - isnan(0.0); - j0(0.0); - j1(0.0); - jn(-1.0, 1.0); - ldexp(0.0, 0); - lgamma(1.0); - llrint(0.0); - llround(0.0); - log(1.0); - log10(1.0); - log1p(-1.0); - log2(1.0); - logb(1.0); - lrint(0.0); - lround(0.0); - modf(0.0, &fX); - nan("1"); - nearbyint(0.0); - nextafter(0.0, 0.0); - fX = 1.0; - norm(1, &fX); - norm3d(1.0, 0.0, 0.0); - norm4d(1.0, 0.0, 0.0, 0.0); - normcdf(0.0); - normcdfinv(1.0); - pow(1.0, 0.0); - rcbrt(1.0); - remainder(2.0, 1.0); - remquo(1.0, 2.0, &iX); - rhypot(0.0, 1.0); - rint(1.0); - fX = 1.0; - rnorm(1, &fX); - rnorm3d(0.0, 0.0, 1.0); - rnorm4d(0.0, 0.0, 0.0, 1.0); - round(0.0); - rsqrt(1.0); - scalbln(0.0, 1); - scalbn(0.0, 1); - signbit(1.0); - sin(0.0); -#if not(defined(__HIP_PLATFORM_NVIDIA__) && (CUDA_VERSION == 11030 || CUDA_VERSION == 11020)) - //NV A100 has a bug in sincos(), so temporarily disbale it - sincos(0.0, &fX, &fY); -#endif - sincospi(0.0, &fX, &fY); - sinh(0.0); - sinpi(0.0); - sqrt(0.0); - tan(0.0); - tanh(0.0); - tgamma(2.0); - trunc(0.0); - y0(1.0); - y1(1.0); - yn(1, 1.0); -} - -__global__ void compileDoublePrecisionMathOnDevice(int) { - 
double_precision_math_functions(); -} - -int main() { - hipLaunchKernelGGL( - compileDoublePrecisionMathOnDevice, - dim3(1, 1, 1), - dim3(1, 1, 1), - 0, - 0, - 1); - passed(); -} diff --git a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp b/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp deleted file mode 100644 index fd67e7e6cd..0000000000 --- a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -//#include -#include "test_common.h" -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__host__ void double_precision_math_functions() { - int iX; - double fX, fY; - - acos(1.0); - acosh(1.0); - asin(0.0); - asinh(0.0); - atan(0.0); - atan2(0.0, 1.0); - atanh(0.0); - cbrt(0.0); - ceil(0.0); - copysign(1.0, -2.0); - cos(0.0); - cosh(0.0); - // cospi(0.0); - // cyl_bessel_i0(0.0); - // cyl_bessel_i1(0.0); - erf(0.0); - erfc(0.0); - // erfcinv(2.0); - // erfcx(0.0); - // erfinv(1.0); - exp(0.0); - #ifdef __unix__ - exp10(0.0); - #endif - exp2(0.0); - expm1(0.0); - fabs(1.0); - fdim(1.0, 0.0); - floor(0.0); - fma(1.0, 2.0, 3.0); - fmax(0.0, 0.0); - fmin(0.0, 0.0); - fmod(0.0, 1.0); - frexp(0.0, &iX); - hypot(1.0, 0.0); - ilogb(1.0); - std::isfinite(0.0); - std::isinf(0.0); - std::isnan(0.0); - #ifdef __unix__ - j0(0.0); - j1(0.0); - jn(-1.0, 1.0); - #elif _WIN64 - _j0(0.0); - _j1(0.0); - _jn(-1.0, 1.0); - #endif - ldexp(0.0, 0); - // lgamma(1.0); - llrint(0.0); - llround(0.0); - log(1.0); - log10(1.0); - log1p(-1.0); - log2(1.0); - logb(1.0); - lrint(0.0); - lround(0.0); - modf(0.0, &fX); - nan("1"); - nearbyint(0.0); - // nextafter(0.0); - fX = 1.0; // norm(1, &fX); -#if defined(__HIP_PLATFORM_AMD__) - // norm3d(1.0, 0.0, 0.0); - // norm4d(1.0, 0.0, 0.0, 0.0); -#endif - // normcdf(0.0); - // normcdfinv(1.0); - pow(1.0, 0.0); - // rcbrt(1.0); - - remainder(2.0, 1.0); - remquo(1.0, 2.0, &iX); -#if defined(__HIP_PLATFORM_AMD__) - // rhypot(0.0, 1.0); -#endif - rint(1.0); -#if defined(__HIP_PLATFORM_AMD__) - fX = 1.0; // rnorm(1, &fX); - // rnorm3d(0.0, 0.0, 1.0); - // rnorm4d(0.0, 0.0, 0.0, 1.0); -#endif - round(0.0); - // rsqrt(1.0); - scalbln(0.0, 1); - scalbn(0.0, 1); - std::signbit(1.0); - sin(0.0); - #ifdef _unix__ - sincos(0.0, &fX, &fY); - #endif - // sincospi(0.0, &fX, &fY); - sinh(0.0); - // sinpi(0.0); - 
sqrt(0.0); - tan(0.0); - tanh(0.0); - tgamma(2.0); - trunc(0.0); - #ifdef __unix__ - y0(1.0); - y1(1.0); - yn(1, 1.0); - #elif _WIN64 - _y0(1.0); - _y1(1.0); - _yn(1, 1.0); - #endif -} - -static void compileOnHost() { double_precision_math_functions(); } - -int main() { - compileOnHost(); - passed(); -} diff --git a/tests/src/deviceLib/hipFloatMath.cpp b/tests/src/deviceLib/hipFloatMath.cpp deleted file mode 100644 index 7354c306b9..0000000000 --- a/tests/src/deviceLib/hipFloatMath.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include - -#define LEN 512 -#define SIZE LEN << 2 - - -__global__ void floatMath(float* In, float* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Out[tid] = __cosf(In[tid]); - Out[tid] = __exp10f(Out[tid]); - Out[tid] = __expf(Out[tid]); - Out[tid] = __frsqrt_rn(Out[tid]); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - Out[tid] = __fsqrt_rd(Out[tid]); -#endif - Out[tid] = __fsqrt_rn(Out[tid]); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - Out[tid] = __fsqrt_ru(Out[tid]); - Out[tid] = __fsqrt_rz(Out[tid]); -#endif - Out[tid] = __log10f(Out[tid]); - Out[tid] = __log2f(Out[tid]); - Out[tid] = __logf(Out[tid]); - Out[tid] = __powf(2.0f, Out[tid]); - __sincosf(Out[tid], &In[tid], &Out[tid]); - Out[tid] = __sinf(Out[tid]); - Out[tid] = __cosf(Out[tid]); - Out[tid] = __tanf(Out[tid]); -} - -int main() { - float *Ind, *Outd; - hipMalloc((void**)&Ind, SIZE); - hipMalloc((void**)&Outd, SIZE); - hipLaunchKernelGGL(floatMath, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, Ind, Outd); - passed(); -} diff --git a/tests/src/deviceLib/hipFloatMathPrecise.cpp b/tests/src/deviceLib/hipFloatMathPrecise.cpp deleted file mode 100644 index c297b9c48f..0000000000 --- a/tests/src/deviceLib/hipFloatMathPrecise.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -__global__ void FloatMathPrecise() { - int iX; - float fX, fY; - - acosf(1.0f); - acoshf(1.0f); - asinf(0.0f); - asinhf(0.0f); - atan2f(0.0f, 1.0f); - atanf(0.0f); - atanhf(0.0f); - cbrtf(0.0f); - fX = ceilf(0.0f); - fX = copysignf(1.0f, -2.0f); - cosf(0.0f); - coshf(0.0f); - cospif(0.0f); - cyl_bessel_i0f(0.0f); - cyl_bessel_i1f(0.0f); - erfcf(0.0f); - erfcinvf(2.0f); - erfcxf(0.0f); - erff(0.0f); - erfinvf(1.0f); - exp10f(0.0f); - exp2f(0.0f); - expf(0.0f); - expm1f(0.0f); - fX = fabsf(1.0f); - fdimf(1.0f, 0.0f); - fdividef(0.0f, 1.0f); - fX = floorf(0.0f); - fmaf(1.0f, 2.0f, 3.0f); - fX = fmaxf(0.0f, 0.0f); - fX = fminf(0.0f, 0.0f); - fmodf(0.0f, 1.0f); - frexpf(0.0f, &iX); - hypotf(1.0f, 0.0f); - ilogbf(1.0f); - isfinite(0.0f); - fX = isinf(0.0f); - fX = isnan(0.0f); - j0f(0.0f); - j1f(0.0f); - jnf(-1.0f, 1.0f); - ldexpf(0.0f, 0); - lgammaf(1.0f); - llrintf(0.0f); - llroundf(0.0f); - log10f(1.0f); - log1pf(-1.0f); - log2f(1.0f); - logbf(1.0f); - logf(1.0f); - lrintf(0.0f); - lroundf(0.0f); - modff(0.0f, &fX); - fX = nanf("1"); - fX = nearbyintf(0.0f); - nextafterf(0.0f, 0.0f); - norm3df(1.0f, 0.0f, 0.0f); - norm4df(1.0f, 0.0f, 0.0f, 0.0f); - normcdff(0.0f); - normcdfinvf(1.0f); - fX = 1.0f; - normf(1, &fX); - powf(1.0f, 0.0f); - rcbrtf(1.0f); - remainderf(2.0f, 1.0f); - remquof(1.0f, 2.0f, &iX); - rhypotf(0.0f, 1.0f); - fY = rintf(1.0f); - rnorm3df(0.0f, 0.0f, 
1.0f); - rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); - fX = 1.0f; - rnormf(1, &fX); - fY = roundf(0.0f); - rsqrtf(1.0f); - scalblnf(0.0f, 1); - scalbnf(0.0f, 1); - signbit(1.0f); - sincosf(0.0f, &fX, &fY); - sincospif(0.0f, &fX, &fY); - sinf(0.0f); - sinhf(0.0f); - sinpif(0.0f); - sqrtf(0.0f); - tanf(0.0f); - tanhf(0.0f); - tgammaf(2.0f); - fY = truncf(0.0f); - y0f(1.0f); - y1f(1.0f); - ynf(1, 1.0f); -} - -int main() { - hipLaunchKernelGGL(FloatMathPrecise, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0); - passed(); -} diff --git a/tests/src/deviceLib/hipHalf2Comparision.cpp b/tests/src/deviceLib/hipHalf2Comparision.cpp deleted file mode 100644 index 54bbaac5e7..0000000000 --- a/tests/src/deviceLib/hipHalf2Comparision.cpp +++ /dev/null @@ -1,341 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include -#include "hip/hip_fp16.h" - -#define test_passed(test_name) \ - printf("%s %s PASSED!%s\n", KGRN, #test_name, KNRM); - -enum half2Op { - HALF2_OP_HEQ2 = 0, - HALF2_OP_HNE2, - HALF2_OP_HLE2, - HALF2_OP_HGE2, - HALF2_OP_HLT2, - HALF2_OP_HGT2, - HALF2_OP_MAX -}; - -enum half2Test { - HALF2_TEST_FUNCTION = 0, - HALF2_TEST_NAN, - HALF2_TEST_MAX -}; - -// Kernels for half2 comparision functions - -__global__ -void __half2Compare(float* result_D, __half2 a, int n, int half2Op, - int testType) { - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = gputhread; i < n; i += stride) { - switch (half2Op) { - case HALF2_OP_HEQ2: - if (testType == HALF2_TEST_FUNCTION) { - result_D[i] = __high2float(__heq2(__hadd2(a, __half2{1, 1}), - __half2{2, 2})); - } else { - result_D[i] = __high2float(__heq2(__h2div(a, __half2{0, 0}), - __half2{0, 0})); - } - break; - case HALF2_OP_HNE2: - result_D[i] = __high2float(__hne2(__hadd2(a, __half2{1, 1}), - __half2{2, 2})); - break; - case HALF2_OP_HLE2: - if (testType == HALF2_TEST_FUNCTION) { - result_D[i] = __high2float(__hle2(__hadd2(a, __half2{1, 1}), - __half2{3, 3})); - } else { - result_D[i] = __high2float(__hle2(__h2div(a, __half2{0, 0}), - __half2{0, 0})); - } - break; - case HALF2_OP_HGE2: - if (testType == HALF2_TEST_FUNCTION) { - result_D[i] = __high2float(__hge2(__hadd2(a, __half2{1, 1}), - __half2{2, 2})); - } else { - result_D[i] = __high2float(__hge2(__h2div(a, __half2{0, 0}), - __half2{0, 0})); - } - break; - case HALF2_OP_HLT2: - if (testType == HALF2_TEST_FUNCTION) { - result_D[i] = __high2float(__hlt2(__hadd2(a, __half2{1, 1}), - __half2{3, 3})); - } else { - result_D[i] = __high2float(__hlt2(__h2div(a, __half2{0, 0}), - __half2{0, 0})); - } - break; - case HALF2_OP_HGT2: - if 
(testType == HALF2_TEST_FUNCTION) { - result_D[i] = __high2float(__hgt2(__hadd2(a, __half2{1, 1}), - __half2{3, 3})); - } else { - result_D[i] = __high2float(__hgt2(__h2div(a, __half2{0, 0}), - __half2{0, 0})); - } - break; - } - } -} - -static bool isFailed(float expectedValue, float *result_H, int size) { - for (int index = 0; index < size; index++) { - if (expectedValue != result_H[index]) { - return true; - } - } - return false; -} - -int main() { - const int n = 64; - float* result_H = reinterpret_cast(malloc(n*sizeof(float))); - float* result_D; - bool bFunctionalTestFailed = false; - bool bNanTestFailed = false; - int index = 0; - HIPCHECK(hipMalloc(&result_D, n*sizeof(float))); - - // kernel launch and hipmemcpy operation to get return value for heq2 - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{1, 1}, n, HALF2_OP_HEQ2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("heq2: failure when arguments are equal\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{2, 2}, n, HALF2_OP_HEQ2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("heq2: failure when arguments are not equal\n"); - bFunctionalTestFailed = true; - } - - // kernel launch and hipmemcpy operation to get return value for hne2 - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{2, 2}, n, HALF2_OP_HNE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("hne2: failure when arguments are not equal\n"); - bFunctionalTestFailed = true; - } - - 
hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{1, 1}, n, HALF2_OP_HNE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("hne2: failure when arguments are equal\n"); - bFunctionalTestFailed = true; - } - - // kernel launch and hipmemcpy operation to get return value for hle2 - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{1, 1}, n, HALF2_OP_HLE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("hle2: failure when argument is less than equal\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{2, 2}, n, HALF2_OP_HLE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("hle2: failure when argument is equal\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{3, 3}, n, HALF2_OP_HLE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("hle2: failure when argument is greater\n"); - bFunctionalTestFailed = true; - } - - // kernel launch and hipmemcpy operation to get return value for hge2 - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{2, 2}, n, HALF2_OP_HGE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("hge2: failure when argument 
is greater\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{1, 1}, n, HALF2_OP_HGE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("hge2: failure when argument is equal\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{0, 0}, n, HALF2_OP_HGE2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("hge2: failure when argument is less\n"); - bFunctionalTestFailed = true; - } - - // kernel launch and hipmemcpy operation to get return value for hlt2 - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{1, 1}, n, HALF2_OP_HLT2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("hlt2: failure when argument is less\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{2, 2}, n, HALF2_OP_HLT2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("hlt2: failure when argument is equal\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{3, 3}, n, HALF2_OP_HLT2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("hlt2: failure when argument is greater\n"); - 
bFunctionalTestFailed = true; - } - - // kernel launch and hipmemcpy operation to get return value for hgt2 - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{3, 3}, n, HALF2_OP_HGT2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(1.0, result_H, n)) { - printf("hgt2: failure when argument is greater\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{2, 2}, n, HALF2_OP_HGT2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("hgt2: failure when argument is equal\n"); - bFunctionalTestFailed = true; - } - - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{1, 1}, n, HALF2_OP_HGT2, - HALF2_TEST_FUNCTION); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("hgt2: failure when argument is less\n"); - bFunctionalTestFailed = true; - } - - for (int nanFunctionTest = HALF2_OP_HEQ2; nanFunctionTest < HALF2_OP_MAX; - nanFunctionTest++) { - // HNE2 will not have a NaN test - if (nanFunctionTest != HALF2_OP_HNE2) { - hipLaunchKernelGGL(__half2Compare, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, - result_D, __half2{0, 0}, n, nanFunctionTest, - HALF2_TEST_NAN); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(result_H, result_D, n*sizeof(float), - hipMemcpyDeviceToHost)); - if (isFailed(0.0, result_H, n)) { - printf("NaN test failed for half function: %d\n", nanFunctionTest); - bNanTestFailed = true; - } - } - } - - hipFree(result_D); - free(result_H); - - if ((false == bFunctionalTestFailed) && (false == bNanTestFailed)) { - passed(); - } else { - failed("Some Half2 tests failed"); - } - - 
return 0; -} - diff --git a/tests/src/deviceLib/hipIntegerIntrinsics.cpp b/tests/src/deviceLib/hipIntegerIntrinsics.cpp deleted file mode 100644 index 5104aa5191..0000000000 --- a/tests/src/deviceLib/hipIntegerIntrinsics.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - - -#include -#include -#include "test_common.h" - -#include - -using namespace std; - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void integer_intrinsics() { - __brev((unsigned int)10); - __brevll((unsigned long long)10); - __byte_perm((unsigned int)0, (unsigned int)0, 0); - __clz((int)10); - __clzll((long long)10); - __ffs((int)10); - __ffsll((long long)10); - __funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __hadd((int)1, (int)3); - __mul24((int)1, (int)2); - __mul64hi((long long)1, (long long)2); - __mulhi((int)1, (int)2); - __popc((unsigned int)4); - __popcll((unsigned long long)4); - int a = min((int)4, (int)5); - int b = max((int)4, (int)5); - __rhadd((int)1, (int)2); - __sad((int)1, (int)2, 0); - __uhadd((unsigned int)1, (unsigned int)3); - __umul24((unsigned int)1, (unsigned int)2); - __umul64hi((unsigned long long)1, (unsigned long long)2); - __umulhi((unsigned int)1, (unsigned int)2); - __urhadd((unsigned int)1, (unsigned int)2); - __usad((unsigned int)1, (unsigned int)2, 0); - - assert(1); -} - -__global__ void compileIntegerIntrinsics(int ignored) { integer_intrinsics(); } - -int main() { - hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, 1); - passed(); -} diff --git a/tests/src/deviceLib/hipLaunchKernelFunc.cpp b/tests/src/deviceLib/hipLaunchKernelFunc.cpp deleted file mode 100644 index 027c3e62e7..0000000000 --- a/tests/src/deviceLib/hipLaunchKernelFunc.cpp +++ /dev/null @@ -1,193 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#define HIP_TEMPLATE_KERNEL_LAUNCH -#include "hip/hip_runtime.h" -#include "test_common.h" - -__global__ void kernel_abs_int64(long long* input, long long* output) { - int tx = threadIdx.x; - output[tx] = abs(input[tx]); -} - -__global__ void kernel_lgamma_double(double* input, double* output) { - int tx = threadIdx.x; - output[tx] = lgamma(input[tx]); -} - -#define CHECK_LGAMMA_DOUBLE(IN, OUT, EXP) \ - { \ - if (OUT != EXP) { \ - failed("check_abs_int64 failed on %f (output = %f, expected = %fd)\n", IN, OUT, EXP); \ - } \ - } - -#define CHECK_ABS_INT64(IN, OUT, EXP) \ - { \ - if (OUT != EXP) { \ - failed("check_abs_int64 failed on %lld (output = %lld, expected = %lld)\n", IN, OUT, \ - EXP); \ - } \ - } - -void check_lgamma_double() { - using datatype_t = double; - - const int NUM_INPUTS = 8; - auto memsize = NUM_INPUTS * sizeof(datatype_t); - - // allocate memories - datatype_t* inputCPU = (datatype_t*)malloc(memsize); - datatype_t* outputCPU = (datatype_t*)malloc(memsize); - datatype_t* inputGPU = nullptr; - hipMalloc((void**)&inputGPU, memsize); - datatype_t* outputGPU = nullptr; - hipMalloc((void**)&outputGPU, memsize); - - // populate input - for (int i = 0; i < NUM_INPUTS; i++) { - inputCPU[i] = -3.5 + i; - } - - // copy inputs to device - hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice); - - // launch kernel - hipLaunchKernelGGL(kernel_lgamma_double, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); - - // copy outputs from device - hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); - - // check outputs - for (int i = 0; i < NUM_INPUTS; i++) { - CHECK_LGAMMA_DOUBLE(inputCPU[i], outputCPU[i], lgamma(inputCPU[i])); - } - - // free memories - hipFree(inputGPU); - hipFree(outputGPU); - free(inputCPU); - free(outputCPU); - - // done - return; -} - - -void 
check_abs_int64() { - using datatype_t = long long; - - const int NUM_INPUTS = 8; - auto memsize = NUM_INPUTS * sizeof(datatype_t); - - // allocate memories - datatype_t* inputCPU = (datatype_t*)malloc(memsize); - datatype_t* outputCPU = (datatype_t*)malloc(memsize); - datatype_t* inputGPU = nullptr; - hipMalloc((void**)&inputGPU, memsize); - datatype_t* outputGPU = nullptr; - hipMalloc((void**)&outputGPU, memsize); - - // populate input - inputCPU[0] = -81985529216486895ll; - inputCPU[1] = 81985529216486895ll; - inputCPU[2] = -1250999896491ll; - inputCPU[3] = 1250999896491ll; - inputCPU[4] = -19088743ll; - inputCPU[5] = 19088743ll; - inputCPU[6] = -291ll; - inputCPU[7] = 291ll; - - // copy inputs to device - hipMemcpy(inputGPU, inputCPU, memsize, hipMemcpyHostToDevice); - - // launch kernel - hipLaunchKernelGGL(kernel_abs_int64, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); - - // copy outputs from device - hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); - - // check outputs - CHECK_ABS_INT64(inputCPU[0], outputCPU[0], outputCPU[1]); - CHECK_ABS_INT64(inputCPU[1], outputCPU[1], outputCPU[1]); - CHECK_ABS_INT64(inputCPU[2], outputCPU[2], outputCPU[3]); - CHECK_ABS_INT64(inputCPU[3], outputCPU[3], outputCPU[3]); - CHECK_ABS_INT64(inputCPU[4], outputCPU[4], outputCPU[5]); - CHECK_ABS_INT64(inputCPU[5], outputCPU[5], outputCPU[5]); - CHECK_ABS_INT64(inputCPU[6], outputCPU[6], outputCPU[7]); - CHECK_ABS_INT64(inputCPU[7], outputCPU[7], outputCPU[7]); - - // free memories - hipFree(inputGPU); - hipFree(outputGPU); - free(inputCPU); - free(outputCPU); - - // done - return; -} - - -template -__global__ void kernel_simple(F f, T* out) { - *out = f(); -} - -template -void check_simple(F f, T expected, const char* file, unsigned line) { - auto memsize = sizeof(T); - T* outputCPU = (T*)malloc(memsize); - T* outputGPU = nullptr; - hipMalloc((void**)&outputGPU, memsize); - hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU); - 
hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); - if (*outputCPU != expected) { - failed("%s line %u : check failed (output = %lf, expected = %lf)\n", file, line, - (double)(*outputCPU), (double)expected); - } - hipFree(outputGPU); - free(outputCPU); -} -#define CHECK_SIMPLE(lambda, expected) check_simple(lambda, expected, __FILE__, __LINE__); - -void test_fp16() { - CHECK_SIMPLE([] __device__() { return max<__fp16>(1.0f, 2.0f); }, 2.0f); - CHECK_SIMPLE([] __device__() { return min<__fp16>(1.0f, 2.0f); }, 1.0f); -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - check_abs_int64(); - - // check_lgamma_double(); - - test_fp16(); - - passed(); -} diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp deleted file mode 100644 index 83b5daa98e..0000000000 --- a/tests/src/deviceLib/hipMathFunctions.cpp +++ /dev/null @@ -1,200 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp CLANG_OPTIONS -Xclang -fallow-half-arguments-and-returns EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -__global__ void kernel_abs_int64(long long *input, long long *output) { - int tx = threadIdx.x; - output[tx] = abs(input[tx]); -} - -__global__ void kernel_lgamma_double(double *input, double *output) { - int tx = threadIdx.x; - output[tx] = lgamma(input[tx]); -} - -#define CHECK_LGAMMA_DOUBLE(IN, OUT, EXP) \ - { \ - if (OUT != EXP) { \ - failed("check_abs_int64 failed on %f (output = %f, expected = %fd)\n", IN, OUT, EXP); \ - } \ - } - -#define CHECK_ABS_INT64(IN, OUT, EXP) \ - { \ - if (OUT != EXP) { \ - failed("check_abs_int64 failed on %lld (output = %lld, expected = %lld)\n", IN, OUT, EXP); \ - } \ - } - -void check_lgamma_double() { - - using datatype_t = double; - - const int NUM_INPUTS = 8; - auto memsize = NUM_INPUTS * sizeof(datatype_t); - - // allocate memories - datatype_t *inputCPU = (datatype_t *) malloc(memsize); - datatype_t *outputCPU = (datatype_t *) malloc(memsize); - datatype_t *inputGPU = nullptr; hipMalloc((void**)&inputGPU, memsize); - datatype_t *outputGPU = nullptr; hipMalloc((void**)&outputGPU, memsize); - - // populate input - for (int i=0; i -__global__ void kernel_simple(F f, T *out) { - *out = f(); -} - -template -void check_simple(F f, T expected, const char* file, unsigned line) { - auto memsize = sizeof(T); - T *outputCPU = (T *) malloc(memsize); - T *outputGPU = nullptr; - hipMalloc((void**)&outputGPU, memsize); - hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU); - hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); - if 
(*outputCPU != expected) { - failed("%s line %u : check failed (output = %lf, expected = %lf)\n", - file, line, (double)(*outputCPU), (double)expected); - } - hipFree(outputGPU); - free(outputCPU); -} -#define CHECK_SIMPLE(lambda, expected) \ - check_simple(lambda, expected, __FILE__, __LINE__); - -void test_fp16() { - CHECK_SIMPLE([]__device__(){ return max<__fp16>(1.0f, 2.0f); }, 2.0f); - CHECK_SIMPLE([]__device__(){ return min<__fp16>(1.0f, 2.0f); }, 1.0f); -} - -void test_pown() { - CHECK_SIMPLE([]__device__(){ return powif(2.0f, 2); }, 4.0f); - CHECK_SIMPLE([]__device__(){ return powi(2.0, 2); }, 4.0); - CHECK_SIMPLE([]__device__(){ return pow(2.0f, 2); }, 4.0f); - CHECK_SIMPLE([]__device__(){ return pow(2.0, 2); }, 4.0); - CHECK_SIMPLE([]__device__(){ return pow(2.0f16, 2); }, 4.0f16); -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - check_abs_int64(); - - // check_lgamma_double(); - - test_fp16(); - - test_pown(); - - passed(); -} diff --git a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp b/tests/src/deviceLib/hipSimpleAtomicsTest.cpp deleted file mode 100644 index c2d5be7ce4..0000000000 --- a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp +++ /dev/null @@ -1,365 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 --gpu-architecture=sm_60 - * TEST: %t - * HIT_END - */ - -// Includes HIP Runtime -#include "hip/hip_runtime.h" -#include - -// includes, system -#include -#include -#include -#include -#include -#include - -using namespace std; - -//////////////////////////////////////////////////////////////////////////////// -// Auto-Verification Code -//////////////////////////////////////////////////////////////////////////////// - -bool verifyBitwise(...) { - return true; -} - -template{}>::type* = nullptr> -bool verifyBitwise(T* gpuData, int len) { - T val = 0xff; - - for (int i = 0; i < len; ++i) { - // 9th element should be 1 - val &= (2 * i + 7); - } - - if (val != gpuData[8]) { - printf("atomicAnd failed: gpuData[8]=%llu, expect=%llu\n", - (unsigned long long)gpuData[8], (unsigned long long)val); - return false; - } - - val = 0; - - for (int i = 0; i < len; ++i) { - // 10th element should be 0xff - val |= (1 << i); - } - - if (val != gpuData[9]) { - printf("atomicOr failed\n"); - return false; - } - - val = 0xff; - - for (int i = 0; i < len; ++i) { - // 11th element should be 0xff - val ^= i; - } - - if (val != gpuData[10]) { - printf("atomicXor failed\n"); - return false; - } - - return true; -} - -bool verifySub(...) 
{ - return true; -} - -template< - typename T, - typename enable_if< - is_same{} || is_same{}>::type* = nullptr> -bool verifySub(T* gpuData, int len) { - T val = 0; - - for (int i = 0; i < len; ++i) { - val -= 10; - } - - if (val != gpuData[1]) { - printf("atomicSub failed: gpuData[1]=%d, expected=%d\n", - (int)gpuData[1], (int)val); - return false; - } else { - printf("atomicSub succeeded: gpuData[1]=%d, expected=%d\n", - (int)gpuData[1], (int)val); - } - return true; -} - -bool verifyExch(...) { - return true; -} - -template {}>::type* = nullptr> -bool computeExchExch(T* gpuData, int len) { - T val = 0; - - bool found = false; - - for (T i = 0; i < len; ++i) { - if (i == gpuData[2]) { - found = true; - break; - } - } - - if (!found) { - printf("atomicExch failed\n"); - return false; - } - return true; -} - -bool VerifyIntegral(...) { - return true; -} - -template{}>::type* = nullptr> -bool VerifyIntegral(T* gpuData, int len) { - T val = 0; - - bool found = false; - - for (T i = 0; i < len; ++i) { - // fourth element should be len-1 - val = max(val, i); - } - - if (val != gpuData[3]) { - printf("atomicMax failed: gpuData[3]=%llu, expected=%llu\n", - (unsigned long long)gpuData[3], (unsigned long long)val); - return false; - } else { - printf("atomicMax succeeded: gpuData[3]=%llu, expected=%llu\n", - (unsigned long long)gpuData[3], (unsigned long long)val); - } - - val = 1 << 8; - - for (T i = 0; i < len; ++i) { - val = min(val, i); - } - - if (val != gpuData[4]) { - printf("atomicMin failed\n"); - return false; - } - - int limit = 17; - val = 0; - - for (int i = 0; i < len; ++i) { - val = (val >= limit) ? 0 : val + 1; - } - - if (val != gpuData[5]) { - printf("atomicInc failed\n"); - return false; - } - - limit = 137; - val = 0; - - for (int i = 0; i < len; ++i) { - val = ((val == 0) || (val > limit)) ? 
limit : val - 1; - } - - if (val != gpuData[6]) { - printf("atomicDec failed\n"); - return false; - } - - found = false; - - for (T i = 0; i < len; ++i) { - // eighth element should be a member of [0, len) - if (i == gpuData[7]) { - found = true; - break; - } - } - if (!found) { - printf("atomicCAS failed\n"); - return false; - } - return verifyBitwise(gpuData, len) && verifySub(gpuData, len); -} - -template -bool verifyData(T* gpuData, int len) { - T val = 0; - - for (int i = 0; i < len; ++i) { - val += 10; - } - - if (val != gpuData[0]) { - printf("atomicAdd failed\n"); - return false; - } - - return VerifyIntegral(gpuData, len) && verifyExch(gpuData, len); -} - -__device__ -void testKernelExch(...) {} - -template{}>::type* = nullptr> -__device__ -void testKernelExch(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - - // Atomic exchange - atomicExch(&g_odata[2], tid); -} - -__device__ -void testKernelSub(...) {} - -template< - typename T, - typename enable_if< - is_same{} || is_same{}>::type* = nullptr> -__device__ -void testKernelSub(T* g_odata) { - // Atomic subtraction (final should be 0) - atomicSub(&g_odata[1], 10); -} - -__device__ -void testKernelIntegral(...) 
{} - -template{}>::type* = nullptr> -__device__ -void testKernelIntegral(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - - // Atomic maximum - atomicMax(&g_odata[3], tid); - - // Atomic minimum - atomicMin(&g_odata[4], tid); - - // Atomic increment (modulo 17+1) - atomicInc((unsigned int*)&g_odata[5], 17); - - // Atomic decrement - atomicDec((unsigned int*)&g_odata[6], 137); - - // Atomic compare-and-swap - atomicCAS(&g_odata[7], tid - 1, tid); - - // Bitwise atomic instructions - - // Atomic AND - atomicAnd(&g_odata[8], 2 * tid + 7); - - // Atomic OR - atomicOr(&g_odata[9], 1 << tid); - - // Atomic XOR - atomicXor(&g_odata[10], tid); - - testKernelSub(g_odata); -} - -template -__global__ void testKernel(T* g_odata) { - // Atomic addition - atomicAdd(&g_odata[0], 10); - - testKernelIntegral(g_odata); - testKernelExch(g_odata); -} - -template -void runTest() { - bool testResult = true; - unsigned int numThreads = 256; - unsigned int numBlocks = 64; - unsigned int numData = 11; - unsigned int memSize = sizeof(T) * numData; - - printf("runTest<%s>, total thread=%u\n", typeid(T).name(), numThreads*numBlocks); - - // allocate mem for the result on host side - T* hOData = (T*)malloc(memSize); - - // initialize the memory - for (unsigned int i = 0; i < numData; i++) hOData[i] = 0; - - // To make the AND and XOR tests generate something other than 0... 
- hOData[8] = hOData[10] = 0xff; - - // allocate device memory for result - T* dOData; - hipMalloc((void**)&dOData, memSize); - // copy host memory to device to initialize to zero - hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice); - - // execute the kernel - hipLaunchKernelGGL( - testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData); - - // Copy result from device to host - hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost); - - // Compute reference solution - testResult = verifyData(hOData, numThreads * numBlocks); - - // Cleanup memory - free(hOData); - hipFree(dOData); - - if(!testResult) { - failed("runTest<%s> failed\n", typeid(T).name()); - } -} - -int main(int argc, char** argv) { - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - // Statistics about the GPU device - printf( - "> GPU device has %d Multi-Processors, " - "SM %d.%d compute capabilities\n\n", - deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor); - - runTest(); - runTest(); - runTest(); - runTest(); - runTest(); - - hipDeviceReset(); - passed(); -} diff --git a/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp deleted file mode 100644 index 10ea8482c2..0000000000 --- a/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include - -#include "test_common.h" - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void single_precision_intrinsics() { - float fX, fY; - - __cosf(0.0f); - __exp10f(0.0f); - __expf(0.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fadd_rd(0.0f, 1.0f); -#endif - __fadd_rn(0.0f, 1.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fadd_ru(0.0f, 1.0f); - __fadd_rz(0.0f, 1.0f); - __fdiv_rd(4.0f, 2.0f); -#endif - __fdiv_rn(4.0f, 2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fdiv_ru(4.0f, 2.0f); - __fdiv_rz(4.0f, 2.0f); -#endif - __fdividef(4.0f, 2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fmaf_rd(1.0f, 2.0f, 3.0f); -#endif - __fmaf_rn(1.0f, 2.0f, 3.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fmaf_ru(1.0f, 2.0f, 3.0f); - __fmaf_rz(1.0f, 2.0f, 3.0f); - __fmul_rd(1.0f, 2.0f); -#endif - __fmul_rn(1.0f, 2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fmul_ru(1.0f, 2.0f); - __fmul_rz(1.0f, 2.0f); - __frcp_rd(2.0f); -#endif - __frcp_rn(2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __frcp_ru(2.0f); - __frcp_rz(2.0f); -#endif - __frsqrt_rn(4.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fsqrt_rd(4.0f); -#endif - __fsqrt_rn(4.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fsqrt_ru(4.0f); - __fsqrt_rz(4.0f); - __fsub_rd(2.0f, 1.0f); -#endif - __fsub_rn(2.0f, 1.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fsub_ru(2.0f, 1.0f); - __fsub_rz(2.0f, 1.0f); -#endif - __log10f(1.0f); - __log2f(1.0f); - __logf(1.0f); - __powf(1.0f, 0.0f); - __saturatef(0.1f); - __sincosf(0.0f, &fX, &fY); - __sinf(0.0f); - __tanf(0.0f); -} - - -__global__ void compileSinglePrecisionIntrinsics(int ignored) { - single_precision_intrinsics(); -} - - -int main() { - hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, 1); - passed(); -} diff --git 
a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp b/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp deleted file mode 100644 index cf8d676c51..0000000000 --- a/tests/src/deviceLib/hipSinglePrecisionMathDevice.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void single_precision_math_functions() { - int iX; - float fX, fY; - - acosf(1.0f); - acoshf(1.0f); - asinf(0.0f); - asinhf(0.0f); - atan2f(0.0f, 1.0f); - atanf(0.0f); - atanhf(0.0f); - cbrtf(0.0f); - ceilf(0.0f); - copysignf(1.0f, -2.0f); - cosf(0.0f); - coshf(0.0f); - cospif(0.0f); - // cyl_bessel_i0f(0.0f); - // cyl_bessel_i1f(0.0f); - erfcf(0.0f); - erfcinvf(2.0f); - erfcxf(0.0f); - erff(0.0f); - erfinvf(1.0f); - exp10f(0.0f); - exp2f(0.0f); - expf(0.0f); - expm1f(0.0f); - fabsf(1.0f); - fdimf(1.0f, 0.0f); - fdividef(0.0f, 1.0f); - floorf(0.0f); - fmaf(1.0f, 2.0f, 3.0f); - fmaxf(0.0f, 0.0f); - fminf(0.0f, 0.0f); - fmodf(0.0f, 1.0f); - frexpf(0.0f, &iX); - hypotf(1.0f, 0.0f); - ilogbf(1.0f); - isfinite(0.0f); - isinf(0.0f); - isnan(0.0f); - j0f(0.0f); - j1f(0.0f); - jnf(-1.0f, 1.0f); - ldexpf(0.0f, 0); - // lgammaf(1.0f); - llrintf(0.0f); - llroundf(0.0f); - log10f(1.0f); - log1pf(-1.0f); - log2f(1.0f); - logbf(1.0f); - logf(1.0f); - lrintf(0.0f); - lroundf(0.0f); - // modff(0.0f, &fX); - nanf("1"); - nearbyintf(0.0f); - // nextafterf(0.0f); - norm3df(1.0f, 0.0f, 0.0f); - norm4df(1.0f, 0.0f, 0.0f, 0.0f); - normcdff(0.0f); - normcdfinvf(1.0f); - fX = 1.0f; - normf(1, &fX); - powf(1.0f, 0.0f); - // rcbrtf(1.0f); - remainderf(2.0f, 1.0f); - // remquof(1.0f, 2.0f, &iX); - rhypotf(0.0f, 1.0f); - rintf(1.0f); - rnorm3df(0.0f, 0.0f, 1.0f); - rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); - fX = 1.0f; - rnormf(1, &fX); - roundf(0.0f); - rsqrtf(1.0f); - // scalblnf(0.0f, 1); - // scalbnf(0.0f, 1); - signbit(1.0f); - sincosf(0.0f, &fX, &fY); - sincospif(0.0f, &fX, &fY); - sinf(0.0f); - sinhf(0.0f); - sinpif(0.0f); - sqrtf(0.0f); - tanf(0.0f); - tanhf(0.0f); - tgammaf(2.0f); - truncf(0.0f); - y0f(1.0f); - y1f(1.0f); - ynf(1, 1.0f); -} - 
-__global__ void compileSinglePrecisionMathOnDevice(int ignored) { - single_precision_math_functions(); -} - -int main() { - hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, 1); - passed(); -} diff --git a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp b/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp deleted file mode 100644 index a61b4d5982..0000000000 --- a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -//#include -#include "test_common.h" -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__host__ void single_precision_math_functions() { - int iX; - float fX, fY; - - acosf(1.0f); - acoshf(1.0f); - asinf(0.0f); - asinhf(0.0f); - atan2f(0.0f, 1.0f); - atanf(0.0f); - atanhf(0.0f); - cbrtf(0.0f); - ceilf(0.0f); - copysignf(1.0f, -2.0f); - cosf(0.0f); - coshf(0.0f); - // cospif(0.0f); - // cyl_bessel_i0f(0.0f); - // cyl_bessel_i1f(0.0f); - erfcf(0.0f); - // erfcinvf(2.0f); - // erfcxf(0.0f); - erff(0.0f); - // erfinvf(1.0f); - #ifdef __unix__ - exp10f(0.0f); - #endif - exp2f(0.0f); - expf(0.0f); - expm1f(0.0f); - fabsf(1.0f); - fdimf(1.0f, 0.0f); -#if defined(__HIP_PLATFORM_AMD__) - // fdividef(0.0f, 1.0f); -#endif - floorf(0.0f); - fmaf(1.0f, 2.0f, 3.0f); - fmaxf(0.0f, 0.0f); - fminf(0.0f, 0.0f); - fmodf(0.0f, 1.0f); - frexpf(0.0f, &iX); - hypotf(1.0f, 0.0f); - ilogbf(1.0f); - std::isfinite(0.0f); - std::isinf(0.0f); - std::isnan(0.0f); - #ifdef __unix__ - j0f(0.0f); - j1f(0.0f); - jnf(-1.0f, 1.0f); - #endif - ldexpf(0.0f, 0); - lgammaf(1.0f); - llrintf(0.0f); - llroundf(0.0f); - log10f(1.0f); - log1pf(-1.0f); - log2f(1.0f); - logbf(1.0f); - logf(1.0f); - lrintf(0.0f); - lroundf(0.0f); - modff(0.0f, &fX); - nanf("1"); - nearbyintf(0.0f); - // nextafterf(0.0f); -#if defined(__HIP_PLATFORM_AMD__) - // norm3df(1.0f, 0.0f, 0.0f); - // norm4df(1.0f, 0.0f, 0.0f, 0.0f); -#endif - // normcdff(0.0f); - // normcdfinvf(1.0f); - // fX = 1.0f; normf(1, &fX); - powf(1.0f, 0.0f); - // rcbrtf(1.0f); - remainderf(2.0f, 1.0f); - remquof(1.0f, 2.0f, &iX); -#if defined(__HIP_PLATFORM_AMD__) - // rhypotf(0.0f, 1.0f); -#endif - rintf(1.0f); -#if defined(__HIP_PLATFORM_AMD__) - // rnorm3df(0.0f, 0.0f, 1.0f); - // rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); - fX = 1.0f; // rnormf(1, &fX); -#endif - roundf(0.0f); - /// rsqrtf(1.0f); - 
scalblnf(0.0f, 1); - scalbnf(0.0f, 1); - std::signbit(1.0f); - #ifdef __unix__ - sincosf(0.0f, &fX, &fY); - #endif - // sincospif(0.0f, &fX, &fY); - sinf(0.0f); - sinhf(0.0f); - // sinpif(0.0f); - sqrtf(0.0f); - tanf(0.0f); - tanhf(0.0f); - tgammaf(2.0f); - truncf(0.0f); - #ifdef __unix__ - y0f(1.0f); - y1f(1.0f); - ynf(1, 1.0f); - #endif -} - -static void compileOnHost() { single_precision_math_functions(); } - -int main() { - compileOnHost(); - passed(); -} diff --git a/tests/src/deviceLib/hipStdComplex.cpp b/tests/src/deviceLib/hipStdComplex.cpp deleted file mode 100644 index 4870a77a5b..0000000000 --- a/tests/src/deviceLib/hipStdComplex.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* HIT_START - * BUILD: %t %s NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ -#include "test_common.h" -#include -#ifdef __HIP_PLATFORM_AMD__ -#include -using namespace std; -#else -#include -using namespace cuda::std; -#endif - -// Tolerance for error -const double tolerance = 1e-6; -const bool verbose = false; - -#define LEN 64 - -#define ALL_FUN \ - OP(add) \ - OP(sub) \ - OP(mul) \ - OP(div) \ - OP(abs) \ - OP(arg) \ - OP(sin) \ - OP(cos) - -#define OP(x) CK_##x, -enum CalcKind { - ALL_FUN -}; -#undef OP - -#define OP(x) case CK_##x: return #x; -std::string getName(enum CalcKind CK) { - switch(CK){ - ALL_FUN - } - return ""; // To prevent compile warning -} -#undef OP - -// Calculates function. -// If the function has one argument, B is ignored. -// If the function returns real number, converts it to a complex number. -#define ONE_ARG(func) \ - case CK_##func: \ - return complex(func(A)); - -template -__device__ __host__ complex calc(complex A, - complex B, - enum CalcKind CK) { - switch(CK) { - case CK_add: - return A + B; - case CK_sub: - return A - B; - case CK_mul: - return A * B; - case CK_div: - return A / B; - - ONE_ARG(abs) - ONE_ARG(arg) - ONE_ARG(sin) - ONE_ARG(cos) - } - return A; // To prevent compile warning -} - -template -__global__ void kernel(complex* A, - complex* B, complex* C, - enum CalcKind CK) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - C[tx] = calc(A[tx], B[tx], CK); -} - -template -void test() { - typedef complex ComplexT; - - ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D; - A = new ComplexT[LEN]; - B = new ComplexT[LEN]; - C = new ComplexT[LEN]; - D = new ComplexT[LEN]; - hipMalloc((void**)&Ad, sizeof(ComplexT)*LEN); - hipMalloc((void**)&Bd, sizeof(ComplexT)*LEN); - hipMalloc((void**)&Cd, sizeof(ComplexT)*LEN); - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f); - B[i] = A[i]; - C[i] = A[i]; - } - hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, 
sizeof(ComplexT)*LEN, hipMemcpyHostToDevice); - - // Run kernel for a calculation kind and verify by comparing with host - // calculation result. Returns false if fails. - auto test_fun = [&](enum CalcKind CK) { - hipLaunchKernelGGL(kernel, dim3(1), dim3(LEN), 0, 0, Ad, Bd, Cd, CK); - hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost); - for (int i = 0; i < LEN; i++) { - ComplexT Expected = calc(A[i], B[i], CK); - FloatT error = abs(C[i] - Expected); - if (abs(Expected) > tolerance) - error /= abs(Expected); - bool pass = error < tolerance; - if (verbose || !pass) { - std::cout << "Function: " << getName(CK) - << " Operands: " << A[i].real() << ", " << A[i].imag() << "; " <(); - test(); - passed(); - return 0; -} diff --git a/tests/src/deviceLib/hipTestAtomicAdd.cpp b/tests/src/deviceLib/hipTestAtomicAdd.cpp deleted file mode 100644 index 25f818cf8b..0000000000 --- a/tests/src/deviceLib/hipTestAtomicAdd.cpp +++ /dev/null @@ -1,329 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -Testcase Scenarios : - - (TestCase 1):: - 1) Execute atomicAdd in multi threaded scenario by diverging the data across - multiple threads and validate the output at the end of all operations. - 2) Execute atomicAddNoRet in multi threaded scenario by diverging the data - across multiple threads and validate the output at the end of all operations. - - (TestCase 2):: - 3) Execute atomicAdd API and validate the result. - 4) Execute atomicAddNoRet API and validate the result. - - (TestCase 3):: - 5) atomicadd/NoRet negative scenarios (TBD). - -*/ - - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS --gpu-architecture=compute_60 - * TEST_NAMED: %t hipTestAtomicnoret-manywaves --atomicnoret --tests 1 - * TEST_NAMED: %t hipTestAtomicnoret-simple --atomicnoret --tests 2 - * TEST_NAMED: %t hipTestAtomic-manywaves --tests 1 - * TEST_NAMED: %t hipTestAtomic-simple --tests 2 - * HIT_END - */ - -#include -#include "test_common.h" - -/* - * Defines initial and increment values - */ -#define INCREMENT_VALUE 10 - -#define INT_INITIAL_VALUE 10 -#define FLOAT_INITIAL_VALUE 10.50 -#define DOUBLE_INITIAL_VALUE 200.12 -#define LONG_INITIAL_VALUE 10000 -#define UNSIGNED_INITIAL_VALUE 20 - -#ifdef __HIP_PLATFORM_NVIDIA__ -// atomicAddNoRet is unavailable in cuda -template -__device__ void atomicAddNoRet(T* x, int y) { - atomicAdd(x, static_cast(y)); -} -#endif - - -/* - * Square each element in the array A and write to array C. 
- */ -bool p_atomicNoRet = false; - -template -__global__ void atomicnoret_manywaves(T* C_d) { - size_t tid = (blockIdx.x * blockDim.x + threadIdx.x); - switch (tid % 9) { - case 0: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 1: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 2: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 3: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 4: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 5: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 6: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 7: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - case 8: - atomicAddNoRet(C_d, INCREMENT_VALUE); - break; - } -} - - - -template -__global__ void atomic_manywaves(T* C_d) { - size_t tid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - - switch (tid % 9) { - case 0: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 1: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 2: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 3: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 4: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 5: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 6: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 7: - atomicAdd(C_d, INCREMENT_VALUE); - break; - case 8: - atomicAdd(C_d, INCREMENT_VALUE); - break; - } -} - - -template -__global__ void atomicnoret_simple(T* C_d) { - atomicAddNoRet(C_d, INCREMENT_VALUE); -} - -template -__global__ void atomic_simple(T* C_d) { - atomicAdd(C_d, INCREMENT_VALUE); -} - - -template -bool atomictest_manywaves(const T& initial_val) { - unsigned int ThreadsperBlock = 10; - unsigned int numBlocks = 1; - bool testPassed = true; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIPCHECK(hipMalloc(&dOData, memSize)); - // copy host memory to device to initialize to zero - HIPCHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the 
kernel - hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIPCHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - if (hOData[0] != initial_val+(INCREMENT_VALUE*(ThreadsperBlock*numBlocks))) - testPassed = false; - - // Cleanup memory - free(hOData); - hipFree(dOData); - - return testPassed; -} - -template -bool atomictestnoret_manywaves(const T& initial_val) { - unsigned int ThreadsperBlock = 10; - unsigned int numBlocks = 1; - bool testPassed = true; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIPCHECK(hipMalloc(&dOData, memSize)); - // copy host memory to device to initialize to zero - HIPCHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIPCHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - if (hOData[0] != initial_val+(INCREMENT_VALUE*(ThreadsperBlock*numBlocks))) - testPassed = false; - - // Cleanup memory - free(hOData); - hipFree(dOData); - - return testPassed; -} - -template -bool atomictest_simple(const T& initial_val) { - unsigned int ThreadsperBlock = 1; - unsigned int numBlocks = 1; - bool testPassed = true; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIPCHECK(hipMalloc(&dOData, memSize)); - // copy host memory to device to initialize to zero - HIPCHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL(atomic_simple, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIPCHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - if (hOData[0] != initial_val+INCREMENT_VALUE) - testPassed = false; - - // Cleanup 
memory - free(hOData); - hipFree(dOData); - - return testPassed; -} - - -template -bool atomictestnoret_simple(const T& initial_val) { - unsigned int ThreadsperBlock = 1; - unsigned int numBlocks = 1; - bool testPassed = true; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIPCHECK(hipMalloc(&dOData, memSize)); - // copy host memory to device to initialize to zero - HIPCHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIPCHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - if (hOData[0] != initial_val+INCREMENT_VALUE) - testPassed = false; - - // Cleanup memory - free(hOData); - hipFree(dOData); - - return testPassed; -} - - -// Parse arguments specific to this test. -void parseMyArguments(int argc, char* argv[]) { - int more_argc = HipTest::parseStandardArguments(argc, argv, false); - - // parse args for this test: - for (int i = 1; i < more_argc; i++) { - const char* arg = argv[i]; - if (!strcmp(arg, "--atomicnoret")) { - p_atomicNoRet = true; - } else { - failed("Bad argument '%s'", arg); - } - } -} - -int main(int argc, char* argv[]) { - parseMyArguments(argc, argv); - HIPCHECK(hipSetDevice(p_gpuDevice)); - bool TestPassed = true; - - if (p_tests == 1) { - if (!p_atomicNoRet) { - TestPassed &= atomictest_manywaves(INT_INITIAL_VALUE); - TestPassed &= atomictest_manywaves(UNSIGNED_INITIAL_VALUE); - TestPassed &= atomictest_manywaves(FLOAT_INITIAL_VALUE); - TestPassed &= - atomictest_manywaves(LONG_INITIAL_VALUE); - TestPassed &= - atomictest_manywaves(DOUBLE_INITIAL_VALUE); - } else { - atomictestnoret_manywaves(FLOAT_INITIAL_VALUE); - } - } else if (p_tests == 2) { - if (!p_atomicNoRet) { - TestPassed &= atomictest_simple(INT_INITIAL_VALUE); - TestPassed &= atomictest_simple(UNSIGNED_INITIAL_VALUE); - 
TestPassed &= atomictest_simple(FLOAT_INITIAL_VALUE); - TestPassed &= atomictest_simple(LONG_INITIAL_VALUE); - TestPassed &= atomictest_simple(DOUBLE_INITIAL_VALUE); - } else { - TestPassed &= atomictestnoret_simple(FLOAT_INITIAL_VALUE); - } - } else { - printf("Didnt receive any valid option. Try options 1 or 2\n"); - TestPassed = false; - } - - if (TestPassed) { - passed(); - } else { - failed("hipTestAtomicAdd TC validation Failed!"); - } -} diff --git a/tests/src/deviceLib/hipTestClock.cpp b/tests/src/deviceLib/hipTestClock.cpp deleted file mode 100644 index 7e70d32b4d..0000000000 --- a/tests/src/deviceLib/hipTestClock.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -#define HIP_ASSERT(status) assert(status == hipSuccess) - -#define LEN 512 -#define SIZE (LEN * sizeof(long long)) - - static __global__ void kernel1(long long* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = clock() + clock64() + __clock() + __clock64(); - } - - static __global__ void kernel2(long long* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid]; - } - - static __global__ void kernel1_gfx11(long long* Ad) { -#ifdef __HIP_PLATFORM_AMD__ - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = clock() + wall_clock64() + __clock() + __clock64(); -#endif - } - - static __global__ void kernel2_gfx11(long long* Ad) { -#ifdef __HIP_PLATFORM_AMD__ - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = clock() + wall_clock64() + __clock() + __clock64() - Ad[tid]; -#endif - } - - void run() { - long long *A, *Ad; - A = new long long[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = 0; - } - - auto kernel1_used = IsGfx11() ? kernel1_gfx11 : kernel1; - auto kernel2_used = IsGfx11() ? kernel2_gfx11 : kernel2; - - HIP_ASSERT(hipMalloc((void**)&Ad, SIZE)); - - hipLaunchKernelGGL(kernel1_used, dim3(1, 1, 1), - dim3(LEN, 1, 1), 0, 0, Ad); - hipLaunchKernelGGL(kernel2_used, dim3(1, 1, 1), - dim3(LEN, 1, 1), 0, 0, Ad); - HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); - - for (unsigned i = 0; i < LEN; i++) { - assert(0 != A[i]); - } - } - -int main() { - run(); - passed(); -} diff --git a/tests/src/deviceLib/hipTestDevice.cpp b/tests/src/deviceLib/hipTestDevice.cpp deleted file mode 100644 index 95d0044265..0000000000 --- a/tests/src/deviceLib/hipTestDevice.cpp +++ /dev/null @@ -1,742 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include -#include -#include - -#define N 512 -#define SIZE N * sizeof(float) - -__global__ void test_sincosf(float* a, float* b, float* c) { - int tid = threadIdx.x; - sincosf(a[tid], b + tid, c + tid); -} - -__global__ void test_sincospif(float* a, float* b, float* c) { - int tid = threadIdx.x; - sincospif(a[tid], b + tid, c + tid); -} - -__global__ void test_fdividef(float* a, float* b, float* c) { - int tid = threadIdx.x; - c[tid] = fdividef(a[tid], b[tid]); -} - -__global__ void test_llrintf(float* a, long long int* b) { - int tid = threadIdx.x; - b[tid] = llrintf(a[tid]); -} - -__global__ void test_lrintf(float* a, long int* b) { - int tid = threadIdx.x; - b[tid] = lrintf(a[tid]); -} - -__global__ void test_rintf(float* a, float* b) { - int tid = threadIdx.x; - b[tid] = rintf(a[tid]); -} - -__global__ void test_llroundf(float* a, long long int* b) { - int tid = threadIdx.x; - b[tid] = llroundf(a[tid]); -} - -__global__ void test_lroundf(float* a, long int* b) { - int tid = threadIdx.x; - b[tid] = lroundf(a[tid]); -} - -__global__ void test_rhypotf(float* a, float* b, float* c) { - int tid = threadIdx.x; - c[tid] = rhypotf(a[tid], b[tid]); -} - -__global__ void test_norm3df(float* a, float* b, float* c, float* d) { - int tid = threadIdx.x; - d[tid] = norm3df(a[tid], b[tid], c[tid]); -} - -__global__ void test_norm4df(float* a, float* b, float* c, float* d, float* e) { - int tid = threadIdx.x; - e[tid] = norm4df(a[tid], b[tid], c[tid], d[tid]); -} - -__global__ void test_normf(float* a, float* b) { - int tid = threadIdx.x; - b[tid] = normf(N, a); -} - -__global__ void test_rnorm3df(float* a, float* b, float* c, float* d) { - int tid = threadIdx.x; - d[tid] = rnorm3df(a[tid], b[tid], c[tid]); -} - -__global__ void test_rnorm4df(float* a, float* b, float* c, float* d, float* e) { - int tid = threadIdx.x; - e[tid] = rnorm4df(a[tid], b[tid], c[tid], 
d[tid]); -} - -__global__ void test_rnormf(float* a, float* b) { - int tid = threadIdx.x; - b[tid] = rnormf(N, a); -} - -__global__ void test_erfinvf(float* a, float* b) { - int tid = threadIdx.x; - b[tid] = erff(erfinvf(a[tid])); -} - - -bool run_sincosf() { - float *A, *Ad, *B, *C, *Bd, *Cd; - A = new float[N]; - B = new float[N]; - C = new float[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_sincosf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[i] == sinf(1.0f)) { - passed = 1; - } - } - passed = 0; - for (int i = 0; i < 512; i++) { - if (C[i] == cosf(1.0f)) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_sincospif() { - float *A, *Ad, *B, *C, *Bd, *Cd; - A = new float[N]; - B = new float[N]; - C = new float[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_sincospif, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[i] - sinf(3.14 * 1.0f) < 0.1) { - passed = 1; - } - } - passed = 0; - for (int i = 0; i < 512; i++) { - if (C[i] - cosf(3.14 * 1.0f) < 0.1) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool 
run_fdividef() { - float *A, *Ad, *B, *C, *Bd, *Cd; - A = new float[N]; - B = new float[N]; - C = new float[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_fdividef, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (C[i] == A[i] / B[i]) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_llrintf() { - float *A, *Ad; - long long int *B, *Bd; - A = new float[N]; - B = new long long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * sizeof(long long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_llrintf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - int x = roundf(A[i]); - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_lrintf() { - float *A, *Ad; - long int *B, *Bd; - A = new float[N]; - B = new long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * sizeof(long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_lrintf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - int x = roundf(A[i]); - if (B[i] == x) { - passed = 1; - } 
- } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rintf() { - float *A, *Ad; - float *B, *Bd; - A = new float[N]; - B = new float[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rintf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - float x = roundf(A[i]); - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - - -bool run_llroundf() { - float *A, *Ad; - long long int *B, *Bd; - A = new float[N]; - B = new long long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * sizeof(long long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_llroundf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - int x = roundf(A[i]); - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_lroundf() { - float *A, *Ad; - long int *B, *Bd; - A = new float[N]; - B = new long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * sizeof(long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_lroundf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - int x = roundf(A[i]); - if (B[i] 
== x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - - -bool run_norm3df() { - float *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd; - A = new float[N]; - B = new float[N]; - C = new float[N]; - D = new float[N]; - float val = 0.0f; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - C[i] = 3.0f; - } - val = sqrtf(1.0f + 4.0f + 9.0f); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_norm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); - hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (D[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_norm4df() { - float *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd, *E, *Ed; - A = new float[N]; - B = new float[N]; - C = new float[N]; - D = new float[N]; - E = new float[N]; - float val = 0.0f; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - C[i] = 3.0f; - D[i] = 4.0f; - } - val = sqrtf(1.0f + 4.0f + 9.0f + 16.0f); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMalloc((void**)&Ed, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_norm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); - hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost); - int 
passed = 0; - for (int i = 0; i < 512; i++) { - if (E[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - delete[] E; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - hipFree(Ed); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_normf() { - float *A, *Ad, *B, *Bd; - A = new float[N]; - B = new float[N]; - float val = 0.0f; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 0.0f; - val += 1.0f; - } - val = sqrtf(val); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_normf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[0] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rhypotf() { - float *A, *Ad, *B, *Bd, *C, *Cd; - A = new float[N]; - B = new float[N]; - C = new float[N]; - float val = 0.0f; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - val = 1 / sqrtf(1.0f + 4.0f); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rhypotf, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (C[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rnorm3df() { - float *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd; - A = new float[N]; - B = new float[N]; - C = new float[N]; - D = new float[N]; - 
float val = 0.0f; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - C[i] = 3.0f; - } - val = 1 / sqrtf(1.0f + 4.0f + 9.0f); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rnorm3df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); - hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (D[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rnorm4df() { - float *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd, *E, *Ed; - A = new float[N]; - B = new float[N]; - C = new float[N]; - D = new float[N]; - E = new float[N]; - float val = 0.0f; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - C[i] = 3.0f; - D[i] = 4.0f; - } - val = 1 / sqrtf(1.0f + 4.0f + 9.0f + 16.0f); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMalloc((void**)&Ed, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rnorm4df, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); - hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (E[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - delete[] E; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - hipFree(Ed); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return 
false; -} - -bool run_rnormf() { - float *A, *Ad, *B, *Bd; - A = new float[N]; - B = new float[N]; - float val = 0.0f; - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 0.0f; - val += 1.0f; - } - val = 1 / sqrtf(val); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rnormf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[0] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_erfinvf() { - float *A, *Ad, *B, *Bd; - A = new float[N]; - B = new float[N]; - for (int i = 0; i < N; i++) { - A[i] = -0.6f; - B[i] = 0.0f; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_erfinvf, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[i] - A[i] < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -int main() { - if (run_sincosf() && run_sincospif() && run_fdividef() && run_llrintf() && run_norm3df() && - run_norm4df() && run_normf() && run_rnorm3df() && run_rnorm4df() && run_rnormf() && - run_lroundf() && run_llroundf() && run_rintf() && run_rhypotf() && run_erfinvf()) { - passed(); - } -} diff --git a/tests/src/deviceLib/hipTestDeviceDouble.cpp b/tests/src/deviceLib/hipTestDeviceDouble.cpp deleted file mode 100644 index 0bc6ec4685..0000000000 --- a/tests/src/deviceLib/hipTestDeviceDouble.cpp +++ /dev/null @@ -1,664 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * XXBUILD: %t %s ../test_common.cpp - * XXTEST: %t - * HIT_END - */ - -#include "test_common.h" -#include -#include -#include - -#define N 512 -#define SIZE N * sizeof(double) - -__global__ void test_sincos(double* a, double* b, double* c) { - int tid = threadIdx.x; - sincos(a[tid], b + tid, c + tid); -} - -__global__ void test_sincospi(double* a, double* b, double* c) { - int tid = threadIdx.x; - sincospi(a[tid], b + tid, c + tid); -} - -__global__ void test_llrint(double* a, long long int* b) { - int tid = threadIdx.x; - b[tid] = llrint(a[tid]); -} - -__global__ void test_lrint(double* a, long int* b) { - int tid = threadIdx.x; - b[tid] = lrint(a[tid]); -} - -__global__ void test_rint(double* a, double* b) { - int tid = threadIdx.x; - b[tid] = rint(a[tid]); -} - -__global__ void test_llround(double* a, long long int* b) { - int tid = threadIdx.x; - b[tid] = llround(a[tid]); -} - -__global__ void test_lround(double* a, long int* b) { - int tid = threadIdx.x; - b[tid] = lround(a[tid]); -} - -__global__ void test_rhypot(double* a, double* b, double* c) { - int tid = threadIdx.x; - c[tid] = rhypot(a[tid], b[tid]); -} - -__global__ void test_norm3d(double* a, double* b, double* c, double* d) { - int tid = threadIdx.x; - d[tid] = norm3d(a[tid], b[tid], c[tid]); -} - -__global__ void test_norm4d(double* a, double* b, double* c, double* d, - double* e) { - int tid = threadIdx.x; - e[tid] = norm4d(a[tid], b[tid], c[tid], d[tid]); -} - -__global__ void test_rnorm3d(double* a, double* b, double* c, double* d) { - int tid = threadIdx.x; - d[tid] = rnorm3d(a[tid], b[tid], c[tid]); -} - -__global__ void test_rnorm4d(double* a, double* b, double* c, double* d, - double* e) { - int tid = threadIdx.x; - e[tid] = rnorm4d(a[tid], b[tid], c[tid], d[tid]); -} - -__global__ void test_rnorm(double* a, double* b) { - int tid = threadIdx.x; - b[tid] = rnorm(N, a); -} - -__global__ void test_erfinv(double* a, double* b) { - int tid = threadIdx.x; - b[tid] = 
erf(erfinv(a[tid])); -} - -bool run_sincos() { - double *A, *Ad, *B, *C, *Bd, *Cd; - A = new double[N]; - B = new double[N]; - C = new double[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_sincos, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[i] == sin(1.0)) { - passed = 1; - } - } - passed = 0; - for (int i = 0; i < 512; i++) { - if (C[i] == cos(1.0)) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_sincospi() { - double *A, *Ad, *B, *C, *Bd, *Cd; - A = new double[N]; - B = new double[N]; - C = new double[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_sincospi, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[i] - sin(3.14 * 1.0) < 0.1) { - passed = 1; - } - } - passed = 0; - for (int i = 0; i < 512; i++) { - if (C[i] - cos(3.14 * 1.0) < 0.1) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - - -bool run_llrint() { - double *A, *Ad; - long long int *B, *Bd; - A = new double[N]; - B = new long long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * 
sizeof(long long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_llrint, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - int x = round(A[i]); - long long int y = x; - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_lrint() { - double *A, *Ad; - long int *B, *Bd; - A = new double[N]; - B = new long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * sizeof(long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_lrint, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - long int x = round(A[i]); - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rint() { - double *A, *Ad; - double *B, *Bd; - A = new double[N]; - B = new double[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rint, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - double x = round(A[i]); - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - - -bool run_llround() { - double *A, *Ad; - long long int *B, *Bd; - A = new double[N]; - B = new long long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345; - } - 
hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * sizeof(long long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_llround, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - long long int x = round(A[i]); - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_lround() { - double *A, *Ad; - long int *B, *Bd; - A = new double[N]; - B = new long int[N]; - for (int i = 0; i < N; i++) { - A[i] = 1.345; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, N * sizeof(long int)); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_lround, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - long int x = round(A[i]); - if (B[i] == x) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - - -bool run_norm3d() { - double *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd; - A = new double[N]; - B = new double[N]; - C = new double[N]; - D = new double[N]; - double val = 0.0; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - B[i] = 2.0; - C[i] = 3.0; - } - val = sqrt(1.0 + 4.0 + 9.0); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_norm3d, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); - hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (D[i] - val < 0.000001) { - passed = 
1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_norm4d() { - double *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd, *E, *Ed; - A = new double[N]; - B = new double[N]; - C = new double[N]; - D = new double[N]; - E = new double[N]; - double val = 0.0; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - B[i] = 2.0; - C[i] = 3.0; - D[i] = 4.0; - } - val = sqrt(1.0 + 4.0 + 9.0 + 16.0); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMalloc((void**)&Ed, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_norm4d, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); - hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (E[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - delete[] E; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - hipFree(Ed); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - - -bool run_rhypot() { - double *A, *Ad, *B, *Bd, *C, *Cd; - A = new double[N]; - B = new double[N]; - C = new double[N]; - double val = 0.0; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - B[i] = 2.0; - } - val = 1 / sqrt(1.0 + 4.0); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rhypot, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd); - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (C[i] - val < 0.000001) { 
- passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rnorm3d() { - double *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd; - A = new double[N]; - B = new double[N]; - C = new double[N]; - D = new double[N]; - double val = 0.0; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - B[i] = 2.0; - C[i] = 3.0; - } - val = 1 / sqrt(1.0 + 4.0 + 9.0); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rnorm3d, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd); - hipMemcpy(D, Dd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (D[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rnorm4d() { - double *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd, *E, *Ed; - A = new double[N]; - B = new double[N]; - C = new double[N]; - D = new double[N]; - E = new double[N]; - double val = 0.0; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - B[i] = 2.0; - C[i] = 3.0; - D[i] = 4.0; - } - val = 1 / sqrt(1.0 + 4.0 + 9.0 + 16.0); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMalloc((void**)&Cd, SIZE); - hipMalloc((void**)&Dd, SIZE); - hipMalloc((void**)&Ed, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rnorm4d, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed); - hipMemcpy(E, Ed, SIZE, 
hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (E[i] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - delete[] C; - delete[] D; - delete[] E; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - hipFree(Dd); - hipFree(Ed); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_rnorm() { - double *A, *Ad, *B, *Bd; - A = new double[N]; - B = new double[N]; - double val = 0.0; - for (int i = 0; i < N; i++) { - A[i] = 1.0; - B[i] = 0.0; - val += 1.0; - } - val = 1 / sqrt(val); - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_rnorm, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[0] - val < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -bool run_erfinv() { - double *A, *Ad, *B, *Bd; - A = new double[N]; - B = new double[N]; - for (int i = 0; i < N; i++) { - A[i] = -0.6; - B[i] = 0.0; - } - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(test_erfinv, dim3(1), dim3(N), 0, 0, Ad, Bd); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - int passed = 0; - for (int i = 0; i < 512; i++) { - if (B[i] - A[i] < 0.000001) { - passed = 1; - } - } - - delete[] A; - delete[] B; - hipFree(Ad); - hipFree(Bd); - - if (passed == 1) { - return true; - } - assert(passed == 1); - return false; -} - -int main() { - if (run_sincos() && run_sincospi() && run_llrint() && run_norm3d() && run_norm4d() && - run_rnorm3d() && run_rnorm4d() && run_rnorm() && run_lround() && run_llround() && - run_rint() && run_rhypot() && run_erfinv()) { - passed(); - } -} diff --git a/tests/src/deviceLib/hipTestDeviceLimit.cpp 
b/tests/src/deviceLib/hipTestDeviceLimit.cpp deleted file mode 100644 index e4118d0c36..0000000000 --- a/tests/src/deviceLib/hipTestDeviceLimit.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include - -int main() { - size_t heap; - assert(hipSuccess == hipDeviceGetLimit(&heap, hipLimitMallocHeapSize)); - assert(heap == 4194304); -} diff --git a/tests/src/deviceLib/hipTestDeviceSymbol.cpp b/tests/src/deviceLib/hipTestDeviceSymbol.cpp deleted file mode 100644 index 193ebe52e4..0000000000 --- a/tests/src/deviceLib/hipTestDeviceSymbol.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" -#include - -#define NUM 1024 -#define SIZE 1024 * 4 - -__device__ int globalIn[NUM]; -__device__ int globalOut[NUM]; - -__global__ void Assign(int* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Out[tid] = globalIn[tid]; - globalOut[tid] = globalIn[tid]; -} - -__device__ __constant__ int globalConst[NUM]; - -__global__ void checkAddress(int* addr, bool* out) { - *out = (globalConst == addr); -} - -int main() { - int *A, *Am, *B, *Ad, *C, *Cm; - A = new int[NUM]; - B = new int[NUM]; - C = new int[NUM]; - for (int i = 0; i < NUM; i++) { - A[i] = -1 * i; - B[i] = 0; - C[i] = 0; - } - - hipMalloc((void**)&Ad, SIZE); - hipHostMalloc((void**)&Am, SIZE); - hipHostMalloc((void**)&Cm, SIZE); - for (int i = 0; i < NUM; i++) { - Am[i] = -1 * i; - Cm[i] = 0; - } - - hipStream_t stream; - hipStreamCreate(&stream); - hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream); - hipStreamSynchronize(stream); - hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); - hipStreamSynchronize(stream); - for (int i = 0; i < NUM; i++) { - assert(Am[i] == B[i]); - assert(Am[i] == Cm[i]); - } - - for (int i = 0; i < NUM; i++) { - A[i] = -2 * i; - B[i] = 0; - } - - hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice); - hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost); - for (int i = 0; i < NUM; i++) { - assert(A[i] == B[i]); - assert(A[i] == C[i]); - } - - for (int i = 0; i < NUM; i++) { - A[i] = -3 * i; - B[i] = 0; - } - - hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, 
stream); - hipStreamSynchronize(stream); - hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); - hipStreamSynchronize(stream); - for (int i = 0; i < NUM; i++) { - assert(A[i] == B[i]); - assert(A[i] == C[i]); - } - - bool *checkOkD; - bool checkOk = false; - size_t symbolSize = 0; - int *symbolAddress; - hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst)); - hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst)); - hipMalloc((void**)&checkOkD, sizeof(bool)); - hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD); - hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost); - hipFree(checkOkD); - assert(checkOk); - assert(symbolSize == SIZE); - - hipHostFree(Am); - hipHostFree(Cm); - hipFree(Ad); - delete[] A; - delete[] B; - delete[] C; - passed(); -} diff --git a/tests/src/deviceLib/hipTestDotFunctions.cpp b/tests/src/deviceLib/hipTestDotFunctions.cpp deleted file mode 100644 index f13198ab4a..0000000000 --- a/tests/src/deviceLib/hipTestDotFunctions.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - - -__global__ void DotFunctions(bool* result) { - #if __HIP_CLANG_ONLY__ - // Dot Functions - short2 sa{1}, sb{1}; - result[0] = amd_mixed_dot(sa, sb, 1, result[0]) && result[0]; - - ushort2 usa{1}, usb{1}; - result[0] = amd_mixed_dot(usa, usb, (uint) 1, result[0]) && result[0]; - - char4 ca{1}, cb{1}; - result[0] = amd_mixed_dot(ca, cb, 1, result[0]) && result[0]; - - uchar4 uca{1}, ucb{1}; - result[0] = amd_mixed_dot(uca, ucb, (uint) 1, result[0]) && result[0]; - - int ia{1}, ib{1}; - result[0] = amd_mixed_dot(ia, ib, 1, result[0]) && result[0]; - - uint ua{1}, ub{1}; - result[0] = amd_mixed_dot(ua, ub, (uint) 1, result[0]) && result[0]; - #endif -} - -int main() { - bool* result{nullptr}; - hipHostMalloc(&result, 1); - result[0] = true; - - hipLaunchKernelGGL(DotFunctions, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result); - hipDeviceSynchronize(); - if (!result[0]) { failed("Failed dot tests."); } - - hipHostFree(result); - - passed(); -} diff --git a/tests/src/deviceLib/hipTestFMA.cpp b/tests/src/deviceLib/hipTestFMA.cpp deleted file mode 100644 index 1bb1ae1dcb..0000000000 --- a/tests/src/deviceLib/hipTestFMA.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include -#include -#include - -#define HIP_ASSERT(status) assert(status == hipSuccess) - -#define LEN 50 -#define SIZE (LEN * sizeof(bool)) - - __global__ void kernelTestFMA(bool *Ad) { - float f = 1.0f / 3.0f; - double d = f; - int i = 0; - auto Check = [&](bool Cond) { Ad[i++] = Cond; }; - // f * f + 3.0f will be different if promoted to double. - float floatResult = fma(f, f, 3.0f); - double doubleResult = fma(d, d, 3.0); - Check(floatResult != doubleResult); - - if(sizeof(decltype(fma(f, f, 3))) == 8) { - // To align with libcxx, if any argument has integral type, - // it is cast to double. - // Check type promotes to double. 
- Check(fma(f, f, 3) == doubleResult); - Check(fma(f, f, (char)3) == doubleResult); - Check(fma(f, f, (unsigned char)3) == doubleResult); - Check(fma(f, f, (short)3) == doubleResult); - Check(fma(f, f, (unsigned short)3) == doubleResult); - Check(fma(f, f, (int)3) == doubleResult); - Check(fma(f, f, (unsigned int)3) == doubleResult); - Check(fma(f, f, (long)3) == doubleResult); - Check(fma(f, f, (unsigned long)3) == doubleResult); - Check(fma(f, f, true) == fma((double)f, (double)f, 1.0)); - } else if(sizeof(decltype(fma(f, f, 3))) == 4) { - // Previous HIP headers returns float type. - // Delete this to support backwards compatibility. - // check promote to float. - Check(fma(f, f, 3) == floatResult); - Check(fma(f, f, (char)3) == floatResult); - Check(fma(f, f, (unsigned char)3) == floatResult); - Check(fma(f, f, (short)3) == floatResult); - Check(fma(f, f, (unsigned short)3) == floatResult); - Check(fma(f, f, (int)3) == floatResult); - Check(fma(f, f, (unsigned int)3) == floatResult); - Check(fma(f, f, (long)3) == floatResult); - Check(fma(f, f, (unsigned long)3) == floatResult); - Check(fma(f, f, true) == fma(f, f, 1.0f)); - } else { - assert(0 && "Invalid fma return type."); - } - - Check(fma(d, (double)f, 3) == doubleResult); - Check(fma(d, (double)f, (char)3) == doubleResult); - Check(fma(d, (double)f, (unsigned char)3) == doubleResult); - Check(fma(d, (double)f, (short)3) == doubleResult); - Check(fma(d, (double)f, (unsigned short)3) == doubleResult); - Check(fma(d, (double)f, (int)3) == doubleResult); - Check(fma(d, (double)f, (unsigned int)3) == doubleResult); - Check(fma(d, (double)f, (long)3) == doubleResult); - Check(fma(d, (double)f, (unsigned long)3) == doubleResult); - Check(fma(d, (double)f, true) == fma((double)f, (double)f, 1.0)); - - while (i < LEN) - Check(true); - } - - void runTestFMA() { - bool *Ad; - bool A[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = 0; - } - - HIP_ASSERT(hipMalloc((void **)&Ad, SIZE)); - 
hipLaunchKernelGGL(kernelTestFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad); - HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); - - for (unsigned i = 0; i < LEN; i++) { - assert(A[i]); - } - } - - __global__ void kernelTestHalfFMA(bool *Ad) { - _Float16 h = (_Float16)(1.0f/3.0f); - float f = h; - double d = f; - int i = 0; - auto Check = [&](bool Cond) { Ad[i++] = Cond; }; - // h * h + 3 will be different if promoted to float. - _Float16 halfResult = fma(h, h, (_Float16)3); - float floatResult = fma(f, f, 3.0f); - double doubleResult = fma(d, d, 3.0); - Check(halfResult != floatResult); - Check(halfResult != doubleResult); - - // check promote to half. - // fma(_Float16, _Float16, int) should resolve to - // fma(double, double, double). This is similar to - // fma(float, float, int) resolving to fma(double, double, double) - // as required Standard C++ header . - if (sizeof(decltype(fma(h, h, 3))) == 8) { - Check(fma(h, h, 3) == doubleResult); - Check(fma(h, h, (char)3) == doubleResult); - Check(fma(h, h, (unsigned char)3) == doubleResult); - Check(fma(h, h, (short)3) == doubleResult); - Check(fma(h, h, (unsigned short)3) == doubleResult); - Check(fma(h, h, (int)3) == doubleResult); - Check(fma(h, h, (unsigned int)3) == doubleResult); - Check(fma(h, h, (long)3) == doubleResult); - Check(fma(h, h, (unsigned long)3) == doubleResult); - Check(fma(h, h, true) == fma((double)h, (double) h, 1.0)); - } else if (sizeof(decltype(fma(h, h, 3))) == 2) { - // ToDo: Currently there is a bug in clang header - // __clang_hip_cmath.h due to using - // std::numeric_limits::is_specified to define - // overloaded math functions. Since numeric_limits is - // not specicialized for _Float16, overloaded template - // functions with argument promotion are not defined - // for _Float16. As a result, fma(_Float16, _Float16, int) - // is resolved to fma(_Float16, _Float16, _Float16). - // This part should be removed after __clang_hip_cmath.h - // is fixed. 
- Check(fma(h, h, 3) == halfResult); - Check(fma(h, h, (char)3) == halfResult); - Check(fma(h, h, (unsigned char)3) == halfResult); - Check(fma(h, h, (short)3) == halfResult); - Check(fma(h, h, (unsigned short)3) == halfResult); - Check(fma(h, h, (int)3) == halfResult); - Check(fma(h, h, (unsigned int)3) == halfResult); - Check(fma(h, h, (long)3) == halfResult); - Check(fma(h, h, (unsigned long)3) == halfResult); - Check(fma(h, h, true) == fma(h, h, (_Float16)1)); - } else { - assert(0 && "Invalid fma return type."); - } - - while (i < LEN) - Check(true); - } - - void runTestHalfFMA() { - bool *Ad; - bool A[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = 0; - } - - HIP_ASSERT(hipMalloc((void **)&Ad, SIZE)); - hipLaunchKernelGGL(kernelTestHalfFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad); - HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); - - for (unsigned i = 0; i < LEN; i++) { - assert(A[i]); - } - } - -int main() { - runTestFMA(); - runTestHalfFMA(); - passed(); -} diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp deleted file mode 100644 index a418080550..0000000000 --- a/tests/src/deviceLib/hipTestHalf.cpp +++ /dev/null @@ -1,294 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include "hip/hip_runtime.h" - -#include "test_common.h" - -__device__ void test_convert() { - __half x; - float y = (float)x; -} - -__global__ -void __halfMath(bool* result, __half a) { - result[0] = __heq(__hadd(a, __half{1}), __half{2}); - result[0] = __heq(__hadd_sat(a, __half{1}), __half{1}) && result[0]; - result[0] = __heq(__hfma(a, __half{2}, __half{3}), __half{5}) && result[0]; - result[0] = - __heq(__hfma_sat(a, __half{2}, __half{3}), __half{1}) && result[0]; - result[0] = __heq(__hsub(a, __half{1}), __half{0}) && result[0]; - result[0] = __heq(__hsub_sat(a, __half{2}), __half{0}) && result[0]; - result[0] = __heq(__hmul(a, __half{2}), __half{2}) && result[0]; - result[0] = __heq(__hmul_sat(a, __half{2}), __half{1}) && result[0]; - result[0] = __heq(__hdiv(a, __half{2}), __half{0.5}) && result[0]; -} - -__device__ -bool to_bool(const __half2& x) -{ - auto r = static_cast(x); - - return r.data.x != 0 && r.data.y != 0; -} - -__global__ -void __half2Math(bool* result, __half2 a) { - result[0] = - to_bool(__heq2(__hadd2(a, __half2{1, 1}), __half2{2, 2})); - result[0] = to_bool(__heq2(__hadd2_sat(a, __half2{1, 1}), __half2{1, 1})) && - result[0]; - result[0] = to_bool(__heq2( - __hfma2(a, __half2{2, 2}, __half2{3, 3}), __half2{5, 5})) && result[0]; - result[0] = to_bool(__heq2( - __hfma2_sat(a, __half2{2, 2}, __half2{3, 3}), __half2{1, 1})) && result[0]; - result[0] = 
to_bool(__heq2(__hsub2(a, __half2{1, 1}), __half2{0, 0})) && - result[0]; - result[0] = to_bool(__heq2(__hsub2_sat(a, __half2{2, 2}), __half2{0, 0})) && - result[0]; - result[0] = to_bool(__heq2(__hmul2(a, __half2{2, 2}), __half2{2, 2})) && - result[0]; - result[0] = to_bool(__heq2(__hmul2_sat(a, __half2{2, 2}), __half2{1, 1})) && - result[0]; - result[0] = to_bool(__heq2(__h2div(a, __half2{2, 2}), __half2{0.5, 0.5})) && - result[0]; -} - -__global__ -void kernel_hisnan(__half* input, int* output) { - int tx = threadIdx.x; - output[tx] = __hisnan(input[tx]); -} - -__global__ -void kernel_hisinf(__half* input, int* output) { - int tx = threadIdx.x; - output[tx] = __hisinf(input[tx]); -} - -__global__ void testHalfAbs(float* p) { - auto a = __float2half(*p); - a = __habs(a); - *p = __half2float(a); -} - -__global__ void testHalf2Abs(float2* p) { - auto a = __float22half2_rn(*p); - a = __habs2(a); - *p = __half22float2(a); -} - - -__half host_ushort_as_half(unsigned short s) { - union {__half h; unsigned short s; } converter; - converter.s = s; - return converter.h; -} - - -void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) { - - // allocate memory - auto memsize = NUM_INPUTS * sizeof(int); - int* outputGPU = nullptr; - hipMalloc((void**)&outputGPU, memsize); - - // launch the kernel - hipLaunchKernelGGL( - kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); - - // copy output from device - int* outputCPU = (int*) malloc(memsize); - hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); - - // check output - for (int i=0; i(inputCPU[i]), - i); - } - } - else { // inputs are NOT nan, output should be false - if (outputCPU[i] != 0) { - failed( - "__hisnan() returned true for %f (input idx = %d)\n", - static_cast(inputCPU[i]), - i); - } - } - } - - // free memory - free(outputCPU); - hipFree(outputGPU); - - // done - return; -} - - -void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) { - // allocate memory 
- auto memsize = NUM_INPUTS * sizeof(int); - int* outputGPU = nullptr; - hipMalloc((void**)&outputGPU, memsize); - - // launch the kernel - hipLaunchKernelGGL( - kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU); - - // copy output from device - int* outputCPU = (int*) malloc(memsize); - hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); - - // check output - for (int i=0; i(inputCPU[i]), - i); - } - } - else { // inputs are NOT inf, output should be false - if (outputCPU[i] != 0) { - failed( - "__hisinf() returned true for %f (input idx = %d)\n", - static_cast(inputCPU[i]), - i); - } - } - } - - // free memory - free(outputCPU); - hipFree(outputGPU); - - // done - return; -} - - -void checkFunctional() { - - // allocate memory - const int NUM_INPUTS = 16; - auto memsize = NUM_INPUTS * sizeof(__half); - __half* inputCPU = (__half*) malloc(memsize); - - // populate inputs - inputCPU[0] = host_ushort_as_half(0x7c00); // inf - inputCPU[1] = host_ushort_as_half(0xfc00); // -inf - inputCPU[2] = host_ushort_as_half(0x7c01); // nan - inputCPU[3] = host_ushort_as_half(0x7e00); // nan - inputCPU[4] = host_ushort_as_half(0xfc01); // nan - inputCPU[5] = host_ushort_as_half(0xfe00); // nan - inputCPU[6] = host_ushort_as_half(0x0000); // 0 - inputCPU[7] = host_ushort_as_half(0x8000); // -0 - inputCPU[8] = host_ushort_as_half(0x7bff); // max +ve normal - inputCPU[9] = host_ushort_as_half(0xfbff); // max -ve normal - inputCPU[10] = host_ushort_as_half(0x0400); // min +ve normal - inputCPU[11] = host_ushort_as_half(0x8400); // min -ve normal - inputCPU[12] = host_ushort_as_half(0x03ff); // max +ve sub-normal - inputCPU[13] = host_ushort_as_half(0x83ff); // max -ve sub-normal - inputCPU[14] = host_ushort_as_half(0x0001); // min +ve sub-normal - inputCPU[15] = host_ushort_as_half(0x8001); // min -ve sub-normal - - // copy inputs to the GPU - __half* inputGPU = nullptr; - hipMalloc((void**)&inputGPU, memsize); - hipMemcpy(inputGPU, inputCPU, memsize, 
hipMemcpyHostToDevice); - - // run checks - - check_hisnan(NUM_INPUTS, inputCPU, inputGPU); - - check_hisinf(NUM_INPUTS, inputCPU, inputGPU); - - // free memory - hipFree(inputGPU); - free(inputCPU); - - // all done - return; -} - -void checkHalfAbs() { - { - float *p; - hipMalloc(&p, sizeof(float)); - float pp = -2.1f; - hipMemcpy(p, &pp, sizeof(float), hipMemcpyDefault); - hipLaunchKernelGGL(testHalfAbs, 1, 1, 0, 0, p); - hipMemcpy(&pp, p, sizeof(float), hipMemcpyDefault); - hipFree(p); - if(pp < 0.0f) { failed("Half Abs failed"); } - } - { - float2 *p; - hipMalloc(&p, sizeof(float2)); - float2 pp; - pp.x = -2.1f; - pp.y = -1.1f; - hipMemcpy(p, &pp, sizeof(float2), hipMemcpyDefault); - hipLaunchKernelGGL(testHalf2Abs, 1, 1, 0, 0, p); - hipMemcpy(&pp, p, sizeof(float2), hipMemcpyDefault); - hipFree(p); - if(pp.x < 0.0f || pp.y < 0.0f) { failed("Half2 Abs Test Failed"); } - } -} - -int main() { - bool* result{nullptr}; - hipMemAllocHost((void**)&result, sizeof(result)); - - result[0] = false; - hipLaunchKernelGGL( - __halfMath, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half{1}); - hipDeviceSynchronize(); - - if (!result[0]) { failed("Failed __half tests."); } - - result[0] = false; - hipLaunchKernelGGL( - __half2Math, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half2{1, 1}); - hipDeviceSynchronize(); - - if (!result[0]) { failed("Failed __half2 tests."); } - - hipHostFree(result); - - // run some functional checks - checkFunctional(); - - checkHalfAbs(); - - passed(); -} diff --git a/tests/src/deviceLib/hipTestHost.cpp b/tests/src/deviceLib/hipTestHost.cpp deleted file mode 100644 index 886849f530..0000000000 --- a/tests/src/deviceLib/hipTestHost.cpp +++ /dev/null @@ -1,384 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#include "test_common.h" -#include -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" - -#define N 512 - -bool check_erfcinvf() { - uint32_t len = 4; - float Val[] = {0.1, 1.2, 1, 0.9}; - float Out[] = {1.16309, -0.179144, 0, 0.0889}; - for (int i = 0; i < len; i++) { - if (Out[i] - erfcinvf(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_erfcxf() { - uint32_t len = 4; - float Val[] = {-0.5, 15, 3.2, 1}; - float Out[] = {1.9524, 0.0375, 0.1687, 0.4276}; - for (int i = 0; i < len; i++) { - if (Out[i] - erfcxf(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_erfinvf() { - uint32_t len = 4; - float Val[] = {0, -0.5, 0.9, -0.2}; - float Out[] = {0, -0.4769, 1.1631, -0.1791}; - for (int i = 0; i < len; i++) { - if (Out[i] - erfinvf(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_fdividef() { - uint32_t len = 4; - float Val[] = {0, -0.5, 0.9, -0.2}; - float Out[] = {1, -0.4769, 1.1631, -0.1791}; - for (int i = 0; i < len; i++) { - if (Val[i] / Out[i] - fdividef(Val[i], Out[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_erfcinv() { - uint32_t len = 4; - double Val[] = {0.1, 1.2, 1, 0.9}; - double Out[] = {1.16309, -0.179144, 0, 0.0889}; - for (int i = 0; i < len; i++) { - if (Out[i] - erfcinv(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_erfcx() { - uint32_t len = 4; - double Val[] = {-0.5, 15, 3.2, 1}; - double Out[] = {1.9524, 0.0375, 0.1687, 0.4276}; - for (int i = 0; i < len; i++) { - if (Out[i] - erfcx(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_erfinv() { - uint32_t len = 4; - double Val[] = {0, -0.5, 0.9, -0.2}; - double Out[] = {0, -0.4769, 1.1631, -0.1791}; - for (int i = 0; i < len; i++) { - if (Out[i] - erfinv(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_fdivide() { - uint32_t len = 4; - double Val[] = {0, -0.5, 0.9, -0.2}; - double Out[] = {1, 
-0.4769, 1.1631, -0.1791}; - for (int i = 0; i < len; i++) { - if (Val[i] / Out[i] - fdivide(Val[i], Out[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_modff() { - uint32_t len = 4; - float Val[] = {0, -0.5, 0.9, -0.2}; - float iPtr[] = {0, 0, 0, 0}; - float frac[] = {0, -0.5, 0.9, -0.2}; - float Out[] = {1, 1, 1, 1}; - for (int i = 0; i < len; i++) { - if (frac[i] - modff(Val[i], Out + i) > 0.0001 && iPtr[i] == Out[i]) { - return false; - } - } - return true; -} - -bool check_modf() { - uint32_t len = 4; - double Val[] = {0, -0.5, 0.9, -0.2}; - double iPtr[] = {0, 0, 0, 0}; - double frac[] = {0, -0.5, 0.9, -0.2}; - double Out[] = {1, 1, 1, 1}; - for (int i = 0; i < len; i++) { - if (frac[i] - modf(Val[i], Out + i) > 0.0001 && iPtr[i] == Out[i]) { - return false; - } - } - return true; -} - -bool check_nextafterf() { - uint32_t len = 4; - float Val[] = {0, -0.5, 0.9, -0.2}; - float iPtr[] = {0, 0, 0, 0}; - float frac[] = {0, -0.5, 0.9, -0.2}; - float Out[] = {1, 1, 1, 1}; - for (int i = 0; i < len; i++) { - if (nextafterf(Val[i], 1) - Val[i] > 0.0001) { - return false; - } - } - return true; -} - -bool check_nextafter() { - uint32_t len = 4; - double Val[] = {0, -0.5, 0.9, -0.2}; - double iPtr[] = {0, 0, 0, 0}; - double frac[] = {0, -0.5, 0.9, -0.2}; - double Out[] = {1, 1, 1, 1}; - for (int i = 0; i < len; i++) { - if (nextafter(Val[i], 1) - Val[i] > 0.0001) { - return false; - } - } - return true; -} - -bool check_norm3df(float* A) { - float f = norm3df(A[0], A[1], A[2]); - float out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2]); - if (f - out > 0.0001) { - return false; - } - return true; -} - -bool check_norm3d(double* A) { - double f = norm3d(A[0], A[1], A[2]); - double out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2]); - if (f - out > 0.0001) { - return false; - } - return true; -} - -bool check_norm4df(float* A) { - float f = norm4df(A[0], A[1], A[2], A[3]); - float out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + A[3] * 
A[3]); - if (f - out > 0.0001) { - return false; - } - return true; -} - -bool check_norm4d(double* A) { - double f = norm4d(A[0], A[1], A[2], A[3]); - double out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + A[3] * A[3]); - if (f - out > 0.0001) { - return false; - } - return true; -} - -bool check_normcdff() { - uint32_t len = 2; - float Val[] = {0, 1}; - float Out[] = {0.5, 0.8413}; - for (int i = 0; i < len; i++) { - if (Out[i] - normcdff(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - -bool check_normcdf() { - uint32_t len = 2; - float Val[] = {0, 1}; - float Out[] = {0.5, 0.8413}; - for (int i = 0; i < len; i++) { - if (Out[i] - normcdf(Val[i]) > 0.0001) { - return false; - } - } - return true; -} - - -bool check_normcdfinvf() { - uint32_t len = 2; - double Val[] = {0.5, 0.8413}; - for (int i = 0; i < len; i++) { - if (Val[i] - normcdfinvf(normcdff(Val[i])) > 0.0001) { - return false; - } - } - return true; -} - -bool check_normcdfinv() { - uint32_t len = 2; - double Val[] = {0.5, 0.8413}; - for (int i = 0; i < len; i++) { - if (Val[i] - normcdfinv(normcdf(Val[i])) > 0.0001) { - return false; - } - } - return true; -} - -bool check_rcbrtf() { - float f = 1.0f; - if (rcbrtf(f) != 1.0f) { - return false; - } - return true; -} - -bool check_rcbrt() { - double f = 1.0; - if (rcbrt(f) != 1.0) { - return false; - } - return true; -} - -bool check_rhypotf() { - float f = 1.0f; - float g = 2.0f; - float val = rhypotf(f, g); - float sq = f * f + g * g; - if (1 / (val * val) - sq > 0.0001) { - return false; - } - return true; -} - -bool check_rhypot() { - double f = 1.0f; - double g = 2.0f; - double val = rhypot(f, g); - double sq = f * f + g * g; - if (1 / (val * val) - sq > 0.0001) { - return false; - } - return true; -} - -bool check_rnorm3df(float* A) { - float f = rnorm3df(A[0], A[1], A[2]); - float out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2]); - if (f - 1 / out > 0.0001) { - return false; - } - return true; -} - -bool 
check_rnorm3d(double* A) { - double f = rnorm3d(A[0], A[1], A[2]); - double out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2]); - if (f - 1 / out > 0.0001) { - return false; - } - return true; -} - -bool check_rnorm4df(float* A) { - float f = rnorm4df(A[0], A[1], A[2], A[3]); - float out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + A[3] * A[3]); - if (f - 1 / out > 0.0001) { - return false; - } - return true; -} - -bool check_rnorm4d(double* A) { - double f = rnorm4d(A[0], A[1], A[2], A[3]); - double out = sqrt(A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + A[3] * A[3]); - if (f - 1 / out > 0.0001) { - return false; - } - return true; -} - -bool check_rnormf(float* A) { - return (rnorm3df(A[0], A[1], A[2]) - rnormf(3, A) < 0.0001) && - (rnorm4df(A[0], A[1], A[2], A[3]) - rnormf(4, A) < 0.0001); -} - -bool check_rnorm(double* A) { - return (rnorm3d(A[0], A[1], A[2]) - rnorm(3, A) < 0.0001) && - (rnorm4d(A[0], A[1], A[2], A[3]) - rnorm(4, A) < 0.0001); -} - -bool check_sincospif() { - float s1, c1, s2, c2; - float in1 = 1, in2 = 0.5; - sincospif(in1, &s1, &c1); - sincospif(in2, &s2, &c2); - if ((s1 - 0 < 0.00001) && (s2 - 1 < 0.00001) && (c1 + 1 < 0.00001) && (c2 - 0 < 0.00001)) { - return true; - } - return false; -} - -bool check_sincospi() { - double s1, c1, s2, c2; - double in1 = 1, in2 = 0.5; - sincospi(in1, &s1, &c1); - sincospi(in2, &s2, &c2); - if ((s1 - 0 < 0.00001) && (s2 - 1 < 0.00001) && (c1 + 1 < 0.00001) && (c2 - 0 < 0.00001)) { - return true; - } - return false; -} - -int main() { - float* Af = new float[N]; - double* A = new double[N]; - for (int i = 0; i < N; i++) { - Af[i] = i * 1.0f; - A[i] = i * 1.0; - } - if (check_erfcinvf() && check_erfcxf() && check_erfcinvf() && check_erfcinv() && - check_erfcx() && check_erfcinv() && check_fdividef() && check_fdivide() && check_modff() && - check_modf() && check_nextafterf() && check_norm3df(Af) && check_norm3d(A) && - check_norm4df(Af) && check_norm4d(A) && check_normcdff() && check_normcdf() && - 
check_normcdfinvf() && check_normcdfinv() && check_rcbrtf() && check_rcbrt() && - check_rhypotf() && check_rhypot() && check_rnorm3df(Af) && check_rnorm3d(A) && - check_rnorm4df(Af) && check_rnorm4d(A) && check_rnormf(Af) && check_rnorm(A) && - check_sincospif() && check_sincospi()) { - passed(); - } -} diff --git a/tests/src/deviceLib/hipTestIncludeMath.cpp b/tests/src/deviceLib/hipTestIncludeMath.cpp deleted file mode 100644 index d105b90dc4..0000000000 --- a/tests/src/deviceLib/hipTestIncludeMath.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -// Test include math_functions.h then hip_runtime.h. -// Incorrect implementation causes compilation failure due to conflict -// declartions. 
- -#include -#include - -// Test __HIP_DEVICE_COMPILE__ is defined after math_functions.h -// is included. -// -__device__ __host__ inline void throw_std_bad_alloc() -{ - #ifndef __HIP_DEVICE_COMPILE__ - throw std::bad_alloc(); - #else - std::size_t huge = static_cast(-1); - new int[huge]; - #endif -} - -#include -#include "test_common.h" - -__global__ void FloatMathPrecise() { - int iX; - float fX, fY; - - acosf(1.0f); - acoshf(1.0f); - asinf(0.0f); - asinhf(0.0f); - atan2f(0.0f, 1.0f); - atanf(0.0f); - atanhf(0.0f); - cbrtf(0.0f); - fX = ceilf(0.0f); - fX = copysignf(1.0f, -2.0f); - cosf(0.0f); - coshf(0.0f); - cospif(0.0f); - cyl_bessel_i0f(0.0f); - cyl_bessel_i1f(0.0f); - erfcf(0.0f); - erfcinvf(2.0f); - erfcxf(0.0f); - erff(0.0f); - erfinvf(1.0f); - exp10f(0.0f); - exp2f(0.0f); - expf(0.0f); - expm1f(0.0f); - fX = fabsf(1.0f); - fdimf(1.0f, 0.0f); - fdividef(0.0f, 1.0f); - fX = floorf(0.0f); - fmaf(1.0f, 2.0f, 3.0f); - fX = fmaxf(0.0f, 0.0f); - fX = fminf(0.0f, 0.0f); - fmodf(0.0f, 1.0f); - frexpf(0.0f, &iX); - hypotf(1.0f, 0.0f); - ilogbf(1.0f); - isfinite(0.0f); - fX = isinf(0.0f); - fX = isnan(0.0f); - j0f(0.0f); - j1f(0.0f); - jnf(-1.0f, 1.0f); - ldexpf(0.0f, 0); - lgammaf(1.0f); - llrintf(0.0f); - llroundf(0.0f); - log10f(1.0f); - log1pf(-1.0f); - log2f(1.0f); - logbf(1.0f); - logf(1.0f); - lrintf(0.0f); - lroundf(0.0f); - modff(0.0f, &fX); - fX = nanf("1"); - fX = nearbyintf(0.0f); - nextafterf(0.0f, 0.0f); - norm3df(1.0f, 0.0f, 0.0f); - norm4df(1.0f, 0.0f, 0.0f, 0.0f); - normcdff(0.0f); - normcdfinvf(1.0f); - fX = 1.0f; - normf(1, &fX); - powf(1.0f, 0.0f); - rcbrtf(1.0f); - remainderf(2.0f, 1.0f); - remquof(1.0f, 2.0f, &iX); - rhypotf(0.0f, 1.0f); - fY = rintf(1.0f); - rnorm3df(0.0f, 0.0f, 1.0f); - rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); - fX = 1.0f; - rnormf(1, &fX); - fY = roundf(0.0f); - rsqrtf(1.0f); - scalblnf(0.0f, 1); - scalbnf(0.0f, 1); - signbit(1.0f); - sincosf(0.0f, &fX, &fY); - sincospif(0.0f, &fX, &fY); - sinf(0.0f); - sinhf(0.0f); - 
sinpif(0.0f); - sqrtf(0.0f); - tanf(0.0f); - tanhf(0.0f); - tgammaf(2.0f); - fY = truncf(0.0f); - y0f(1.0f); - y1f(1.0f); - ynf(1, 1.0f); -} - -int main() { - hipLaunchKernelGGL(FloatMathPrecise, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0); - passed(); -} diff --git a/tests/src/deviceLib/hipTestNativeHalf.cpp b/tests/src/deviceLib/hipTestNativeHalf.cpp deleted file mode 100644 index 22e2b8279e..0000000000 --- a/tests/src/deviceLib/hipTestNativeHalf.cpp +++ /dev/null @@ -1,188 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include "hip/hip_runtime.h" - -#include "test_common.h" - -#include - -using namespace std; - -__global__ -void __halfTest(bool* result, __half a) { - // Construction - static_assert(is_default_constructible<__half>{}, ""); - static_assert(is_copy_constructible<__half>{}, ""); - static_assert(is_move_constructible<__half>{}, ""); - static_assert(is_constructible<__half, float>{}, ""); - static_assert(is_constructible<__half, double>{}, ""); - static_assert(is_constructible<__half, unsigned short>{}, ""); - static_assert(is_constructible<__half, short>{}, ""); - static_assert(is_constructible<__half, unsigned int>{}, ""); - static_assert(is_constructible<__half, int>{}, ""); - static_assert(is_constructible<__half, unsigned long>{}, ""); - static_assert(is_constructible<__half, long>{}, ""); - static_assert(is_constructible<__half, long long>{}, ""); - static_assert(is_constructible<__half, unsigned long long>{}, ""); - static_assert(is_constructible<__half, __half_raw>{}, ""); - - // Assignment - static_assert(is_copy_assignable<__half>{}, ""); - static_assert(is_move_assignable<__half>{}, ""); - static_assert(is_assignable<__half, float>{}, ""); - static_assert(is_assignable<__half, double>{}, ""); - static_assert(is_assignable<__half, unsigned short>{}, ""); - static_assert(is_assignable<__half, short>{}, ""); - static_assert(is_assignable<__half, unsigned int>{}, ""); - static_assert(is_assignable<__half, int>{}, ""); - static_assert(is_assignable<__half, unsigned long>{}, ""); - static_assert(is_assignable<__half, long>{}, ""); - static_assert(is_assignable<__half, long long>{}, ""); - static_assert(is_assignable<__half, unsigned long long>{}, ""); - static_assert(is_assignable<__half, __half_raw>{}, ""); - static_assert(is_assignable<__half, volatile __half_raw&>{}, ""); - static_assert(is_assignable<__half, volatile __half_raw&&>{}, 
""); - - // Conversion - static_assert(is_convertible<__half, float>{}, ""); - static_assert(is_convertible<__half, unsigned short>{}, ""); - static_assert(is_convertible<__half, short>{}, ""); - static_assert(is_convertible<__half, unsigned int>{}, ""); - static_assert(is_convertible<__half, int>{}, ""); - static_assert(is_convertible<__half, unsigned long>{}, ""); - static_assert(is_convertible<__half, long>{}, ""); - static_assert(is_convertible<__half, long long>{}, ""); - static_assert(is_convertible<__half, bool>{}, ""); - static_assert(is_convertible<__half, unsigned long long>{}, ""); - static_assert(is_convertible<__half, __half_raw>{}, ""); - static_assert(is_convertible<__half, volatile __half_raw>{}, ""); - - // Nullary - result[0] = __heq(a, +a) && result[0]; - result[0] = __heq(__hneg(a), -a) && result[0]; - - // Unary arithmetic - result[0] = __heq(a += 0, a) && result[0]; - result[0] = __heq(a -= 0, a) && result[0]; - result[0] = __heq(a *= 1, a) && result[0]; - result[0] = __heq(a /= 1, a) && result[0]; - - // Binary arithmetic - result[0] = __heq((a + a), __hadd(a, a)) && result[0]; - result[0] = __heq((a - a), __hsub(a, a)) && result[0]; - result[0] = __heq((a * a), __hmul(a, a)) && result[0]; - result[0] = __heq((a / a), __hdiv(a, a)) && result[0]; - - // Relations - result[0] = (a == a) && result[0]; - result[0] = !(a != a) && result[0]; - result[0] = (a <= a) && result[0]; - result[0] = (a >= a) && result[0]; - result[0] = !(a < a) && result[0]; - result[0] = !(a > a) && result[0]; -} - -__device__ -bool to_bool(const __half2& x) -{ - auto r = static_cast(x); - - return r.data.x != 0 && r.data.y != 0; -} - -__global__ -void __half2Test(bool* result, __half2 a) { - // Construction - static_assert(is_default_constructible<__half2>{}, ""); - static_assert(is_copy_constructible<__half2>{}, ""); - static_assert(is_move_constructible<__half2>{}, ""); - static_assert(is_constructible<__half2, __half, __half>{}, ""); - 
static_assert(is_constructible<__half2, __half2_raw>{}, ""); - - // Assignment - static_assert(is_copy_assignable<__half2>{}, ""); - static_assert(is_move_assignable<__half2>{}, ""); - static_assert(is_assignable<__half2, __half2_raw>{}, ""); - - // Conversion - static_assert(is_convertible<__half2, __half2_raw>{}, ""); - - // Nullary - result[0] = to_bool(__heq2(a, +a)) && result[0]; - result[0] = to_bool(__heq2(__hneg2(a), -a)) && result[0]; - - // Unary arithmetic - result[0] = to_bool(__heq2(a += 0, a)) && result[0]; - result[0] = to_bool(__heq2(a -= 0, a)) && result[0]; - result[0] = to_bool(__heq2(a *= 1, a)) && result[0]; - result[0] = to_bool(__heq2(a /= 1, a)) && result[0]; - - // Binary arithmetic - result[0] = to_bool(__heq2((a + a), __hadd2(a, a))) && result[0]; - result[0] = to_bool(__heq2((a - a), __hsub2(a, a))) && result[0]; - result[0] = to_bool(__heq2((a * a), __hmul2(a, a))) && result[0]; - result[0] = to_bool(__heq2((a / a), __h2div(a, a))) && result[0]; - - // Relations - result[0] = (a == a) && result[0]; - result[0] = !(a != a) && result[0]; - result[0] = (a <= a) && result[0]; - result[0] = (a >= a) && result[0]; - result[0] = !(a < a) && result[0]; - result[0] = !(a > a) && result[0]; - - #if __HIP_CLANG_ONLY__ - // Dot Functions - result[0] = amd_mixed_dot(a, a, 1, 1) && result[0]; - #endif - - half X = a.x; - half Y = a.y; -} - -int main() { - bool* result{nullptr}; - hipHostMalloc(&result, 1); - - result[0] = true; - hipLaunchKernelGGL( - __halfTest, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half{1}); - hipDeviceSynchronize(); - - if (!result[0]) { failed("Failed __half tests."); } - - result[0] = true; - hipLaunchKernelGGL( - __half2Test, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half2{1, 1}); - hipDeviceSynchronize(); - - if (!result[0]) { failed("Failed __half2 tests."); } - - hipHostFree(result); - - passed(); -} diff --git a/tests/src/deviceLib/hipTestNew.cpp b/tests/src/deviceLib/hipTestNew.cpp deleted file mode 100644 
index 6d5e67e5fe..0000000000 --- a/tests/src/deviceLib/hipTestNew.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -#define HIP_ASSERT(status) assert(status == hipSuccess) - -#define LEN 512 -#define SIZE 2048 - - class A { - public: - __device__ A() { - a = threadIdx.x + blockIdx.x * blockDim.x; - } - private: - int a; - }; - - static __global__ void kernel(int* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - new(Ad+tid) A(); - } - - void run() { - int *A, *Ad; - A = new int[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = 0; - } - - HIP_ASSERT(hipMalloc((void**)&Ad, SIZE)); - hipLaunchKernelGGL(kernel, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); - HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); - - for (unsigned i = 0; i < LEN; i++) { - assert(i == A[i]); - } - } - -int main() { - run(); - passed(); -} diff --git a/tests/src/deviceLib/hipThreadFence.cpp b/tests/src/deviceLib/hipThreadFence.cpp deleted file mode 100644 index 3f27ebebf2..0000000000 --- a/tests/src/deviceLib/hipThreadFence.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -#define NUM 1024 -#define SIZE NUM * sizeof(float) - -__global__ void vAdd(float* In1, float* In2, float* In3, float* In4, float* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - In4[tid] = In1[tid] + In2[tid]; - __threadfence(); - In3[tid] = In3[tid] + In4[tid]; - __threadfence_block(); - Out[tid] = In4[tid] + In3[tid]; -} - -int main() { - float* In1 = new float[1024]; - float* In2 = new float[1024]; - float* In3 = new float[1024]; - float* In4 = new float[1024]; - float* Out = new float[1024]; - - for (uint32_t i = 0; i < 1024; i++) { - In1[i] = 1.0f; - In2[i] = 1.0f; - In3[i] = 1.0f; - In4[i] = 1.0f; - } - - float *In1d, *In2d, *In3d, *In4d, *Outd; - hipMalloc((void**)&In1d, SIZE); - hipMalloc((void**)&In2d, SIZE); - hipMalloc((void**)&In3d, SIZE); - hipMalloc((void**)&In4d, SIZE); - hipMalloc((void**)&Outd, SIZE); - - hipMemcpy(In1d, In1, SIZE, hipMemcpyHostToDevice); - hipMemcpy(In2d, In2, SIZE, hipMemcpyHostToDevice); - hipMemcpy(In3d, In3, SIZE, hipMemcpyHostToDevice); - hipMemcpy(In4d, In4, SIZE, hipMemcpyHostToDevice); - - hipLaunchKernelGGL(vAdd, dim3(32, 1, 1), dim3(32, 1, 1), 0, 0, In1d, In2d, In3d, In4d, Outd); - hipMemcpy(Out, Outd, SIZE, hipMemcpyDeviceToHost); - assert(Out[10] == 2 * In1[10] + 2 * In2[10] + In3[10]); - passed(); -} diff --git a/tests/src/deviceLib/hipVectorTypes.cpp b/tests/src/deviceLib/hipVectorTypes.cpp deleted file mode 100644 index abfd5ca150..0000000000 --- a/tests/src/deviceLib/hipVectorTypes.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia amd - * TEST: %t - * HIT_END - */ - -#include - -#include "vector_test_common.h" -#include "test_common.h" - -#include -#include -#include -#include -#include - -using namespace std; - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -bool integer_unary_tests(V&, V&) { - return true; -} - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -bool integer_binary_tests(V&, V&, V&...) 
{ - return true; -} - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -bool integer_unary_tests(V& f1, V& f2) { - f1 %= f2; - if (f1 != V{0}) return false; - f1 &= f2; - if (f1 != V{0}) return false; - f1 |= f2; - if (f1 != V{1}) return false; - f1 ^= f2; - if (f1 != V{0}) return false; - f1 = V{1}; - f1 <<= f2; - if (f1 != V{2}) return false; - f1 >>= f2; - if (f1 != V{1}) return false; - f2 = ~f1; - return f2 == V{~1}; -} - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -bool integer_binary_tests(V& f1, V& f2, V& f3) { - f3 = f1 % f2; - if (f3 != V{0}) return false; - f1 = f3 & f2; - if (f1 != V{0}) return false; - f2 = f1 ^ f3; - if (f2 != V{0}) return false; - f1 = V{1}; - f2 = V{2}; - f3 = f1 << f2; - if (f3 != V{4}) return false; - f2 = f3 >> f1; - return f2 == V{2}; -} - -template -bool constructor_tests() { - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - static_assert(is_constructible{}, ""); - - return true; -} - -template -bool TestVectorType() { - constexpr V v1{1}; - constexpr V v2{2}; - constexpr V v3{3}; - constexpr V v4{4}; - - V f1{1}; - V f2{1}; - V f3 = f1 + f2; - if (f3 != v2) return false; - f2 = f3 - f1; - if (f2 != v1) return false; - f1 = f2 * f3; - if (f1 != v2) return false; - f2 = f1 / f3; - if (f2 != v1) return false; - if (!integer_binary_tests(f1, f2, f3)) return false; - - f1 = V{2}; - f2 = V{1}; - f1 += f2; - if (f1 != v3) return false; - f1 -= f2; - if (f1 != v2) return false; - f1 *= f2; - if (f1 != v2) return false; - f1 /= f2; - if (f1 != v2) return false; - if (!integer_unary_tests(f1, f2)) return false; - - 
f1 = v2; - f2 = f1++; - if (f1 != v3) return false; - if (f2 != v2) return false; - f2 = f1--; - if (f2 != v3) return false; - if (f1 != v2) return false; - f2 = ++f1; - if (f1 != v3) return false; - if (f2 != v3) return false; - f2 = --f1; - if (f1 != v2) return false; - if (f2 != v2) return false; - - if (!constructor_tests()) return false; - - f1 = v3; - f2 = v4; - f3 = v3; - if (f1 == f2) return false; - if (!(f1 != f2)) return false; - - using T = typename V::value_type; - - const T& x = f1.x; - T& y = f2.x; - const volatile T& z = f3.x; - volatile T& w = f2.x; - - if (x != T{3}) return false; - if (y != T{4}) return false; - if (z != T{3}) return false; - if (w != T{4}) return false; - - stringstream str; - str << f1.x; - str >> f2.x; - - if (f1.x != f2.x) return false; - - return true; -} - -template* = nullptr> -bool TestVectorTypes() { - return true; -} - -template -bool TestVectorTypes() { - if (!TestVectorType()) return false; - return TestVectorTypes(); -} - -bool CheckVectorTypes() { - return TestVectorTypes< - char1, char2, char3, char4, - uchar1, uchar2, uchar3, uchar4, - short1, short2, short3, short4, - ushort1, ushort2, ushort3, ushort4, - int1, int2, int3, int4, - uint1, uint2, uint3, uint4, - long1, long2, long3, long4, - ulong1, ulong2, ulong3, ulong4, - longlong1, longlong2, longlong3, longlong4, - ulonglong1, ulonglong2, ulonglong3, ulonglong4, - float1, float2, float3, float4, - double1, double2, double3, double4>(); -} - -int main() { - static_assert(sizeof(float1) == 4, ""); - static_assert(sizeof(float2) >= 8, ""); - static_assert(sizeof(float3) == 12, ""); - static_assert(sizeof(float4) >= 16, ""); - - if (CheckVectorTypes()) { - float1 f1 = make_float1(1.0f); - passed(); - } - else { - failed("Failed some vector test on the host side."); - } -} \ No newline at end of file diff --git a/tests/src/deviceLib/hipVectorTypesDevice.cpp b/tests/src/deviceLib/hipVectorTypesDevice.cpp deleted file mode 100644 index 6ec6271b45..0000000000 --- 
a/tests/src/deviceLib/hipVectorTypesDevice.cpp +++ /dev/null @@ -1,319 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia amd - * TEST: %t - * HIT_END - */ - -#include - -#include "test_common.h" - -#include -#include -#include - -using namespace std; - -template __device__ -typename std::add_rvalue_reference::type _declval() noexcept; - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -__device__ -constexpr -bool integer_unary_tests(const V&, const V&) { - return true; -} - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -__device__ -bool integer_unary_tests(V& f1, V& f2) { - f1 %= f2; - if (f1 != V{0}) return false; - - f1 &= f2; - if (f1 != V{0}) return false; - f1 |= f2; - if (f1 != V{1}) return false; - f1 ^= f2; - if (f1 != V{0}) return false; - f1 = V{1}; - f1 <<= f2; - if (f1 != V{2}) return false; - f1 >>= f2; - if (f1 != V{1}) return false; - f2 = ~f1; - return f2 == V{~1}; - - return true; -} - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -__device__ -constexpr -bool integer_binary_tests(const V&, const V&, const V&) { - return true; -} - -template< - typename V, - Enable_if_t().x)>{}>* = nullptr> -__device__ -bool integer_binary_tests(V& f1, V& f2, V& f3) { - f3 = f1 % f2; - if (f3 != V{0}) return false; - f1 = f3 & f2; - if (f1 != V{0}) return false; - f2 = f1 ^ f3; - if (f2 != V{0}) return false; - f1 = V{1}; - f2 = V{2}; - f3 = f1 << f2; - if (f3 != V{4}) return false; - f2 = f3 >> f1; - return f2 == V{2}; -} - -template -__device__ -bool TestVectorType() { - constexpr V v1{1}; - constexpr V v2{2}; - constexpr V v3{3}; - constexpr V v4{4}; - - V f1{1}; - V f2{1}; - V f3 = f1 + f2; - if (f3 != V{2}) return false; - f2 = f3 - f1; - if (f2 != V{1}) return false; - f1 = f2 * f3; - if (f1 != V{2}) return false; - f2 = f1 / f3; - if (f2 != V{1}) return false; - if (!integer_binary_tests(f1, f2, f3)) return false; - - f1 = v2; - f2 = v1; - f1 += f2; - if (f1 != v3) return false; - f1 -= f2; - if (f1 != v2) return false; - f1 *= f2; - if (f1 != v2) return 
false; - f1 /= f2; - if (f1 != v2) return false; - if (!integer_unary_tests(f1, f2)) return false; - - f1 = v2; - f2 = f1++; - if (f1 != v3) return false; - if (f2 != v2) return false; - f2 = f1--; - if (f2 != v3) return false; - if (f1 != v2) return false; - f2 = ++f1; - if (f1 != v3) return false; - if (f2 != v3) return false; - f2 = --f1; - if (f1 != v2) return false; - if (f2 != v2) return false; - - f1 = v3; - f2 = v4; - f3 = v3; - if (f1 == f2) return false; - if (!(f1 != f2)) return false; - - #if 0 // TODO: investigate on GFX8 - using T = typename V::value_type; - - const T& x = f1.x; - T& y = f2.x; - const volatile T& z = f3.x; - volatile T& w = f2.x; - - if (x != T{3}) return false; - if (y != T{4}) return false; - if (z != T{3}) return false; - if (w != T{4}) return false; - #endif - - return true; -} - -template* = nullptr> -__device__ -bool TestVectorTypes() { - return true; -} - -template -__device__ -bool TestVectorTypes() { - if (!TestVectorType()) return false; - return TestVectorTypes(); -} - -__global__ -void CheckVectorTypes(bool* ptr) { - ptr[0] = TestVectorTypes< - char1, char2, char3, char4, - uchar1, uchar2, uchar3, uchar4, - short1, short2, short3, short4, - ushort1, ushort2, ushort3, ushort4, - int1, int2, int3, int4, - uint1, uint2, uint3, uint4, - long1, long2, long3, long4, - ulong1, ulong2, ulong3, ulong4, - longlong1, longlong2, longlong3, longlong4, - ulonglong1, ulonglong2, ulonglong3, ulonglong4, - float1, float2, float3, float4, - double1, double2, double3, double4>(); -} - - -template -__global__ -void CheckSharedVectorType(bool* ptr) { - constexpr V v1{1}; - constexpr V v2{2}; - constexpr V v3{3}; - constexpr V v4{4}; - __shared__ V f1, f2, f3; - - *ptr = true; - f1 = V{1}; - f2 = V{1}; - f3 = f1 + f2; - *ptr = *ptr && f3 == V{2}; - f2 = f3 - f1; - *ptr = *ptr && f2 == V{1}; - f1 = f2 * f3; - *ptr = *ptr && f1 == V{2}; - f2 = f1 / f3; - *ptr = *ptr && f2 == V{1}; - *ptr = *ptr && integer_binary_tests(f1, f2, f3); - - f1 = v2; - 
f2 = v1; - f1 += f2; - *ptr = *ptr && f1 == v3; - f1 -= f2; - *ptr = *ptr && f1 == v2; - f1 *= f2; - *ptr = *ptr && f1 == v2; - f1 /= f2; - *ptr = *ptr && f1 == v2; - *ptr = *ptr && integer_unary_tests(f1, f2); - - f1 = v2; - f2 = f1++; - *ptr = *ptr && f1 == v3; - *ptr = *ptr && f2 == v2; - f2 = f1--; - *ptr = *ptr && f2 == v3; - *ptr = *ptr && f1 == v2; - f2 = ++f1; - *ptr = *ptr && f1 == v3; - *ptr = *ptr && f2 == v3; - f2 = --f1; - *ptr = *ptr && f1 == v2; - *ptr = *ptr && f2 == v2; - - f1 = v3; - f2 = v4; - f3 = v3; - *ptr = *ptr && f1 != f2; -} - -template -bool run_CheckSharedVectorType() { - bool* ptr = nullptr; - if (hipMalloc(&ptr, sizeof(bool)) != HIP_SUCCESS) return false; - unique_ptr correct{ptr, hipFree}; - hipLaunchKernelGGL( - (CheckSharedVectorType), dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get()); - bool passed = true; - if (hipMemcpyDtoH(&passed, correct.get(), sizeof(bool)) != HIP_SUCCESS) { - return false; - } - return passed; -} - -template* = nullptr> -bool run_CheckSharedVectorTypes() { - return true; -} - -template -bool run_CheckSharedVectorTypes() { - return run_CheckSharedVectorType() && - run_CheckSharedVectorTypes(); -} - -int main() { - static_assert(sizeof(float1) == 4, ""); - static_assert(sizeof(float2) >= 8, ""); - static_assert(sizeof(float3) >= 12, ""); - static_assert(sizeof(float4) >= 16, ""); - - bool* ptr = nullptr; - if (hipMalloc(&ptr, sizeof(bool)) != HIP_SUCCESS) return EXIT_FAILURE; - unique_ptr correct{ptr, hipFree}; - hipLaunchKernelGGL( - CheckVectorTypes, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get()); - bool passed = true; - if (hipMemcpyDtoH(&passed, correct.get(), sizeof(bool)) != HIP_SUCCESS) { - return EXIT_FAILURE; - } - - passed = passed && run_CheckSharedVectorTypes< - char1, char2, char3, char4, - uchar1, uchar2, uchar3, uchar4, - short1, short2, short3, short4, - ushort1, ushort2, ushort3, ushort4, - int1, int2, int3, int4, - uint1, uint2, uint3, uint4, - long1, long2, long3, long4, - ulong1, 
ulong2, ulong3, ulong4, - longlong1, longlong2, longlong3, longlong4, - ulonglong1, ulonglong2, ulonglong3, ulonglong4, - float1, float2, float3, float4, - double1, double2, double3, double4>(); - - if (passed == true) { - passed(); - } - else { - failed("Failed some vector test."); - } -} diff --git a/tests/src/deviceLib/hipVersion.cpp b/tests/src/deviceLib/hipVersion.cpp deleted file mode 100644 index 9dc9141afe..0000000000 --- a/tests/src/deviceLib/hipVersion.cpp +++ /dev/null @@ -1,42 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - - #include "test_common.h" - -/* - * Note : Compile time test hence always returns success while run. - * Intension is to make sure apilcation can access hip version. 
- */ - int main() - { - std::cout<<"Hip major version : "< -#include - -#include -#include -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -__global__ void warpvote(int* device_any, int* device_all, - int Num_Warps_per_Block, int pshift) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - device_any[threadIdx.x >> pshift] = __any(tid - 77); - device_all[threadIdx.x >> pshift] = __all(tid - 77); -} - -int main(int argc, char* argv[]) { - int warpSize, pshift; - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - warpSize = devProp.warpSize; - - int w = warpSize; - pshift = 0; - while (w >>= 1) ++pshift; - - printf("warpSize=%d pshift=%d\n", warpSize, pshift); - - - int anycount = 0; - int allcount = 0; - int Num_Threads_per_Block = 1024; - int Num_Blocks_per_Grid = 1; - int Num_Warps_per_Block = Num_Threads_per_Block / warpSize; - int Num_Warps_per_Grid = (Num_Threads_per_Block * Num_Blocks_per_Grid) / warpSize; - - int* host_any = (int*)malloc(Num_Warps_per_Grid * sizeof(int)); - int* host_all = (int*)malloc(Num_Warps_per_Grid * sizeof(int)); - int* device_any; - int* device_all; - HIP_ASSERT(hipMalloc((void**)&device_any, Num_Warps_per_Grid * sizeof(int))); - HIP_ASSERT(hipMalloc((void**)&device_all, Num_Warps_per_Grid * sizeof(int))); - for (int i = 0; i < Num_Warps_per_Grid; i++) { - host_any[i] = 0; - host_all[i] = 0; - } - HIP_ASSERT(hipMemcpy(device_any, host_any, sizeof(int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(device_all, host_all, sizeof(int), hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(warpvote, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0, - device_any, device_all, Num_Warps_per_Block, pshift); - - - HIP_ASSERT( - hipMemcpy(host_any, device_any, Num_Warps_per_Grid * sizeof(int), hipMemcpyDeviceToHost)); - HIP_ASSERT( - hipMemcpy(host_all, device_all, Num_Warps_per_Grid * sizeof(int), hipMemcpyDeviceToHost)); - for (int i = 0; i < Num_Warps_per_Grid; i++) { - printf("warp no. 
%d __any = %d \n", i, host_any[i]); - printf("warp no. %d __all = %d \n", i, host_all[i]); - - if (host_all[i] != 1) ++allcount; - if (host_any[i] != 1) ++anycount; - } - - if (anycount == 0 && allcount == 1) - printf("PASSED\n"); - else { - printf("FAILED\n"); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/tests/src/deviceLib/hip_ballot.cpp b/tests/src/deviceLib/hip_ballot.cpp deleted file mode 100644 index 84994b6b99..0000000000 --- a/tests/src/deviceLib/hip_ballot.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS --Wno-deprecated-declarations - * TEST: %t - * HIT_END - */ - -#include - -#include -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -__global__ void gpu_ballot(unsigned int* device_ballot, int Num_Warps_per_Block, - int pshift) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - const unsigned int warp_num = threadIdx.x >> pshift; -#ifdef __HIP_PLATFORM_AMD__ - atomicAdd(&device_ballot[warp_num + blockIdx.x * Num_Warps_per_Block], - __popcll(__ballot(tid - 245))); -#else - atomicAdd(&device_ballot[warp_num + blockIdx.x * Num_Warps_per_Block], - __popc(__ballot(tid - 245))); -#endif -} - - -int main(int argc, char* argv[]) { - int warpSize, pshift; - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - - warpSize = devProp.warpSize; - - int w = warpSize; - pshift = 0; - while (w >>= 1) ++pshift; - - unsigned int Num_Threads_per_Block = 512; - unsigned int Num_Blocks_per_Grid = 1; - unsigned int Num_Warps_per_Block = Num_Threads_per_Block / warpSize; - unsigned int Num_Warps_per_Grid = (Num_Threads_per_Block * Num_Blocks_per_Grid) / warpSize; - unsigned int* host_ballot = (unsigned int*)malloc(Num_Warps_per_Grid * sizeof(unsigned int)); - unsigned int* device_ballot; - HIP_ASSERT(hipMalloc((void**)&device_ballot, Num_Warps_per_Grid * sizeof(unsigned int))); - int divergent_count = 0; - for (int i = 0; i < Num_Warps_per_Grid; i++) host_ballot[i] = 0; - - - HIP_ASSERT(hipMemcpy(device_ballot, host_ballot, Num_Warps_per_Grid * sizeof(unsigned int), - hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(gpu_ballot, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0, - device_ballot, Num_Warps_per_Block, pshift); - - - HIP_ASSERT(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid * sizeof(unsigned int), - hipMemcpyDeviceToHost)); - for (int i = 0; i < Num_Warps_per_Grid; i++) { - if ((host_ballot[i] == 0) || (host_ballot[i] / warpSize == warpSize)) - std::cout 
<< "Warp " << i << " IS convergent- Predicate true for " - << host_ballot[i] / warpSize << " threads\n"; - - else { - std::cout << " Warp " << i << " IS divergent - Predicate true for " - << host_ballot[i] / warpSize << " threads\n"; - divergent_count++; - } - } - - if (divergent_count == 1) - printf("PASSED\n"); - else { - printf("FAILED\n"); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} diff --git a/tests/src/deviceLib/hip_bitextract.cpp b/tests/src/deviceLib/hip_bitextract.cpp deleted file mode 100644 index ffb371f78c..0000000000 --- a/tests/src/deviceLib/hip_bitextract.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -#define TEST_DEBUG (0) - - -// CPU implementation of bitextract -template -T bit_extract(T src0, unsigned int src1, unsigned int src2) { - unsigned int bits = sizeof(T) * 8; - T offset = src1 & (bits - 1); - T width = src2 & (bits - 1); - if (width == 0) { - return 0; - } else { - return (src0 << (bits - width - offset)) >> (bits - width); - } -} - -__global__ void HIP_kernel(unsigned int* out32, unsigned int* in32_0, - unsigned int* in32_1, unsigned int* in32_2, - unsigned long long int* out64, unsigned long long int* in64_0, - unsigned int* in64_1, unsigned int* in64_2) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - - out32[x] = __bitextract_u32(in32_0[x], in32_1[x], in32_2[x]); - out64[x] = __bitextract_u64(in64_0[x], in64_1[x], in64_2[x]); -} - - -using namespace std; - -int main() { - unsigned int* hostOut32; - unsigned int* hostSrc032; - unsigned int* hostSrc132; - unsigned int* hostSrc232; - unsigned long long int* hostOut64; - unsigned long long int* hostSrc064; - unsigned int* hostSrc164; - unsigned int* hostSrc264; - - unsigned int* deviceOut32; - unsigned int* deviceSrc032; - unsigned int* deviceSrc132; - unsigned int* deviceSrc232; - unsigned long long int* deviceOut64; - unsigned long long int* deviceSrc064; - unsigned int* deviceSrc164; - unsigned int* deviceSrc264; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - unsigned int wave_size = devProp.warpSize; - unsigned int num_waves_per_block = 2; - unsigned int num_threads_per_block = wave_size * 
num_waves_per_block; - unsigned int num_blocks = 2; - unsigned int NUM = num_threads_per_block * num_blocks; - - int i; - int errors; - - hostOut32 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc032 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc132 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc232 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - - hostOut64 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - hostSrc064 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - hostSrc164 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc264 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - - // initialize the input data - std::random_device rd; - std::uniform_int_distribution uint32_src0_dist; - std::uniform_int_distribution uint32_src12_dist(0,31); - std::uniform_int_distribution uint64_src0_dist; - std::uniform_int_distribution uint64_src12_dist(0,63); - for (i = 0; i < NUM; i++) { - hostOut32[i] = 0; - hostSrc032[i] = uint32_src0_dist(rd); - hostSrc132[i] = uint32_src12_dist(rd); - hostSrc232[i] = uint32_src12_dist(rd); - if (hostSrc132[i] + hostSrc232[i] > 32) - hostSrc232[i] = 32 - hostSrc132[i]; - hostOut64[i] = 0; - hostSrc064[i] = uint64_src0_dist(rd); - hostSrc164[i] = uint64_src12_dist(rd); - hostSrc264[i] = uint64_src12_dist(rd); - } - - HIP_ASSERT(hipMalloc((void**)&deviceOut32, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc032, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc132, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc232, NUM * sizeof(unsigned int))); - - HIP_ASSERT(hipMalloc((void**)&deviceOut64, NUM * sizeof(unsigned long long int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc064, NUM * sizeof(unsigned long long int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc164, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc264, NUM * sizeof(unsigned 
int))); - - HIP_ASSERT(hipMemcpy(deviceSrc032, hostSrc032, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc132, hostSrc132, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc232, hostSrc232, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - - HIP_ASSERT(hipMemcpy(deviceSrc064, hostSrc064, NUM * sizeof(unsigned long long int), - hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc164, hostSrc164, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc264, hostSrc264, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - - - hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), - 0, 0, - deviceOut32, deviceSrc032, deviceSrc132, deviceSrc232, - deviceOut64, deviceSrc064, deviceSrc164, deviceSrc264); - - - HIP_ASSERT(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(hostOut64, deviceOut64, - NUM * sizeof(unsigned long long int), hipMemcpyDeviceToHost)); - - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostOut32[i] != bit_extract(hostSrc032[i], hostSrc132[i], hostSrc232[i])) { - errors++; -#if TEST_DEBUG - cout << "device: " << hostOut32[i] << " host: " - << bit_extract(hostSrc032[i], hostSrc132[i], hostSrc232[i]) - << " " << hostSrc032[i] << " " << hostSrc132[i] << " " << hostSrc232[i] << "\n"; -#endif - } - } - if (errors != 0) { - cout << "__bitextract_u32() FAILED\n" << endl; - return -1; - } else { - cout << "__bitextract_u32() checked!" 
<< endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostOut64[i] != bit_extract(hostSrc064[i], hostSrc164[i], hostSrc264[i])) { - errors++; -#if TEST_DEBUG - cout << "device: " << hostOut64[i] << " host: " - << bit_extract(hostSrc064[i], hostSrc164[i], hostSrc264[i]) - << " " << hostSrc064[i] << " " << hostSrc164[i] << " " << hostSrc264[i] << "\n"; -#endif - } - } - if (errors != 0) { - cout << "__bitextract_u64() FAILED" << endl; - return -1; - } else { - cout << "__bitextract_u64() checked!" << endl; - } - - cout << "__bitextract_u32() and __bitextract_u64() PASSED!" << endl; - - HIP_ASSERT(hipFree(deviceOut32)); - HIP_ASSERT(hipFree(deviceSrc032)); - HIP_ASSERT(hipFree(deviceSrc132)); - HIP_ASSERT(hipFree(deviceSrc232)); - HIP_ASSERT(hipFree(deviceOut64)); - HIP_ASSERT(hipFree(deviceSrc064)); - HIP_ASSERT(hipFree(deviceSrc164)); - HIP_ASSERT(hipFree(deviceSrc264)); - - free(hostOut32); - free(hostSrc032); - free(hostSrc132); - free(hostSrc232); - free(hostOut64); - free(hostSrc064); - free(hostSrc164); - free(hostSrc264); - - return errors; -} diff --git a/tests/src/deviceLib/hip_bitinsert.cpp b/tests/src/deviceLib/hip_bitinsert.cpp deleted file mode 100644 index 704001131a..0000000000 --- a/tests/src/deviceLib/hip_bitinsert.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -#define TEST_DEBUG (0) - - -// CPU implementation of bitinsert -template -T bit_insert(T src0, T src1, unsigned int src2, unsigned int src3) { - unsigned int bits = sizeof(T) * 8; - T offset = src2 & (bits - 1); - T width = src3 & (bits - 1); - T mask = (((T)1) << width) - 1; - return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); -} - -__global__ void HIP_kernel(unsigned int* out32, - unsigned int* in32_0, unsigned int* in32_1, - unsigned int* in32_2, unsigned int* in32_3, - unsigned long long int* out64, unsigned long long int* in64_0, - unsigned long long int* in64_1, unsigned int* in64_2, - unsigned int* in64_3) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - - out32[x] = __bitinsert_u32(in32_0[x], in32_1[x], in32_2[x], in32_3[x]); - out64[x] = __bitinsert_u64(in64_0[x], in64_1[x], in64_2[x], in64_3[x]); -} - - -using namespace std; - -int main() { - unsigned int* hostOut32; - unsigned int* hostSrc032; - unsigned int* hostSrc132; - unsigned int* hostSrc232; - unsigned int* hostSrc332; - unsigned long long int* hostOut64; - unsigned long long int* hostSrc064; - unsigned long long int* hostSrc164; - unsigned int* hostSrc264; - unsigned int* hostSrc364; - - unsigned int* deviceOut32; - unsigned int* deviceSrc032; - 
unsigned int* deviceSrc132; - unsigned int* deviceSrc232; - unsigned int* deviceSrc332; - unsigned long long int* deviceOut64; - unsigned long long int* deviceSrc064; - unsigned long long int* deviceSrc164; - unsigned int* deviceSrc264; - unsigned int* deviceSrc364; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - unsigned int wave_size = devProp.warpSize; - unsigned int num_waves_per_block = 2; - unsigned int num_threads_per_block = wave_size * num_waves_per_block; - unsigned int num_blocks = 2; - unsigned int NUM = num_threads_per_block * num_blocks; - - int i; - int errors; - - hostOut32 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc032 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc132 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc232 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc332 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - - hostOut64 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - hostSrc064 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - hostSrc164 = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - hostSrc264 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostSrc364 = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - - // initialize the input data - std::random_device rd; - std::uniform_int_distribution uint32_src01_dist; - std::uniform_int_distribution uint32_src23_dist(0,31); - std::uniform_int_distribution uint64_src01_dist; - std::uniform_int_distribution uint64_src23_dist(0,63); - for (i = 0; i < NUM; i++) { - hostOut32[i] = 0; - hostSrc032[i] = uint32_src01_dist(rd); - hostSrc132[i] = uint32_src01_dist(rd); - hostSrc232[i] = uint32_src23_dist(rd); - 
hostSrc232[i] = uint32_src23_dist(rd); - hostOut64[i] = 0; - hostSrc064[i] = uint64_src01_dist(rd); - hostSrc164[i] = uint64_src01_dist(rd); - hostSrc264[i] = uint64_src23_dist(rd); - hostSrc264[i] = uint64_src23_dist(rd); - } - - HIP_ASSERT(hipMalloc((void**)&deviceOut32, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc032, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc132, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc232, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc332, NUM * sizeof(unsigned int))); - - HIP_ASSERT(hipMalloc((void**)&deviceOut64, NUM * sizeof(unsigned long long int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc064, NUM * sizeof(unsigned long long int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc164, NUM * sizeof(unsigned long long int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc264, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceSrc364, NUM * sizeof(unsigned int))); - - HIP_ASSERT(hipMemcpy(deviceSrc032, hostSrc032, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc132, hostSrc132, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc232, hostSrc232, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc332, hostSrc332, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - - HIP_ASSERT(hipMemcpy(deviceSrc064, hostSrc064, NUM * sizeof(unsigned long long int), - hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc164, hostSrc164, NUM * sizeof(unsigned long long int), - hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc264, hostSrc264, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceSrc364, hostSrc364, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - - - hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), - 0, 0, - deviceOut32, deviceSrc032, deviceSrc132, 
deviceSrc232, deviceSrc332, - deviceOut64, deviceSrc064, deviceSrc164, deviceSrc264, deviceSrc364); - - - HIP_ASSERT(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(hostOut64, deviceOut64, - NUM * sizeof(unsigned long long int), hipMemcpyDeviceToHost)); - - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostOut32[i] != bit_insert(hostSrc032[i], hostSrc132[i], - hostSrc232[i], hostSrc332[i])) { - errors++; -#if TEST_DEBUG - cout << "device: " << hostOut32[i] << " host: " - << bit_insert(hostSrc032[i], hostSrc132[i], hostSrc232[i], hostSrc332[i]) - << " " << hostSrc032[i] << " " << hostSrc132[i] << " " << hostSrc232[i] - << " " << hostSrc332[i] << "\n"; -#endif - } - } - if (errors != 0) { - cout << "__bitinsert_u32() FAILED\n" << endl; - return -1; - } else { - cout << "__bitinsert_u32() checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostOut64[i] != bit_insert(hostSrc064[i], hostSrc164[i], - hostSrc264[i], hostSrc364[i])) { - errors++; -#if TEST_DEBUG - cout << "device: " << hostOut64[i] << " host: " - << bit_insert(hostSrc064[i], hostSrc164[i], hostSrc264[i], hostSrc364[i]) - << " " << hostSrc064[i] << " " << hostSrc164[i] << " " << hostSrc264[i] - << " " << hostSrc364[i] << "\n"; -#endif - } - } - if (errors != 0) { - cout << "__bitinsert_u64() FAILED" << endl; - return -1; - } else { - cout << "__bitinsert_u64() checked!" << endl; - } - - cout << "__bitinsert_u32() and __bitinsert_u64() PASSED!" 
<< endl; - - HIP_ASSERT(hipFree(deviceOut32)); - HIP_ASSERT(hipFree(deviceSrc032)); - HIP_ASSERT(hipFree(deviceSrc132)); - HIP_ASSERT(hipFree(deviceSrc232)); - HIP_ASSERT(hipFree(deviceSrc332)); - HIP_ASSERT(hipFree(deviceOut64)); - HIP_ASSERT(hipFree(deviceSrc064)); - HIP_ASSERT(hipFree(deviceSrc164)); - HIP_ASSERT(hipFree(deviceSrc264)); - HIP_ASSERT(hipFree(deviceSrc364)); - - free(hostOut32); - free(hostSrc032); - free(hostSrc132); - free(hostSrc232); - free(hostSrc332); - free(hostOut64); - free(hostSrc064); - free(hostSrc164); - free(hostSrc264); - free(hostSrc364); - - return errors; -} diff --git a/tests/src/deviceLib/hip_brev.cpp b/tests/src/deviceLib/hip_brev.cpp deleted file mode 100644 index 9783b98e16..0000000000 --- a/tests/src/deviceLib/hip_brev.cpp +++ /dev/null @@ -1,175 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - - -#define WIDTH 32 -#define HEIGHT 32 - -#define NUM (WIDTH * HEIGHT) - -#define THREADS_PER_BLOCK_X 8 -#define THREADS_PER_BLOCK_Y 8 -#define THREADS_PER_BLOCK_Z 1 - - -// CPU implementation of bitreverse -template -T bitreverse(T num) { - T count = sizeof(num) * 8 - 1; - T reverse_num = num; - - num >>= 1; - while (num) { - reverse_num <<= 1; - reverse_num |= num & 1; - num >>= 1; - count--; - } - reverse_num <<= count; - return reverse_num; -} - -__global__ void HIP_kernel(unsigned int* a, unsigned int* b, - unsigned long long int* c, unsigned long long int* d, int width, - int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = __brev(b[i]); - c[i] = __brevll(d[i]); - } -} - - -using namespace std; - -int main() { - unsigned int* hostA; - unsigned int* hostB; - unsigned long long int* hostC; - unsigned long long int* hostD; - - unsigned int* deviceA; - unsigned int* deviceB; - unsigned long long int* deviceC; - unsigned long long int* deviceD; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - - int i; - int errors; - - hostA = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostC = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = i; - hostD[i] 
= i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned long long int))); - HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT( - hipMemcpy(deviceD, hostD, NUM * sizeof(unsigned long long int), hipMemcpyHostToDevice)); - - - hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - deviceD, WIDTH, HEIGHT); - - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT( - hipMemcpy(hostC, deviceC, NUM * sizeof(unsigned long long int), hipMemcpyDeviceToHost)); - - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != bitreverse(hostB[i])) { - errors++; - } - } - if (errors != 0) { - cout << "__brev() FAILED\n" << endl; - return -1; - } else { - cout << "__brev() checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostC[i] != bitreverse(hostD[i])) { - errors++; - } - } - if (errors != 0) { - cout << "__brevll() FAILED" << endl; - return -1; - } else { - cout << "__brevll() checked!" << endl; - } - - cout << "__brev() and __brevll() PASSED!" << endl; - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - HIP_ASSERT(hipFree(deviceD)); - - free(hostA); - free(hostB); - free(hostC); - free(hostD); - - return errors; -} diff --git a/tests/src/deviceLib/hip_clz.cpp b/tests/src/deviceLib/hip_clz.cpp deleted file mode 100644 index 367131b5e6..0000000000 --- a/tests/src/deviceLib/hip_clz.cpp +++ /dev/null @@ -1,190 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) -#define WIDTH 8 -#define HEIGHT 8 -#define NUM (WIDTH * HEIGHT) - -#define THREADS_PER_BLOCK_X 8 -#define THREADS_PER_BLOCK_Y 8 -#define THREADS_PER_BLOCK_Z 1 - -unsigned int firstbit_u32(unsigned int a) { - if (a == 0) { - return 32; - } - unsigned int pos = 0; - while ((int)a > 0) { - a <<= 1; - pos++; - } - return pos; -} - -unsigned int firstbit_u64(unsigned long long int a) { - if (a == 0) { - return 64; - } - unsigned int pos = 0; - while ((long long int)a > 0) { - a <<= 1; - pos++; - } - return pos; -} - -// Check implicit conversion will not cause ambiguity. 
-__device__ void test_ambiguity() { - short s; - unsigned short us; - float f; - int i; - unsigned int ui; - __clz(f); - __clz(s); - __clz(us); - __clzll(f); - __clzll(i); - __clzll(ui); -} - -__global__ void HIP_kernel(unsigned int* a, unsigned int* b, unsigned int* c, - unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = __clz(b[i]); - c[i] = __clzll(d[i]); - } -} - -using namespace std; - -int main() { - unsigned int* hostA; - unsigned int* hostB; - unsigned int* hostC; - unsigned long long int* hostD; - - unsigned int* deviceA; - unsigned int* deviceB; - unsigned int* deviceC; - unsigned long long int* deviceD; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - unsigned int i; - int errors; - - hostA = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostC = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = 419430 * i; - hostD[i] = i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT( - hipMemcpy(deviceD, hostD, NUM * sizeof(unsigned long long int), hipMemcpyHostToDevice)); - - 
hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - deviceD, WIDTH, HEIGHT); - - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - printf("gpu_clz =%d, cpu_clz =%d \n", hostA[i], firstbit_u32(hostB[i])); - if (hostA[i] != firstbit_u32(hostB[i])) { - errors++; - } - } - if (errors != 0) { - cout << "FAILED clz" << endl; - return -1; - } else { - cout << "__clz() checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - printf("gpu_clzll =%d, cpu_clzll =%d \n", hostC[i], firstbit_u64(hostD[i])); - if (hostC[i] != firstbit_u64(hostD[i])) { - errors++; - } - } - if (errors != 0) { - cout << "FAILED clz" << endl; - return -1; - } else { - cout << "__clzll() checked!" << endl; - } - - cout << "clz test PASSED!" << endl; - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - HIP_ASSERT(hipFree(deviceD)); - - free(hostA); - free(hostB); - free(hostC); - free(hostD); - - return errors; -} diff --git a/tests/src/deviceLib/hip_ffs.cpp b/tests/src/deviceLib/hip_ffs.cpp deleted file mode 100644 index 089dd1f6ce..0000000000 --- a/tests/src/deviceLib/hip_ffs.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - - -#define WIDTH 8 -#define HEIGHT 8 - -#define NUM (WIDTH * HEIGHT) - -#define THREADS_PER_BLOCK_X 8 -#define THREADS_PER_BLOCK_Y 8 -#define THREADS_PER_BLOCK_Z 1 - -template -int lastbit(T a) { - if (a == 0) - return 0; - int pos = 1; - while ((a & 1) != 1) { - a >>= 1; - pos++; - } - return pos; -} - - -__global__ void HIP_kernel(unsigned int* a, unsigned int* b, unsigned int* c, - unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = __ffs(b[i]); - c[i] = __ffsll(d[i]); - } -} - - -using namespace std; - -int main() { - unsigned int* hostA; - unsigned int* hostB; - unsigned int* hostC; - unsigned long long int* hostD; - - unsigned int* deviceA; - unsigned int* deviceB; - unsigned int* deviceC; - unsigned long long int* deviceD; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - - int i; - int errors; - - hostA = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostC = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = i; - hostD[i] = 1099511627776 + i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * 
sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT( - hipMemcpy(deviceD, hostD, NUM * sizeof(unsigned long long int), hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - deviceD, WIDTH, HEIGHT); - - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - printf("gpu_ffs =%d, cpu_ffs =%d \n", hostA[i], lastbit(hostB[i])); - if (hostA[i] != lastbit(hostB[i])) { - errors++; - } - } - if (errors != 0) { - cout << "FAILED: ffs" << endl; - return -1; - } else { - cout << "__ffs() for unsigned checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - printf("gpu_ffsll =%d, cpu_ffsll =%d \n", hostC[i], lastbit(hostD[i])); - if (hostC[i] != lastbit(hostD[i])) { - errors++; - } - } - if (errors != 0) { - cout << "FAILED: ffs" << endl; - return -1; - } else { - cout << "__ffsll() for unsigned checked!" << endl; - } - - cout << "ffs test PASSED!" << endl; - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - HIP_ASSERT(hipFree(deviceD)); - - - free(hostA); - free(hostB); - free(hostC); - free(hostD); - - return errors; -} diff --git a/tests/src/deviceLib/hip_floatnTM.cpp b/tests/src/deviceLib/hip_floatnTM.cpp deleted file mode 100644 index 92dc4b639f..0000000000 --- a/tests/src/deviceLib/hip_floatnTM.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia HIPCC_OPTIONS -std=c++14 - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -static std::random_device dev; -static std::mt19937 rng(dev()); - -template -__host__ __device__ inline constexpr int count() { - return sizeof(T) / sizeof(M); -} - -inline float getRandomFloat(float min = 10, float max = 100) { - std::uniform_real_distribution gen(min, max); - return gen(rng); -} - -template -void fillMatrix(T* a, int size) { - for (int i = 0; i < size; i++) { - T t; - t.x = getRandomFloat(); - if constexpr (count() >= 2) t.y = getRandomFloat(); - if constexpr (count() >= 3) t.z = getRandomFloat(); - if constexpr (count() >= 4) t.w = getRandomFloat(); - - a[i] = t; - } -} - -// Test operations -template -__host__ __device__ void testOperations(T& a, T& b) { - a.x += b.x; - a.x++; - b.x++; - if constexpr (count() >= 2) { - a.y = b.x; - a.x = b.y; - } - if constexpr (count() >= 3) { - if (a.x > 0) b.x /= a.x; - a.x *= b.z; - a.y--; - } - if constexpr (count() >= 4) { - b.w = a.x; - a.w += (-b.y); - } -} - -template -__global__ void testOperationsGPU(T* d_a, T* d_b, int size) { - int id = threadIdx.x; - if (id > size) return; - T &a = d_a[id]; - T &b = d_b[id]; - - testOperations(a, b); -} - - -template -void dcopy(T* a, T* b, int size) { - for (int i = 0; i < size; i++) { - a[i] = b[i]; - } -} - -template -bool isEqual(T* a, T* b, int size) { - for (int i = 0; i < size; i++) { - if (a[i] != b[i]) { - return false; - } - } - return true; -} - -// Main function that tests type -// T = what you want to test -// D = pack of 1 i.e. 
float1 int1 -template -void testType(int msize) { - T *fa, *fb, *fc, *h_fa, *h_fb; - fa = new T[msize]; - fb = new T[msize]; - fc = new T[msize]; - h_fa = new T[msize]; - h_fb = new T[msize]; - - T *d_fa, *d_fb; - - constexpr int c = count(); - - if (c <= 0 || c >= 5) { - failed("Invalid Size\n"); - } - - fillMatrix(fa, msize); - dcopy(fb, fa, msize); - dcopy(h_fa, fa, msize); - dcopy(h_fb, fa, msize); - for (int i = 0; i < msize; i++) testOperations(h_fa[i], h_fb[i]); - - hipMalloc(&d_fa, sizeof(T) * msize); - hipMalloc(&d_fb, sizeof(T) * msize); - - hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice); - hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice); - - auto kernel = testOperationsGPU; - hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize); - - hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost); - - bool pass = true; - if (!isEqual(h_fa, fc, msize)) { - pass = false; - } - - delete[] fa; - delete[] fb; - delete[] fc; - delete[] h_fa; - delete[] h_fb; - hipFree(d_fa); - hipFree(d_fb); - - if (!pass) { - failed("Failed"); - } -} - -int main() { - const int msize = 100; - // double - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // floats - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // ints - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // chars - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // long - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // longlong - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // short - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // uints - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // uchars - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // ulong - testType(msize); - testType(msize); - 
testType(msize); - testType(msize); - - // ulonglong - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - // ushort - testType(msize); - testType(msize); - testType(msize); - testType(msize); - - passed(); -} diff --git a/tests/src/deviceLib/hip_funnelshift.cpp b/tests/src/deviceLib/hip_funnelshift.cpp deleted file mode 100644 index 4cb3af2fa1..0000000000 --- a/tests/src/deviceLib/hip_funnelshift.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -#define NUM_TESTS 65 - -#define HI_INT 0xfacefeed -#define LO_INT 0xdeadbeef - -__global__ void funnelshift_kernel(unsigned int* l_out, unsigned int* lc_out, - unsigned int* r_out, unsigned int* rc_out) { - - for (int i = 0; i < NUM_TESTS; i++) { - l_out[i] = __funnelshift_l(LO_INT, HI_INT, i); - lc_out[i] = __funnelshift_lc(LO_INT, HI_INT, i); - r_out[i] = __funnelshift_r(LO_INT, HI_INT, i); - rc_out[i] = __funnelshift_rc(LO_INT, HI_INT, i); - } -} - -static unsigned int cpu_funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift) -{ - // Concatenate hi:lo - uint64_t val = hi; - val <<= 32; - val |= lo; - // left shift by intput & 31 - val <<= (shift & 31); - // pull out upper 32 bits and return them - val >>= 32; - return val & 0xffffffff; -} - -static unsigned int cpu_funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift) -{ - // Concatenate hi:lo - uint64_t val = hi; - val <<= 32; - val |= lo; - // left shift by min(input,32) - if (shift > 32) - shift = 32; - val <<= shift; - // pull out upper 32 bits and return them - val >>= 32; - return val & 0xffffffff; -} - -static unsigned int cpu_funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift) -{ - // Concatenate hi:lo - uint64_t val = hi; - val <<= 32; - val |= lo; - // right shift by intput & 31 - val >>= (shift & 31); - // return lower 32 bits - return val & 0xffffffff; -} - -static unsigned int cpu_funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift) -{ - // Concatenate hi:lo - uint64_t val = hi; - val <<= 32; - val |= lo; - // left shift by min(input, 32) - if (shift > 32) - shift = 32; - val >>= shift; - // return lower 32 bits - return val & 0xffffffff; -} - -using namespace std; - -int main() { - unsigned int *host_l_output; - unsigned int 
*host_lc_output; - unsigned int *host_r_output; - unsigned int *host_rc_output; - - unsigned int *device_l_output; - unsigned int *device_lc_output; - unsigned int *device_r_output; - unsigned int *device_rc_output; - - unsigned int *golden_l; - unsigned int *golden_lc; - unsigned int *golden_r; - unsigned int *golden_rc; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - - int i; - int errors; - - host_l_output = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - host_lc_output = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - host_r_output = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - host_rc_output = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - - golden_l = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - golden_lc = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - golden_r = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - golden_rc = (unsigned int*)calloc(NUM_TESTS, sizeof(unsigned int)); - - for (int i = 0; i < NUM_TESTS; i++) { - golden_l[i] = cpu_funnelshift_l(LO_INT, HI_INT, i); - golden_lc[i] = cpu_funnelshift_lc(LO_INT, HI_INT, i); - golden_r[i] = cpu_funnelshift_r(LO_INT, HI_INT, i); - golden_rc[i] = cpu_funnelshift_rc(LO_INT, HI_INT, i); - } - - HIP_ASSERT(hipMalloc((void**)&device_l_output, NUM_TESTS * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&device_lc_output, NUM_TESTS * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&device_r_output, NUM_TESTS * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&device_rc_output, NUM_TESTS * sizeof(unsigned int))); - - hipLaunchKernelGGL(funnelshift_kernel, dim3(1), dim3(1), 0, 0, - device_l_output, device_lc_output, device_r_output, - device_rc_output); - - 
HIP_ASSERT(hipMemcpy(host_l_output, device_l_output, NUM_TESTS * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(host_lc_output, device_lc_output, NUM_TESTS * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(host_r_output, device_r_output, NUM_TESTS * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(host_rc_output, device_rc_output, NUM_TESTS * sizeof(unsigned int), hipMemcpyDeviceToHost)); - - // verify the results - errors = 0; - printf("HI val: 0x%x\n", HI_INT); - printf("LO val: 0x%x\n", LO_INT); - - for (i = 0; i < NUM_TESTS; i++) { - printf("gpu_funnelshift_l(%d) = 0x%x, cpu_funnelshift_l(%d) = 0x%x\n", - i, host_l_output[i], i, golden_l[i]); - if (host_l_output[i] != golden_l[i]) { - errors++; - printf("\tERROR!\n"); - } - } - if (errors != 0) { - cout << "FAILED: funnelshift_l" << endl; - return -1; - } else { - cout << "funnelshift_l checked!" << endl; - } - - errors = 0; - for (i = 0; i < NUM_TESTS; i++) { - printf("gpu_funnelshift_lc(%d) = 0x%x, cpu_funnelshift_lc(%d) = 0x%x\n", - i, host_lc_output[i], i, golden_lc[i]); - if (host_lc_output[i] != golden_lc[i]) { - errors++; - printf("\tERROR!\n"); - } - } - if (errors != 0) { - cout << "FAILED: funnelshift_lc" << endl; - return -1; - } else { - cout << "funnelshift_lc checked!" << endl; - } - - errors = 0; - for (i = 0; i < NUM_TESTS; i++) { - printf("gpu_funnelshift_r(%d) = 0x%x, cpu_funnelshift_r(%d) = 0x%x\n", - i, host_r_output[i], i, golden_r[i]); - if (host_r_output[i] != golden_r[i]) { - errors++; - printf("\tERROR!\n"); - } - } - if (errors != 0) { - cout << "FAILED: funnelshift_r" << endl; - return -1; - } else { - cout << "funnelshift_r checked!" 
<< endl; - } - - errors = 0; - for (i = 0; i < NUM_TESTS; i++) { - printf("gpu_funnelshift_rc(%d) = 0x%x, cpu_funnelshift_rc(%d) = 0x%x\n", - i, host_rc_output[i], i, golden_rc[i]); - if (host_rc_output[i] != golden_rc[i]) { - errors++; - printf("\tERROR!\n"); - } - } - if (errors != 0) { - cout << "FAILED: funnelshift_rc" << endl; - return -1; - } else { - cout << "funnelshift_rc checked!" << endl; - } - errors = 0; - - cout << "funnelshift tests PASSED!" << endl; - - HIP_ASSERT(hipFree(device_l_output)); - HIP_ASSERT(hipFree(device_lc_output)); - HIP_ASSERT(hipFree(device_r_output)); - HIP_ASSERT(hipFree(device_rc_output)); - - free(host_l_output); - free(host_lc_output); - free(host_r_output); - free(host_rc_output); - - return errors; -} diff --git a/tests/src/deviceLib/hip_mbcnt.cpp b/tests/src/deviceLib/hip_mbcnt.cpp deleted file mode 100644 index e650df7d8d..0000000000 --- a/tests/src/deviceLib/hip_mbcnt.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -__global__ void HIP_kernel(unsigned int* mbcnt_lo, unsigned int* mbcnt_hi, unsigned int* lane_id) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - mbcnt_lo[x] = __builtin_amdgcn_mbcnt_lo(0xFFFFFFFF, 0); - mbcnt_hi[x] = __builtin_amdgcn_mbcnt_hi(0xFFFFFFFF, 0); - lane_id[x] = __lane_id(); -} - -using namespace std; - -int main() { - - unsigned int* device_mbcnt_lo; - unsigned int* device_mbcnt_hi; - unsigned int* device_lane_id; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - constexpr unsigned int num_waves_per_block = 2; - const unsigned int wave_size = devProp.warpSize; - const unsigned int num_threads_per_block = wave_size * num_waves_per_block; - const unsigned int num_blocks = 2; - const unsigned int num_threads = num_threads_per_block * num_blocks; - const size_t buffer_size = num_threads * sizeof(unsigned int); - - HIP_ASSERT(hipMalloc((void**)&device_mbcnt_lo, buffer_size)); - HIP_ASSERT(hipMalloc((void**)&device_mbcnt_hi, buffer_size)); - HIP_ASSERT(hipMalloc((void**)&device_lane_id, buffer_size)); - - hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), - dim3(num_threads_per_block), 0, 0, device_mbcnt_lo, device_mbcnt_hi, device_lane_id); - - unsigned int* host_mbcnt_lo = (unsigned int*) malloc(buffer_size); - unsigned int* 
host_mbcnt_hi = (unsigned int*) malloc(buffer_size); - unsigned int* host_lane_id = (unsigned int*) malloc(buffer_size); - - HIP_ASSERT(hipMemcpy(host_mbcnt_lo, device_mbcnt_lo, buffer_size, hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(host_mbcnt_hi, device_mbcnt_hi, buffer_size, hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(host_lane_id, device_lane_id, buffer_size, hipMemcpyDeviceToHost)); - - // verify the results - int mbcnt_lo_errors = 0; - int mbcnt_hi_errors = 0; - int lane_id_errors = 0; - for (unsigned int i = 0; i < num_threads; i++) { - unsigned int this_lane_id = i % wave_size; - unsigned int this_mbcnt_lo = this_lane_id >= 32 ? 32 : this_lane_id; - unsigned int this_mbcnt_hi = this_lane_id < 32 ? 0 : (this_lane_id - 32); - - if (host_mbcnt_lo[i] != this_mbcnt_lo) - mbcnt_lo_errors++; - - if (host_mbcnt_hi[i] != this_mbcnt_hi) - mbcnt_hi_errors++; - - if (host_lane_id[i] != this_lane_id) - lane_id_errors++; - } - - if (mbcnt_lo_errors == 0) - cout << "__mbcnt_lo() PASSED!" << endl; - else - cout << "__mbcnt_lo() FAILED!" << endl; - - - if (mbcnt_hi_errors == 0) - cout << "__mbcnt_hi() PASSED!" << endl; - else - cout << "__mbcnt_hi() FAILED!" << endl; - - if (lane_id_errors == 0) - cout << "__lane_id() PASSED!" << endl; - else - cout << "__lane_id() FAILED!" << endl; - - HIP_ASSERT(hipFree(device_mbcnt_lo)); - HIP_ASSERT(hipFree(device_mbcnt_hi)); - HIP_ASSERT(hipFree(device_lane_id)); - - free(host_mbcnt_lo); - free(host_mbcnt_hi); - free(host_lane_id); - - return mbcnt_lo_errors + mbcnt_hi_errors + lane_id_errors; -} diff --git a/tests/src/deviceLib/hip_popc.cpp b/tests/src/deviceLib/hip_popc.cpp deleted file mode 100644 index e3cc31446b..0000000000 --- a/tests/src/deviceLib/hip_popc.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - - -#define WIDTH 16 -#define HEIGHT 16 - -#define NUM (WIDTH * HEIGHT) - -#define THREADS_PER_BLOCK_X 8 -#define THREADS_PER_BLOCK_Y 8 -#define THREADS_PER_BLOCK_Z 1 - - -// CPU implementation of popcount -template -unsigned int popcountCPU(T value) { - unsigned int ret = 0; - while (value) { - if (value & 0x1) ++ret; - value >>= 1; - } - return ret; -} - -__global__ void HIP_kernel(unsigned int* a, unsigned int* b, unsigned int* c, - unsigned long long int* d, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = __popc(b[i]); - c[i] = __popcll(d[i]); - } -} - - -using namespace std; - -int main() { - unsigned int* hostA; - unsigned int* hostB; - unsigned int* hostC; - unsigned long long int* hostD; - - unsigned int* deviceA; - unsigned int* deviceB; - unsigned int* deviceC; - unsigned long long int* deviceD; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - cout << "hip Device prop succeeded " << endl; - - - int i; - int errors; - - hostA = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostC = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = i; - hostD[i] = 1099511627776 - i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned 
int))); - HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIP_ASSERT( - hipMemcpy(deviceD, hostD, NUM * sizeof(unsigned long long int), hipMemcpyHostToDevice)); - - - hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - deviceD, WIDTH, HEIGHT); - - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != popcountCPU(hostB[i])) { - errors++; - } - } - if (errors != 0) { - cout << "FAILED: popc" << endl; - return -1; - } else { - cout << "__popc() checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostC[i] != popcountCPU(hostD[i])) { - errors++; - } - } - if (errors != 0) { - cout << "FAILED:popc" << endl; - return -1; - } else { - cout << "__popcll() checked!" << endl; - } - - cout << "popc test PASSED!" << endl; - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - HIP_ASSERT(hipFree(deviceD)); - - free(hostA); - free(hostB); - free(hostC); - free(hostD); - - return errors; -} diff --git a/tests/src/deviceLib/hip_test_ldg.cpp b/tests/src/deviceLib/hip_test_ldg.cpp deleted file mode 100644 index 3a8d5745bb..0000000000 --- a/tests/src/deviceLib/hip_test_ldg.cpp +++ /dev/null @@ -1,332 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS --gpu-architecture=sm_35 - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include "hip/hip_vector_types.h" -#include "test_common.h" - -#if defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_CLANG_ONLY__) - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - - -#define WIDTH 8 -#define HEIGHT 8 - -#define NUM (WIDTH * HEIGHT) - -#define THREADS_PER_BLOCK_X 8 -#define THREADS_PER_BLOCK_Y 8 -#define THREADS_PER_BLOCK_Z 1 - -using namespace std; - -template -__global__ void vectoradd_float(T* a, const T* bm, int width, int height) - -{ - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = __ldg(&bm[i]); - } -} - -int2 make_vector2(int a) { return make_int2(a, a); } - -char2 make_vector2(signed char a) { return make_char2(a, a); } - -char4 make_vector4(signed char a) { return make_char4(a, a, a, a); } - -short2 make_vector2(short a) { return make_short2(a, a); } - -ushort2 make_vector2(unsigned short a) { return make_ushort2(a, a); } - -short4 make_vector4(short a) { return make_short4(a, a, a, a); } - -int4 make_vector4(int a) { return make_int4(a, a, a, a); } - -uint2 make_vector2(unsigned int a) { return make_uint2(a, a); } - -uint4 make_vector4(unsigned int a) { return make_uint4(a, a, a, a); } - -float2 make_vector2(float a) { return make_float2(a, a); } - -float4 make_vector4(float a) { return make_float4(a, a, a, a); } - -uchar2 make_vector2(unsigned char a) { return make_uchar2(a, a); } - -uchar4 make_vector4(unsigned char a) { return make_uchar4(a, a, a, a); } - -double2 make_vector2(double a) { return make_double2(a, a); } - - -template -bool dataTypesRun() { - T* hostA; - T* hostB; - - - T* deviceA; - T* deviceB; - - - int i; - int errors; - - hostA = (T*)malloc(NUM * sizeof(T)); - hostB = (T*)malloc(NUM * sizeof(T)); 
- - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = (U)i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(T), hipMemcpyHostToDevice)); - - - hipLaunchKernelGGL(vectoradd_float, - dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, - static_cast(deviceB), WIDTH, HEIGHT); - - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - bool ret = false; - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != (hostB[i])) { - errors++; - } - } - if (errors != 0) { - std::cout << "FAILED\n" << std::endl; - ret = false; - } else { - ret = true; - } - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - - free(hostA); - free(hostB); - - return ret; -} - - -template -bool dataTypesRun2() { - T* hostA; - T* hostB; - - - T* deviceA; - T* deviceB; - - - int i; - int errors; - - hostA = (T*)malloc(NUM * sizeof(T)); - hostB = (T*)malloc(NUM * sizeof(T)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = make_vector2((U)i); - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(T), hipMemcpyHostToDevice)); - hipLaunchKernelGGL(vectoradd_float, - dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, - static_cast(deviceB), WIDTH, HEIGHT); - - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - bool ret = false; - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i].x != (hostB[i].x) && hostA[i].y != (hostB[i].y)) { - errors++; - } - } - if (errors != 0) { - std::cout << "FAILED\n" << std::endl; - ret = 
false; - } else { - ret = true; - } - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - - free(hostA); - free(hostB); - - return ret; -} - - -template -bool dataTypesRun4() { - T* hostA; - T* hostB; - - T* deviceA; - T* deviceB; - - int i; - int errors; - - hostA = (T*)malloc(NUM * sizeof(T)); - hostB = (T*)malloc(NUM * sizeof(T)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = make_vector4((U)i); - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(T), hipMemcpyHostToDevice)); - - - hipLaunchKernelGGL(vectoradd_float, - dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, - static_cast(deviceB), WIDTH, HEIGHT); - - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - bool ret = false; - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i].x != (hostB[i].x) && hostA[i].y != (hostB[i].y) && - hostA[i].z != (hostB[i].z) && hostA[i].w != (hostB[i].w)) { - errors++; - } - } - if (errors != 0) { - std::cout << "FAILED\n" << std::endl; - ret = false; - } else { - ret = true; - } - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - - free(hostA); - free(hostB); - - return ret; -} - -int main() { - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - int errors; - - errors = - dataTypesRun() & dataTypesRun() & dataTypesRun() & - dataTypesRun() & dataTypesRun() & - dataTypesRun() & dataTypesRun() & - dataTypesRun() & - dataTypesRun() & dataTypesRun() & - dataTypesRun() & dataTypesRun() & - dataTypesRun(); - - if (errors == 1) { - errors = 0; - std::cout << "ldg working for single 
element data types\n" << std::endl; - } else { - std::cout << "Failed single element data types" << std::endl; - return -1; - } - -#if 1 - errors = dataTypesRun2() & dataTypesRun2() & - dataTypesRun2() & dataTypesRun2() & - dataTypesRun2() & dataTypesRun2() & - dataTypesRun2() & dataTypesRun2(); - - if (errors == 1) { - errors = 0; - std::cout << "ldg working for two element data types\n" << std::endl; - } else { - std::cout << "Failed two element vector data types" << std::endl; - return -1; - } -#endif - - -#if 1 - - errors = dataTypesRun4() & dataTypesRun4() & - dataTypesRun4() & dataTypesRun4() & - dataTypesRun4() & dataTypesRun4(); - - if (errors == 1) { - errors = 0; - std::cout << "ldg working for four element data types\n" << std::endl; - } else { - std::cout << "Failed four element vector data types" << std::endl; - return -1; - } -#endif - - std::cout << "ldg test PASSED \n" << std::endl; -} - -#endif diff --git a/tests/src/deviceLib/hip_test_make_type.cpp b/tests/src/deviceLib/hip_test_make_type.cpp deleted file mode 100644 index 0d2bcf8fcf..0000000000 --- a/tests/src/deviceLib/hip_test_make_type.cpp +++ /dev/null @@ -1,383 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - - -#define WIDTH 8 -#define HEIGHT 8 - -#define NUM (WIDTH * HEIGHT) - -#define THREADS_PER_BLOCK_X 8 -#define THREADS_PER_BLOCK_Y 8 -#define THREADS_PER_BLOCK_Z 1 - - -__global__ void vectoradd_char1(hipLaunchParm lp, char1* a, const char1* bm, const char1* cm, - int width, int height) - -{ - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = make_char1(bm[i].x) + make_char1(cm[i].x); - } -} - -__global__ void vectoradd_char2(hipLaunchParm lp, char2* a, const char2* bm, const char2* cm, - int width, int height) - -{ - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = make_char2(bm[i].x, bm[i].y) + make_char2(cm[i].x, cm[i].y); - } -} - -__global__ void vectoradd_char3(hipLaunchParm lp, char3* a, const char3* bm, const char3* cm, - int width, int height) - -{ - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = make_char3(bm[i].x, bm[i].y, bm[i].z) + make_char3(cm[i].x, cm[i].y, cm[i].z); - } -} -__global__ void vectoradd_char4(hipLaunchParm lp, char4* a, const char4* bm, const char4* cm, - int width, int height) - 
-{ - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - int i = y * width + x; - if (i < (width * height)) { - a[i] = make_char4(bm[i].x, bm[i].y, bm[i].z, bm[i].w) + - make_char4(cm[i].x, cm[i].y, cm[i].z, cm[i].w); - } -} - - -#if 0 -__kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) { - - - int x = blockDimX * blockIdx.x + threadIdx.x; - int y = blockDimY * blockIdy.y + threadIdx.y; - - int i = y * width + x; - if ( i < (width * height)) { - a[i] = b[i] + c[i]; - } -} -#endif - -using namespace std; - -template -bool dataTypesRun() { - T* hostA; - T* hostB; - T* hostC; - - T* deviceA; - T* deviceB; - T* deviceC; - - int i; - int errors; - - hostA = (T*)malloc(NUM * sizeof(T)); - hostB = (T*)malloc(NUM * sizeof(T)); - hostC = (T*)malloc(NUM * sizeof(T)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = (T)i; - hostC[i] = (T)i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(T), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM * sizeof(T), hipMemcpyHostToDevice)); - - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), - dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - WIDTH, HEIGHT); - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - bool ret = false; - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != (hostB[i] + hostC[i])) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - ret = false; - } else { - ret = true; - } - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - - free(hostA); - 
free(hostB); - free(hostC); - - return ret; -} - -template -bool dataTypesRun() { - T* hostA; - T* hostB; - T* hostC; - - T* deviceA; - T* deviceB; - T* deviceC; - - int i; - int errors; - - hostA = (T*)malloc(NUM * sizeof(T)); - hostB = (T*)malloc(NUM * sizeof(T)); - hostC = (T*)malloc(NUM * sizeof(T)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = (T)i; - hostC[i] = (T)i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(T), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM * sizeof(T), hipMemcpyHostToDevice)); - - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), - dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - WIDTH, HEIGHT); - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - bool ret = false; - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != (hostB[i] + hostC[i])) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - ret = false; - } else { - ret = true; - } - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - - free(hostA); - free(hostB); - free(hostC); - - return ret; -} - -template -bool dataTypesRun() { - T* hostA; - T* hostB; - T* hostC; - - T* deviceA; - T* deviceB; - T* deviceC; - - int i; - int errors; - - hostA = (T*)malloc(NUM * sizeof(T)); - hostB = (T*)malloc(NUM * sizeof(T)); - hostC = (T*)malloc(NUM * sizeof(T)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = (T)i; - hostC[i] = (T)i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); - 
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(T), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM * sizeof(T), hipMemcpyHostToDevice)); - - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), - dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - WIDTH, HEIGHT); - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - bool ret = false; - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != (hostB[i] + hostC[i])) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - ret = false; - } else { - ret = true; - } - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - - free(hostA); - free(hostB); - free(hostC); - - return ret; -} - -bool dataTypesRunChar4() { - char4* hostA; - char4* hostB; - char4* hostC; - - char4* deviceA; - char4* deviceB; - char4* deviceC; - - int i; - int errors; - - hostA = (T*)malloc(NUM * sizeof(T)); - hostB = (T*)malloc(NUM * sizeof(T)); - hostC = (T*)malloc(NUM * sizeof(T)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = (T)i; - hostC[i] = (T)i; - } - - HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T))); - HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T))); - - HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM * sizeof(T), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM * sizeof(T), hipMemcpyHostToDevice)); - - hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1), - dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, - WIDTH, HEIGHT); - - HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - bool ret = 
false; - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != (hostB[i] + hostC[i])) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - ret = false; - } else { - ret = true; - } - - HIP_ASSERT(hipFree(deviceA)); - HIP_ASSERT(hipFree(deviceB)); - HIP_ASSERT(hipFree(deviceC)); - - free(hostA); - free(hostB); - free(hostC); - - return ret; -} - -int main() { - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - cout << " System minor " << devProp.minor << endl; - cout << " System major " << devProp.major << endl; - cout << " agent prop name " << devProp.name << endl; - - int errors; - - errors = dataTypesRun() & dataTypesRun() & dataTypesRun() & - dataTypesRun(); - - - // hipResetDefaultAccelerator(); - if (errors == 1) { - passed(); - } else { - std::cout << "Failed Float" << std::endl; - return -1; - } -} diff --git a/tests/src/deviceLib/hip_test_syncthreads_and.cpp b/tests/src/deviceLib/hip_test_syncthreads_and.cpp deleted file mode 100644 index 55954b013e..0000000000 --- a/tests/src/deviceLib/hip_test_syncthreads_and.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include "test_common.h" - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -static __global__ -void kernel_syncthreads_and(int *syncTestD, - int *allThreadsZeroD, - int *allThreadsOneD, - int *oneThreadZeroD, - int *allThreadsMinusOneD) -{ - int blockSize = blockDim.x; - int predicate = 10; - // First block index starts with 0, and second block index starts - // with blockSize - int i = (blockIdx.x == 0) ? threadIdx.x : blockSize + threadIdx.x; - - // At very first, we need to ensure work-group level syncronization - // properly happened, don't bother about predicate testing for now. - // Thread 0 and thread 1 writes to shared memory. After call to api, - // every thread reads shared memory, and store product for verification - __shared__ int sm[2]; - if (threadIdx.x == 0) - sm[0] = 10; - else if (threadIdx.x == 1) - sm[1] = 20; - __syncthreads_and(predicate); - syncTestD[i] = sm[0] * sm[1]; - - // All threads pass 0 as predicate value, result should be 0 - predicate = 0; - allThreadsZeroD[i] = __syncthreads_and(predicate); - - // All threads pass 1 as predicate value, result should be 1 - predicate = 1; - allThreadsOneD[i] = __syncthreads_and(predicate); - - // Thread 0 pass 0, and all other threads 1 as predicate value, - // result should be 0 - predicate = (threadIdx.x == 0) ? 
0 : 1; - oneThreadZeroD[i] = __syncthreads_and(predicate); - - // All threads pass -1 as predicate value, result should be 1 - predicate = -1; - allThreadsMinusOneD[i] = __syncthreads_and(predicate); -} - -static void test_syncthreads_and(int blockSize) -{ - int nBytes = sizeof(int) * 2 * blockSize; - int * syncTestD, *syncTestH; - int *allThreadsZeroD, *allThreadsZeroH; - int *allThreadsOneD, *allThreadsOneH; - int *oneThreadZeroD, *oneThreadZeroH; - int *allThreadsMinusOneD, *allThreadsMinusOneH; - - // Allocate device memory - ASSERT_EQUAL(hipMalloc((void**)&syncTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsZeroD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsOneD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&oneThreadZeroD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsMinusOneD, nBytes), hipSuccess); - - // Allocate host memory - ASSERT_EQUAL(hipHostMalloc((void**)&syncTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsZeroH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsOneH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&oneThreadZeroH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsMinusOneH, nBytes), hipSuccess); - - // Launch Kernel - hipLaunchKernelGGL(kernel_syncthreads_and, - 2, - blockSize, - 0, - 0, - syncTestD, - allThreadsZeroD, - allThreadsOneD, - oneThreadZeroD, - allThreadsMinusOneD); - - // Copy result from device to host - ASSERT_EQUAL(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsOneH, allThreadsOneD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(oneThreadZeroH, oneThreadZeroD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsMinusOneH, 
allThreadsMinusOneD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - - // Validate results for both blocks together - for (int i = 0; i < 2 * blockSize; ++i) { - ASSERT_EQUAL(syncTestH[i], 200); - ASSERT_EQUAL(allThreadsZeroH[i], 0); - ASSERT_EQUAL(allThreadsOneH[i], 1); - ASSERT_EQUAL(oneThreadZeroH[i], 0); - ASSERT_EQUAL(allThreadsMinusOneH[i], 1); - } - - // Free device memory - ASSERT_EQUAL(hipFree(syncTestD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsZeroD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsOneD), hipSuccess); - ASSERT_EQUAL(hipFree(oneThreadZeroD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsMinusOneD), hipSuccess); - - //Free host memory - ASSERT_EQUAL(hipHostFree(syncTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsZeroH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsOneH), hipSuccess); - ASSERT_EQUAL(hipHostFree(oneThreadZeroH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsMinusOneH), hipSuccess); -} - -int main() -{ - int blockSizes[] = {10, 40, 70, 130, 240, 723, 32, 64, 128, 256, 512, 1024}; - for (int i = 0; i < (sizeof(blockSizes) / sizeof(blockSizes[0])); ++i) - test_syncthreads_and(blockSizes[i]); - passed(); -} diff --git a/tests/src/deviceLib/hip_test_syncthreads_count.cpp b/tests/src/deviceLib/hip_test_syncthreads_count.cpp deleted file mode 100644 index 4af98b5823..0000000000 --- a/tests/src/deviceLib/hip_test_syncthreads_count.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include "test_common.h" - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -static __global__ -void kernel_syncthreads_count(int *syncTestD, - int *allThreadsZeroD, - int *allThreadsOneD, - int *oddThreadsOneD, - int *allThreadsMinusOneD, - int *allThreadsIdD) -{ - int blockSize = blockDim.x; - int predicate = 10; - // First block index starts with 0, and second block index starts - // with blockSize - int i = (blockIdx.x == 0) ? threadIdx.x : blockSize + threadIdx.x; - - // At very first, we need to ensure work-group level syncronization - // properly happened, don't bother about predicate testing for now. - // Thread 0 and thread 1 writes to shared memory. 
After call to api, - // every thread reads shared memory, and store sum for verification - __shared__ int sm[2]; - if (threadIdx.x == 0) - sm[0] = 10; - else if (threadIdx.x == 1) - sm[1] = 20; - __syncthreads_count(predicate); - syncTestD[i] = sm[0] + sm[1]; - - // All threads pass 0 as predicate value, result should be 0 - predicate = 0; - allThreadsZeroD[i] = __syncthreads_count(predicate); - - // All threads pass 1 as predicate value, result should be blockSize - predicate = 1; - allThreadsOneD[i] = __syncthreads_count(predicate); - - // Odd numbered threads pass 1, and even numbered threads pass 0, as - // predicate value, result should be blockSize / 2 - predicate = threadIdx.x % 2; - oddThreadsOneD[i] = __syncthreads_count(predicate); - - // All threads pass -1 as predicate value, result should blockSize - predicate = -1; - allThreadsMinusOneD[i] = __syncthreads_count(predicate); - - // Each thread pass its ID as predicate value, result should be blockSize - 1 - predicate = threadIdx.x; - allThreadsIdD[i] = __syncthreads_count(predicate); -} - -void test_syncthreads_count(int blockSize) -{ - int nBytes = sizeof(int) * 2 * blockSize; - int * syncTestD, *syncTestH; - int *allThreadsZeroD, *allThreadsZeroH; - int *allThreadsOneD, *allThreadsOneH; - int *oddThreadsOneD, *oddThreadsOneH; - int *allThreadsMinusOneD, *allThreadsMinusOneH; - int *allThreadsIdD, *allThreadsIdH; - - // Allocate device memory - ASSERT_EQUAL(hipMalloc((void**)&syncTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsZeroD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsOneD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&oddThreadsOneD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsMinusOneD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsIdD, nBytes), hipSuccess); - - // Allocate host memory - ASSERT_EQUAL(hipHostMalloc((void**)&syncTestH, nBytes), hipSuccess); - 
ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsZeroH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsOneH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&oddThreadsOneH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsMinusOneH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsIdH, nBytes), hipSuccess); - - // Launch Kernel - hipLaunchKernelGGL(kernel_syncthreads_count, - 2, - blockSize, - 0, - 0, - syncTestD, - allThreadsZeroD, - allThreadsOneD, - oddThreadsOneD, - allThreadsMinusOneD, - allThreadsIdD); - - // Copy result from device to host - ASSERT_EQUAL(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsOneH, allThreadsOneD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(oddThreadsOneH, oddThreadsOneD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsMinusOneH, allThreadsMinusOneD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsIdH, allThreadsIdD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - - // Validate results for both the blocks together - for (int i = 0; i < 2 * blockSize; ++i) { - ASSERT_EQUAL(syncTestH[i], 30); - ASSERT_EQUAL(allThreadsZeroH[i], 0); - ASSERT_EQUAL(allThreadsOneH[i], blockSize); - ASSERT_EQUAL(oddThreadsOneH[i], blockSize / 2); - ASSERT_EQUAL(allThreadsMinusOneH[i], blockSize); - ASSERT_EQUAL(allThreadsIdH[i], (blockSize-1)); - } - - // Free device memory - ASSERT_EQUAL(hipFree(syncTestD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsZeroD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsOneD), hipSuccess); - ASSERT_EQUAL(hipFree(oddThreadsOneD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsMinusOneD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsIdD), hipSuccess); - - //Free host memory - 
ASSERT_EQUAL(hipHostFree(syncTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsZeroH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsOneH), hipSuccess); - ASSERT_EQUAL(hipHostFree(oddThreadsOneH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsMinusOneH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsIdH), hipSuccess); -} - -int main() -{ - int blockSizes[] = {10, 40, 70, 130, 240, 723, 32, 64, 128, 256, 512, 1024}; - for (int i = 0; i < (sizeof(blockSizes) / sizeof(blockSizes[0])); ++i) - test_syncthreads_count(blockSizes[i]); - passed(); -} diff --git a/tests/src/deviceLib/hip_test_syncthreads_or.cpp b/tests/src/deviceLib/hip_test_syncthreads_or.cpp deleted file mode 100644 index 67e69cc549..0000000000 --- a/tests/src/deviceLib/hip_test_syncthreads_or.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include "test_common.h" - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -static __global__ -void kernel_syncthreads_or(int *syncTestD, - int *allThreadsZeroD, - int *allThreadsOneD, - int *oneThreadOneD, - int *allThreadsMinusOneD) -{ - int blockSize = blockDim.x; - int predicate = 10; - // First block index starts with 0, and second block index starts - // with blockSize - int i = (blockIdx.x == 0) ? threadIdx.x : blockSize + threadIdx.x; - - // At very first, we need to ensure work-group level syncronization - // properly happened, don't bother about predicate testing for now. - // Thread 0 and thread 1 writes to shared memory. After call to api, - // every thread reads shared memory, and store subtraction for verification - __shared__ int sm[2]; - if (threadIdx.x == 0) - sm[0] = 10; - else if (threadIdx.x == 1) - sm[1] = 20; - __syncthreads_or(predicate); - syncTestD[i] = sm[1] - sm[0]; - - // All threads pass 0 as predicate value, result should be 0 - predicate = 0; - allThreadsZeroD[i] = __syncthreads_or(predicate); - - // All threads pass 1 as predicate value, result should be 1 - predicate = 1; - allThreadsOneD[i] = __syncthreads_or(predicate); - - // Thread 0 pass 1, and all other threads 0 as predicate value, - // result should be 1 - predicate = (threadIdx.x == 0) ? 
1 : 0; - oneThreadOneD[i] = __syncthreads_or(predicate); - - // All threads pass -1 as predicate value, result should be 1 - predicate = -1; - allThreadsMinusOneD[i] = __syncthreads_or(predicate); -} - -static void test_syncthreads_or(int blockSize) -{ - int nBytes = sizeof(int) * 2 * blockSize; - int * syncTestD, *syncTestH; - int *allThreadsZeroD, *allThreadsZeroH; - int *allThreadsOneD, *allThreadsOneH; - int *oneThreadOneD, *oneThreadOneH; - int *allThreadsMinusOneD, *allThreadsMinusOneH; - - // Allocate device memory - ASSERT_EQUAL(hipMalloc((void**)&syncTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsZeroD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsOneD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&oneThreadOneD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc((void**)&allThreadsMinusOneD, nBytes), hipSuccess); - - // Allocate host memory - ASSERT_EQUAL(hipHostMalloc((void**)&syncTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsZeroH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsOneH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&oneThreadOneH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc((void**)&allThreadsMinusOneH, nBytes), hipSuccess); - - // Launch Kernel - hipLaunchKernelGGL(kernel_syncthreads_or, - 2, - blockSize, - 0, - 0, - syncTestD, - allThreadsZeroD, - allThreadsOneD, - oneThreadOneD, - allThreadsMinusOneD); - - // Copy result from device to host - ASSERT_EQUAL(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsOneH, allThreadsOneD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(oneThreadOneH, oneThreadOneD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(allThreadsMinusOneH, allThreadsMinusOneD, nBytes, 
hipMemcpyDeviceToHost), - hipSuccess); - - // Validate results for both blocks together - for (int i = 0; i < 2 * blockSize; ++i) { - ASSERT_EQUAL(syncTestH[i], 10); - ASSERT_EQUAL(allThreadsZeroH[i], 0); - ASSERT_EQUAL(allThreadsOneH[i], 1); - ASSERT_EQUAL(oneThreadOneH[i], 1); - ASSERT_EQUAL(allThreadsMinusOneH[i], 1); - } - - // Free device memory - ASSERT_EQUAL(hipFree(syncTestD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsZeroD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsOneD), hipSuccess); - ASSERT_EQUAL(hipFree(oneThreadOneD), hipSuccess); - ASSERT_EQUAL(hipFree(allThreadsMinusOneD), hipSuccess); - - //Free host memory - ASSERT_EQUAL(hipHostFree(syncTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsZeroH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsOneH), hipSuccess); - ASSERT_EQUAL(hipHostFree(oneThreadOneH), hipSuccess); - ASSERT_EQUAL(hipHostFree(allThreadsMinusOneH), hipSuccess); -} - -int main() -{ - int blockSizes[] = {10, 40, 70, 130, 240, 723, 32, 64, 128, 256, 512, 1024}; - for (int i = 0; i < (sizeof(blockSizes) / sizeof(blockSizes[0])); ++i) - test_syncthreads_or(blockSizes[i]); - passed(); -} diff --git a/tests/src/deviceLib/hip_threadfence_system.cpp b/tests/src/deviceLib/hip_threadfence_system.cpp deleted file mode 100644 index 11e84a3b87..0000000000 --- a/tests/src/deviceLib/hip_threadfence_system.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include "hip/device_functions.h" -#include "test_common.h" - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -__host__ __device__ void fence_system() { -#ifdef __HIP_DEVICE_COMPILE__ - __threadfence_system(); -#else - std::atomic_thread_fence(std::memory_order_seq_cst); -#endif -} - -__host__ __device__ void round_robin(const int id, const int num_dev, const int num_iter, - volatile int* data, volatile int* flag) { - for (int i = 0; i < num_iter; i++) { - while (*flag % num_dev != id) fence_system(); // invalid the cache for read - - (*data)++; - fence_system(); // make sure the store to data is sequenced before the store to flag - (*flag)++; - fence_system(); // invalid the cache to flush out flag - } -} - -__global__ void gpu_round_robin(const int id, const int num_dev, const int num_iter, - volatile int* data, volatile int* flag) { - round_robin(id, num_dev, num_iter, data, flag); -} - -int main() { - int num_gpus = 0; - HIP_ASSERT(hipGetDeviceCount(&num_gpus)); - if (num_gpus == 0) { - passed(); - return 0; - } - - volatile int* data; - if (hipHostMalloc(&data, sizeof(int), hipHostMallocCoherent) != hipSuccess) { - warn("Memory allocation failed. Skip test. Is SVM atomic supported?") - passed(); - return 0; - } - - constexpr int init_data = 1000; - *data = init_data; - - volatile int* flag; - if (hipHostMalloc(&flag, sizeof(int), hipHostMallocCoherent) != hipSuccess) { - warn("Memory allocation failed. Skip test. 
Is SVM atomic supported?") - passed(); - return 0; - } - *flag = 0; - - // number of rounds per device - constexpr int num_iter = 1000; - - // one CPU thread + 1 kernel/GPU - const int num_dev = num_gpus + 1; - - int next_id = 0; - std::vector threads; - - // create a CPU thread for the round_robin - threads.push_back(std::thread(round_robin, next_id++, num_dev, num_iter, data, flag)); - - // run one thread per GPU - dim3 dim_block(1, 1, 1); - dim3 dim_grid(1, 1, 1); - - // launch one kernel per device for the round robin - for (; next_id < num_dev; ++next_id) { - threads.push_back(std::thread([=]() { - HIP_ASSERT(hipSetDevice(next_id - 1)); - hipLaunchKernelGGL(gpu_round_robin, dim_grid, dim_block, 0, 0x0, next_id, num_dev, - num_iter, data, flag); - HIP_ASSERT(hipDeviceSynchronize()); - })); - } - - for (auto& t : threads) { - t.join(); - } - - int expected_data = init_data + num_dev * num_iter; - int expected_flag = num_dev * num_iter; - - bool passed = *data == expected_data && *flag == expected_flag; - - HIP_ASSERT(hipHostFree((void*)data)); - HIP_ASSERT(hipHostFree((void*)flag)); - - if (passed) { - passed(); - } else { - failed("Failed Verification!\n"); - } - - return 0; -} diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp deleted file mode 100644 index bb1b288856..0000000000 --- a/tests/src/deviceLib/hip_trig.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" -#include - -#include - -#define HIP_ASSERT(x) (assert((x) == hipSuccess)) - -#define LEN 512 -#define SIZE LEN << 2 - -#define TEST_DEBUG (0) - -__global__ void kernel_trig(float* In, float* sin_d, float* cos_d, float* tan_d, - float* sin_pd, float* cos_pd) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - sin_d[tid] = sinf(In[tid]); - cos_d[tid] = cosf(In[tid]); - tan_d[tid] = tanf(In[tid]); - sincosf(In[tid], &sin_pd[tid], &cos_pd[tid]); -} - -int main() { - float *In, *sin_h, *cos_h, *tan_h, *sin_ph, *cos_ph; - float *In_d, *sin_d, *cos_d, *tan_d, *sin_pd, *cos_pd; - int errors = 0; - In = new float[LEN]; - sin_h = new float[LEN]; - cos_h = new float[LEN]; - tan_h = new float[LEN]; - sin_ph = new float[LEN]; - cos_ph = new float[LEN]; - for (int i = 0; i < LEN; i++) { - In[i] = 1.0f; - sin_h[i] = 0.0f; - cos_h[i] = 0.0f; - tan_h[i] = 0.0f; - sin_ph[i] = 0.0f; - cos_ph[i] = 0.0f; - } - HIP_ASSERT(hipMalloc((void**)&In_d, SIZE)); - HIP_ASSERT(hipMalloc((void**)&sin_d, SIZE)); - HIP_ASSERT(hipMalloc((void**)&cos_d, SIZE)); - HIP_ASSERT(hipMalloc((void**)&tan_d, SIZE)); - HIP_ASSERT(hipMalloc((void**)&sin_pd, SIZE)); - HIP_ASSERT(hipMalloc((void**)&cos_pd, SIZE)); - - hipMemcpy(In_d, In, SIZE, hipMemcpyHostToDevice); - hipLaunchKernelGGL(kernel_trig, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, - In_d, sin_d, cos_d, tan_d, - sin_pd, cos_pd); - hipMemcpy(sin_h, sin_d, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(cos_h, cos_d, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(tan_h, tan_d, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(sin_ph, sin_pd, SIZE, hipMemcpyDeviceToHost); - hipMemcpy(cos_ph, cos_pd, SIZE, hipMemcpyDeviceToHost); - for (int i = 0; i < LEN; i++) { - if (sin_h[i] != sin_ph[i] || cos_h[i] != cos_ph[i] || tan_h[i] * cos_h[i] != sin_h[i]) { - errors++; -#if TEST_DEBUG - std::cout << "Check Failed!" 
<< std::endl; - std::cout << " sin_h: " << sin_h[i] << " sin_ph: " << sin_ph[i] << "\n" - << " cos_h: " << cos_h[i] << " cos_ph:" << cos_ph[i] << "\n" - << " tan_h * cos_h: " << tan_h[i] * cos_h[i] << " sin_h[i]: " << sin_h[i] << "\n"; -#endif - } - } - - HIP_ASSERT(hipFree(In_d)); - HIP_ASSERT(hipFree(sin_d)); - HIP_ASSERT(hipFree(cos_d)); - HIP_ASSERT(hipFree(tan_d)); - HIP_ASSERT(hipFree(sin_pd)); - HIP_ASSERT(hipFree(cos_pd)); - - if (errors != 0) { - std::cout << "hip_trig FAILED!" << std::endl; - return -1; - } else { - std::cout << "hip_trig PASSED!" << std::endl; - } - return errors; -} diff --git a/tests/src/deviceLib/vector_test_common.h b/tests/src/deviceLib/vector_test_common.h deleted file mode 100644 index d6c75d0066..0000000000 --- a/tests/src/deviceLib/vector_test_common.h +++ /dev/null @@ -1,27 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#pragma once - -#include - -template -using Enable_if_t = typename std::enable_if::type; diff --git a/tests/src/dynamicLoading/bit_extract_kernel.cpp b/tests/src/dynamicLoading/bit_extract_kernel.cpp deleted file mode 100644 index 2647a6c441..0000000000 --- a/tests/src/dynamicLoading/bit_extract_kernel.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "hip/hip_runtime.h" - -extern "C" __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* - A_d, size_t N) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N; i += stride) { -#ifdef __HIP_PLATFORM_AMD__ - C_d[i] = __bitextract_u32(A_d[i], 8, 4); -#else /* defined __HIP_PLATFORM_NVIDIA__ or other path */ - C_d[i] = ((A_d[i] & 0xf00) >> 8); -#endif - } -} diff --git a/tests/src/dynamicLoading/complex_loading_behavior.cpp b/tests/src/dynamicLoading/complex_loading_behavior.cpp deleted file mode 100644 index d83e98a882..0000000000 --- a/tests/src/dynamicLoading/complex_loading_behavior.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* Test for loading device kernels from a library created with extern "C" function - */ - -/* HIT_START - * BUILD_CMD: libLazyLoad_amd %hc %S/%s -o liblazyLoad.so -I%S/.. -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_LIB_TYPE static - * BUILD_CMD: libLazyLoad_nvidia %hc %S/%s --std=c++11 -o liblazyLoad.so -I%S/.. -Xcompiler -fPIC -lpthread -shared -DTEST_SHARED_LIBRARY EXCLUDE_HIP_PLATFORM amd - * BUILD_CMD: %t %hc %S/%s --std=c++11 -o %T/%t -I%S/.. -ldl EXCLUDE_HIP_LIB_TYPE static - * TEST: %t - * HIT_END - */ - -#if !defined(TEST_SHARED_LIBRARY) - -#include -#include -#include "test_common.h" - -__global__ void vector_add(float* C, float* A, float* B, size_t N) { - size_t offset = blockIdx.x * blockDim.x + threadIdx.x; - size_t stride = blockDim.x * gridDim.x; - for (size_t i = offset; i < N; i += stride) { - C[i] = A[i] + B[i]; - } -} - -bool launch_local_kernel() { - bool testResult = true; - float *A_d, *B_d, *C_d; - float *A_h, *B_h, *C_h; - size_t N = 1000000; - size_t Nbytes = N * sizeof(float); - static int device = 0; - - HIPCHECK(hipSetDevice(device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device /*deviceID*/)); - - A_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(A_h == nullptr ? hipErrorOutOfMemory : hipSuccess); - B_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(B_h == nullptr ? hipErrorOutOfMemory : hipSuccess); - C_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(C_h == nullptr ? 
hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < N; i++) { - A_h[i] = 1.618f + i; - B_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&B_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - const unsigned blocks = 512; - const unsigned threadsPerBlock = 256; - hipLaunchKernelGGL(vector_add, dim3(blocks), dim3(threadsPerBlock), - 0, 0, C_d, A_d, B_d, N); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - for (size_t i=0; i < N ; i++) { - if (C_h[i] != (A_h[i] + B_h[i])) { - printf("data mismatch. Local kernel failed"); - testResult = false; - break; - } - } - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(B_d)); - HIPCHECK(hipFree(C_d)); - - free(A_h); - free(B_h); - free(C_h); - - std::cout << "Local kernel executed successfully\n"; - return testResult; -} - -bool launch_dynamically_loaded_kernel() { - bool testResult = true; - int ret = 1; - - void* handle = dlopen("./liblazyLoad.so", RTLD_LAZY); - - if (!handle) { - std::cout << dlerror() << "\n"; - testResult = false; - return testResult; - } - - std::cout << "loaded liblazyLoad.so\n"; - - void* sym = dlsym(handle, "lazyLoad"); - if (!sym) { - std::cout << "unable to locate lazyLoad within lazyLoad.so\n"; - std::cout << dlerror() << "\n"; - dlclose(handle); - testResult = false; - return testResult; - } - - int(*fp)() = reinterpret_cast(sym); - - ret = fp(); - - if (ret == 0) { - std::cout << "dynamic launch failed\n"; - testResult = false; - } else { - std::cout << "dynamic launch succeeded\n"; - } - - dlclose(handle); - return testResult; -} - -int main() { - bool testResult = true; - - testResult &= launch_local_kernel(); - testResult &= launch_dynamically_loaded_kernel(); - - if (testResult == true) { - passed(); - } else { - failed("One or more tests failed"); - } -} - -#else // 
!defined(TEST_SHARED_LIBRARY) - -#include -#include "test_common.h" - -__global__ void vAdd(float* C, float* A, float* B, size_t N) { - size_t offset = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; - size_t stride = hipBlockDim_x * hipGridDim_x; - - for (size_t i = offset; i < N; i += stride) { - C[i] = A[i] + B[i]; - } -} - -int vectorAddKernelTest() { - int testResult = 1; - float *A_d, *B_d, *C_d; - float *A_h, *B_h, *C_h; - size_t N = 1000000; - size_t Nbytes = N * sizeof(float); - static int device = 0; - - HIPCHECK(hipSetDevice(device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device /*deviceID*/)); - A_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(A_h == nullptr ? hipErrorOutOfMemory : hipSuccess); - B_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(B_h == nullptr ? hipErrorOutOfMemory : hipSuccess); - C_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(C_h == nullptr ? hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < N; i++) { - A_h[i] = 1.618f + i; - B_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&B_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - const unsigned blocks = 512; - const unsigned threadsPerBlock = 256; - - std::cout << "info: Launching vAdd kernel\n"; - hipLaunchKernelGGL(vAdd, dim3(blocks), dim3(threadsPerBlock), - 0, 0, C_d, A_d, B_d, N); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - for (size_t i=0; i < N ; i++) { - if (C_h[i] != (A_h[i] + B_h[i])) { - printf("info: data mismatch. 
vAdd kernel failed"); - testResult = 0; - break; - } - } - - if (testResult) { - std::cout << "info: vAdd kernel executed fine\n"; - } - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(B_d)); - HIPCHECK(hipFree(C_d)); - - free(A_h); - free(B_h); - free(C_h); - return testResult; -} - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hip/hip_cooperative_groups.h" - -namespace cg = cooperative_groups; - -static const uint BufferSizeInDwords = 448 * 1024 * 1024; - -__global__ void test_gws(uint* buf, uint bufSize, - long* tmpBuf, long* result) { - extern __shared__ long tmp[]; - uint offset = blockIdx.x * blockDim.x + threadIdx.x; - uint stride = gridDim.x * blockDim.x; - cg::grid_group gg = cg::this_grid(); - - long sum = 0; - - for (uint i = offset; i < bufSize; i += stride) { - sum += buf[i]; - } - - tmp[threadIdx.x] = sum; - __syncthreads(); - - if (threadIdx.x == 0) { - sum = 0; - for (uint i = 0; i < blockDim.x; i++) { - sum += tmp[i]; - } - tmpBuf[blockIdx.x] = sum; - } - - gg.sync(); - - if (offset == 0) { - for (uint i = 1; i < gridDim.x; ++i) { - sum += tmpBuf[i]; - } - *result = sum; - } -} - -int cooperativeKernelTest() { - int testResult = 1; - uint* dA; - long* dB; - long* dC; - long* Ah; - - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - - if (!deviceProp.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch!" 
- "skipping the test!\n"; - return testResult; - } - - uint32_t* init = new uint32_t[BufferSizeInDwords]; - - for (uint32_t i = 0; i < BufferSizeInDwords; ++i) { - init[i] = i; - } - - std::cout << "info: Launch kernel to test hipLaunchCooperativeKernel api\n"; - std::cout << "info: running on bus 0x" << deviceProp.pciBusID << " " << - deviceProp.name << "\n"; - - size_t SIZE = BufferSizeInDwords * sizeof(uint); - - HIPCHECK(hipMalloc(reinterpret_cast(&dA), SIZE)); - HIPCHECK(hipMalloc(reinterpret_cast(&dC), sizeof(long))); - HIPCHECK(hipMemcpy(dA, init, SIZE, hipMemcpyHostToDevice)); - Ah = reinterpret_cast(malloc(sizeof(long))); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - dim3 dimBlock = dim3(1); - dim3 dimGrid = dim3(1); - - int numBlocks = 0; - uint workgroups[4] = {32, 64, 128, 256}; - - for (uint i = 0; i < 4; ++i) { - dimBlock.x = workgroups[i]; - /* Calculate the device occupancy to know how many blocks can be - run concurrently */ - hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, - test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long)); - dimGrid.x = deviceProp.multiProcessorCount * std::min(numBlocks, 32); - HIPCHECK(hipMalloc(reinterpret_cast(&dB), - dimGrid.x * sizeof(long))); - - void *params[4]; - params[0] = reinterpret_cast(&dA); - params[1] = (void*)&BufferSizeInDwords; - params[2] = reinterpret_cast(&dB); - params[3] = reinterpret_cast(&dC); - - std::cout << "Testing with grid size = " << dimGrid.x << - " and block size = " << dimBlock.x << "\n"; - - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_gws), - dimGrid, dimBlock, params, - dimBlock.x * sizeof(long), stream)); - - HIPCHECK(hipMemcpy(Ah, dC, sizeof(long), hipMemcpyDeviceToHost)); - - if (*Ah != (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2)) { - std::cout << "Data validation failed for grid size = " << dimGrid.x << - " and block size = " << dimBlock.x << "\n"; - HIPCHECK(hipFree(dB)); - std::cout << "Test failed! 
\n"; - testResult = 0; - break; - - } else { - std::cout << "info: data validated!\n"; - HIPCHECK(hipFree(dB)); - } - } - - if (testResult) { - std::cout <<"info: hipLaunchCooperativeKernel api executed fine\n"; - } - - HIPCHECK(hipStreamDestroy(stream)); - HIPCHECK(hipFree(dC)); - HIPCHECK(hipFree(dA)); - delete [] init; - free(Ah); - return testResult; -} - -extern "C" int lazyLoad() { - return vectorAddKernelTest() & cooperativeKernelTest(); -} - -#endif // !defined(TEST_SHARED_LIBRARY) diff --git a/tests/src/dynamicLoading/hipApiDynamicLoad.cpp b/tests/src/dynamicLoading/hipApiDynamicLoad.cpp deleted file mode 100644 index 5aadedf53b..0000000000 --- a/tests/src/dynamicLoading/hipApiDynamicLoad.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* Test is to load hip runtime using dlopen and get function pointer - * using dlsym for hip apis using dlsym() - * */ - -/* HIT_START - * BUILD_CMD: bit_extract_kernel.code %hc --genco %S/bit_extract_kernel.cpp -o bit_extract_kernel.code EXCLUDE_HIP_PLATFORM nvidia - * BUILD_CMD: %t %hc %S/%s -I%S/.. -o %T/%t -ldl EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_LIB_TYPE static - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include -#include -#include -#include -#include - -#define fileName "bit_extract_kernel.code" - -#define LEN 64 -#define SIZE LEN * sizeof(float) - -int main(int argc, char* argv[]) { - uint32_t *A_d, *C_d; - uint32_t *A_h, *C_h; - size_t N = 1000000; - size_t Nbytes = N * sizeof(uint32_t); - - void* handle = dlopen("libamdhip64.so", RTLD_LAZY); - if (!handle) { - std::cout << dlerror() << "\n"; - failed("hip runtime failed to load from dlopen\n"); - } - - std::cout << "hip runtime loaded using dlopen\n"; - - void* sym_hipGetDevice = dlsym(handle, "hipGetDevice"); - void* sym_hipMalloc = dlsym(handle, "hipMalloc"); - void* sym_hipMemcpyHtoD = dlsym(handle, "hipMemcpyHtoD"); - void* sym_hipMemcpyDtoH = dlsym(handle, "hipMemcpyDtoH"); - void* sym_hipModuleLoad = dlsym(handle, "hipModuleLoad"); - void* sym_hipGetDeviceProperties = dlsym(handle, "hipGetDeviceProperties"); - void* sym_hipModuleGetFunction = dlsym(handle, "hipModuleGetFunction"); - void* sym_hipModuleLaunchKernel = dlsym(handle, "hipModuleLaunchKernel"); - - dlclose(handle); - hipError_t (*dyn_hipGetDevice)(hipDevice_t*, int) = reinterpret_cast - (sym_hipGetDevice); - - hipError_t (*dyn_hipMalloc)(void**, uint32_t) = reinterpret_cast - (sym_hipMalloc); - - hipError_t (*dyn_hipMemcpyHtoD)(hipDeviceptr_t, void*, size_t) = reinterpret_cast - (sym_hipMemcpyHtoD); - - hipError_t (*dyn_hipMemcpyDtoH)(void*, hipDeviceptr_t, size_t) = reinterpret_cast - (sym_hipMemcpyDtoH); - - hipError_t (*dyn_hipModuleLoad)(hipModule_t*, const char*) = reinterpret_cast - (sym_hipModuleLoad); - - 
hipError_t (*dyn_hipGetDeviceProperties)(hipDeviceProp_t*, int) = reinterpret_cast - (sym_hipGetDeviceProperties); - - hipError_t (*dyn_hipModuleGetFunction)(hipFunction_t*, hipModule_t, const char*) = - reinterpret_cast - (sym_hipModuleGetFunction); - - hipError_t (*dyn_hipModuleLaunchKernel)(hipFunction_t, unsigned int, unsigned int, - unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, - hipStream_t, void**, void**) = reinterpret_cast - (sym_hipModuleLaunchKernel); - - hipDevice_t device; - HIPCHECK(dyn_hipGetDevice(&device, 0)); - - hipDeviceProp_t props; - HIPCHECK(dyn_hipGetDeviceProperties(&props, device)); - printf("info: running on device #%d %s\n", device, props.name); - printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); - A_h = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(A_h != NULL); - C_h = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(C_h != NULL); - - for (size_t i = 0; i < N; i++) { - A_h[i] = i; - } - - printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); - HIPCHECK(dyn_hipMalloc(reinterpret_cast(&A_d), Nbytes)); - HIPCHECK(dyn_hipMalloc(reinterpret_cast(&C_d), Nbytes)); - - printf("info: copy Host2Device\n"); - HIPCHECK(dyn_hipMemcpyHtoD((hipDeviceptr_t)(A_d), A_h, Nbytes)); - - printf("info: launch 'bit_extract_kernel' \n"); - - struct { - void* _Cd; - void* _Ad; - size_t _N; - } args; - args._Cd = reinterpret_cast (C_d); - args._Ad = reinterpret_cast (A_d); - args._N = (size_t) N; - size_t size = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; - - hipModule_t Module; - HIPCHECK(dyn_hipModuleLoad(&Module, fileName)); - - hipFunction_t Function; - HIPCHECK(dyn_hipModuleGetFunction(&Function, Module, "bit_extract_kernel")); - - HIPCHECK(dyn_hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, - reinterpret_cast(&config))); - - printf("info: copy Device2Host\n"); - 
HIPCHECK(dyn_hipMemcpyDtoH(C_h, (hipDeviceptr_t)(C_d), Nbytes)); - - printf("info: check result\n"); - for (size_t i = 0; i < N; i++) { - unsigned Agold = ((A_h[i] & 0xf00) >> 8); - if (C_h[i] != Agold) { - fprintf(stderr, "mismatch detected.\n"); - printf("%zu: %08x =? %08x (Ain=%08x)\n", i, C_h[i], Agold, A_h[i]); - failed("Test failed\n"); - } - } - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - free(A_h); - free(C_h); - passed(); -} diff --git a/tests/src/experimental/xcompile/gApi.c b/tests/src/experimental/xcompile/gApi.c deleted file mode 100644 index fe7d24a34d..0000000000 --- a/tests/src/experimental/xcompile/gApi.c +++ /dev/null @@ -1,31 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#include "hip/hip_runtime_api.h" - -#define size 1024*1024 - -int main(){ - float *Ad; - hipMalloc((void**)&Ad, size); -} diff --git a/tests/src/experimental/xcompile/gHipApi.c b/tests/src/experimental/xcompile/gHipApi.c deleted file mode 100644 index 1c857767dc..0000000000 --- a/tests/src/experimental/xcompile/gHipApi.c +++ /dev/null @@ -1,68 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#include"gHipApi.h" -#include "hip/hip_runtime_api.h" -#include"stdio.h" - -void _h2d(mem_manager *self){ - hipMemcpy(self->dev_ptr, self->hst_ptr, self->size, hipMemcpyHostToDevice); -} - -void _d2h(mem_manager *self) -{ - hipMemcpy(self->hst_ptr, self->dev_ptr, self->size, hipMemcpyDeviceToHost); -} - -void _malloc_hip(mem_manager *self) -{ - hipMalloc(&(self->dev_ptr), self->size); -} - -void _malloc_hst(mem_manager *self) -{ - self->hst_ptr = malloc(self->size); -} - -void memset_hst(mem_manager *mem, float val) -{ - float *tmp = (float*)mem->hst_ptr; - int i; - for(i=0;i<(mem->size)/sizeof(float);i++) - { - tmp[i] = val; - } -} - -mem_manager *mem_manager_start(size_t _size) -{ - mem_manager *tmp = (mem_manager*)malloc(sizeof(mem_manager)); - tmp->size = _size; - tmp->h2d = _h2d; - tmp->d2h = _d2h; - tmp->malloc_hip = _malloc_hip; - tmp->malloc_hst = _malloc_hst; - return tmp; -} - - diff --git a/tests/src/experimental/xcompile/gHipApi.h b/tests/src/experimental/xcompile/gHipApi.h deleted file mode 100644 index 1f75243798..0000000000 --- a/tests/src/experimental/xcompile/gHipApi.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#ifndef GHIPAPI_H -#define GHIPAPI_H - -#include - -typedef struct { - void* hst_ptr; - void* dev_ptr; - size_t size; - void (*h2d)(); - void (*d2h)(); - void (*malloc_hip)(); - void (*malloc_hst)(); -} mem_manager; - -mem_manager* mem_manager_start(size_t); - -void memset_hst(mem_manager*, float); - -#endif diff --git a/tests/src/experimental/xcompile/gapi.sh b/tests/src/experimental/xcompile/gapi.sh deleted file mode 100755 index ab8f481f05..0000000000 --- a/tests/src/experimental/xcompile/gapi.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -gcc -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gHipApi.c ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gHipApi.o - -gcc -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c hHipApi.c ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o hHipApi.o - -gcc 
-D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include hHipApi.o gHipApi.o ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -lm -o gApi - - diff --git a/tests/src/experimental/xcompile/ghipapi.sh b/tests/src/experimental/xcompile/ghipapi.sh deleted file mode 100755 index 3e79821cf9..0000000000 --- a/tests/src/experimental/xcompile/ghipapi.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -gcc -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gHipApi.c ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gHipApi.o - -gcc -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c hHipApi.c ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o hHipApi.o - -hipcc hHipApi.o gHipApi.o -o gHipApi - diff --git a/tests/src/experimental/xcompile/gxxApi.cpp b/tests/src/experimental/xcompile/gxxApi.cpp deleted file mode 100644 index 0e6540f2af..0000000000 --- a/tests/src/experimental/xcompile/gxxApi.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright (c) 2015 - 
2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#include "hip/hip_runtime_api.h" -#include - -#define size 1024 * 1024 - -int main() { - float* Ad; - hipMalloc((void**)&Ad, size); -} diff --git a/tests/src/experimental/xcompile/gxxApi1.cpp b/tests/src/experimental/xcompile/gxxApi1.cpp deleted file mode 100644 index 42fa342da8..0000000000 --- a/tests/src/experimental/xcompile/gxxApi1.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#include "gxxApi1.h" -#include "hip/hip_runtime_api.h" - -void* mallocHip(size_t size) { - void* ptr; - hipMalloc(&ptr, size); - return ptr; -} diff --git a/tests/src/experimental/xcompile/gxxApi1.h b/tests/src/experimental/xcompile/gxxApi1.h deleted file mode 100644 index dc029b74a1..0000000000 --- a/tests/src/experimental/xcompile/gxxApi1.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#ifndef GXXAPI1_H -#define GXXAPI1_H -#include - -void* mallocHip(size_t size); - - -#endif diff --git a/tests/src/experimental/xcompile/gxxHipApi.cpp b/tests/src/experimental/xcompile/gxxHipApi.cpp deleted file mode 100644 index 420fda8e8a..0000000000 --- a/tests/src/experimental/xcompile/gxxHipApi.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#include "gxxHipApi.h" - -memManager::memManager(const memManager& obj) { - devPtr = obj.devPtr; - hstPtr = obj.hstPtr; - size = obj.size; -} - -void memManager::H2D() { hipMemcpy(devPtr, hstPtr, size, hipMemcpyHostToDevice); } - -void memManager::D2H() { hipMemcpy(hstPtr, devPtr, size, hipMemcpyDeviceToHost); } diff --git a/tests/src/experimental/xcompile/gxxHipApi.h b/tests/src/experimental/xcompile/gxxHipApi.h deleted file mode 100644 index 87c3343cb3..0000000000 --- a/tests/src/experimental/xcompile/gxxHipApi.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#ifndef GXXHIPAPI_H -#define GXXHIPAPI_H - -#include -#include "hip/hip_runtime_api.h" - -class memManager { - private: - void* devPtr; - void* hstPtr; - size_t size; - - public: - memManager(size_t size) : size(size) {} - memManager() {} - memManager(const memManager& obj); - template - void setDevPtr(T* ptr) { - devPtr = (void*)ptr; - } - - template - T* getDevPtr() { - return (T*)devPtr; - } - - template - void setHstPtr(T* ptr) { - hstPtr = (void*)ptr; - } - - template - T* getHstPtr() { - return (T*)hstPtr; - } - - void H2D(); - void D2H(); - template - void hostMemSet(T val) { - T* tmpPtr = (T*)hstPtr; - for (int i = 0; i < size / sizeof(T); i++) { - tmpPtr[i] = val; - } - } - template - void memAlloc() { - hipMalloc((void**)&devPtr, size); - } -}; - -#endif diff --git a/tests/src/experimental/xcompile/gxxapi.sh b/tests/src/experimental/xcompile/gxxapi.sh deleted file mode 100755 index 592f7956d4..0000000000 --- a/tests/src/experimental/xcompile/gxxapi.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gxxHipApi.cpp ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gxxHipApi.o - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c hxxHipApi.cpp ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o 
hxxHipApi.o - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include gxxHipApi.o hxxHipApi.o ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gxxApi - diff --git a/tests/src/experimental/xcompile/gxxhipapi.sh b/tests/src/experimental/xcompile/gxxhipapi.sh deleted file mode 100755 index 43321f2fe0..0000000000 --- a/tests/src/experimental/xcompile/gxxhipapi.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gxxHipApi.cpp ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gxxHipApi.o - -hipcc -c hxxHipApi.cpp -o hxxHipApi.o - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include gxxHipApi.o hxxHipApi.o ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gxxHipApi - diff --git a/tests/src/experimental/xcompile/hHip.c b/tests/src/experimental/xcompile/hHip.c deleted file mode 100644 index 7113a24066..0000000000 --- a/tests/src/experimental/xcompile/hHip.c +++ /dev/null @@ -1,63 
+0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#include "gHipApi.h" -#include "hip/hip_runtime.h" - -#define LEN 1024*1024 -#define SIZE LEN * sizeof(float) - -__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd, size_t len) -{ - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if(tx < len) - { - Cd[tx] = Ad[tx] + Bd[tx]; - } -} - -int main() -{ - mem_manager *a, *b, *c; - a = mem_manager_start(SIZE); - b = mem_manager_start(SIZE); - c = mem_manager_start(SIZE); - a->malloc_hst(a); - b->malloc_hst(b); - c->malloc_hst(c); - a->malloc_hip(a); - b->malloc_hip(b); - c->malloc_hip(c); - memset_hst(a, 1.0f); - memset_hst(b, 2.0f); - a->h2d(a); - b->h2d(b); - dim3 dimGrid, dimBlock; - dimBlock.x = 1024, dimBlock.y = 1, dimBlock.z = 1; - dimGrid.x = LEN/1024, dimGrid.y = 1, dimGrid.z = 1; - hipLaunchKernel(HIP_KERNEL_NAME(Add), dimGrid, dimBlock, 0, 0, (float*)a->dev_ptr, (float*)b->dev_ptr, (float*)c->dev_ptr, LEN); - c->d2h(c); - assert(((float*)c->hst_ptr)[10] == 3.0f); - - -} diff --git a/tests/src/experimental/xcompile/hHipApi.c b/tests/src/experimental/xcompile/hHipApi.c deleted file mode 100644 index 8b37386e19..0000000000 --- a/tests/src/experimental/xcompile/hHipApi.c +++ /dev/null @@ -1,44 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#include"gHipApi.h" -#include"stdio.h" -#include "assert.h" -#define LEN 1024*1024 -#define SIZE LEN * sizeof(float) - -int main() -{ - mem_manager *a; - a = mem_manager_start(SIZE); - a->malloc_hst(a); - a->malloc_hip(a); - memset_hst(a, 1.0f); - a->h2d(a); - memset_hst(a, 0.0f); - a->d2h(a); - assert(((float*)a->hst_ptr)[10] == 1.0f); -} - - - diff --git a/tests/src/experimental/xcompile/hipapig.sh b/tests/src/experimental/xcompile/hipapig.sh deleted file mode 100755 index bb465b1824..0000000000 --- a/tests/src/experimental/xcompile/hipapig.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -gcc -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include gApi.c ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -lm -o gApi diff --git a/tests/src/experimental/xcompile/hipapigxx.sh b/tests/src/experimental/xcompile/hipapigxx.sh deleted file mode 100755 index 97f8080048..0000000000 --- a/tests/src/experimental/xcompile/hipapigxx.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include gxxApi.cpp ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o 
${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gxxApi diff --git a/tests/src/experimental/xcompile/hipg.sh b/tests/src/experimental/xcompile/hipg.sh deleted file mode 100755 index a143e7d1b6..0000000000 --- a/tests/src/experimental/xcompile/hipg.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -gcc -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gHipApi.c ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gHipApi.o - -hipcc -c hHip.c -o hHip.o - -hipcc hHip.o gHipApi.o -o hipG - diff --git a/tests/src/experimental/xcompile/hipgapi.sh b/tests/src/experimental/xcompile/hipgapi.sh deleted file mode 100755 index 3f78d5ba33..0000000000 --- a/tests/src/experimental/xcompile/hipgapi.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -gcc -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gHipApi.c ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gHipApi.o - -hipcc -c hHipApi.c -o hHipApi.o - -hipcc hHipApi.o gHipApi.o -o hipGApi - diff --git a/tests/src/experimental/xcompile/hipgxx.sh b/tests/src/experimental/xcompile/hipgxx.sh deleted file mode 
100755 index 0d3dea200f..0000000000 --- a/tests/src/experimental/xcompile/hipgxx.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gxxHipApi.cpp ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gxxHipApi.o - -hipcc -c hxxHip.cpp -o hxxHip.o - -hipcc hxxHip.o gxxHipApi.o -o hxxHip diff --git a/tests/src/experimental/xcompile/hipgxxapi.sh b/tests/src/experimental/xcompile/hipgxxapi.sh deleted file mode 100755 index 3b475a133b..0000000000 --- a/tests/src/experimental/xcompile/hipgxxapi.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -g++ -D__HIP_PLATFORM_AMD__= -I${HIP_PATH}/include -I${HCC_HOME}/include -c gxxHipApi.cpp ${HIP_PATH}/lib/device_util.cpp.o ${HIP_PATH}/lib/hip_device.cpp.o ${HIP_PATH}/lib/hip_error.cpp.o ${HIP_PATH}/lib/hip_event.cpp.o ${HIP_PATH}/lib/hip_hcc.cpp.o ${HIP_PATH}/lib/hip_memory.cpp.o ${HIP_PATH}/lib/hip_peer.cpp.o ${HIP_PATH}/lib/hip_stream.cpp.o ${HIP_PATH}/lib/unpinned_copy_engine.cpp.o -L${HCC_HOME}/lib -lhc_am -L${HSA_PATH}/lib -lhsa-runtime64 -lc++ -lmcwamp -ldl -o gxxHipApi.o - -hipcc -c hxxHipApi.cpp -o hxxHipApi.o - -hipcc hxxHipApi.o gxxHipApi.o -o hxxHipApi diff --git a/tests/src/experimental/xcompile/hipxxKer.cpp b/tests/src/experimental/xcompile/hipxxKer.cpp deleted file mode 100644 index e9819401ab..0000000000 --- a/tests/src/experimental/xcompile/hipxxKer.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "gxxApi1.h" - -#define len 1024 * 1024 -#define size len * sizeof(float) - -__global__ void Kern(hipLaunchParm lp, float* A) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - A[tx] += 1.0f; -} - -int main() { - float A[len]; - float* Ad; - - for (int i = 0; i < len; i++) { - A[i] = 1.0f; - } - - Ad = (float*)mallocHip(size); - memcpyHipH2D(Ad, A, size); - hipLaunchKernel(HIP_KERNEL_NAME(Kern), dim3(len / 1024), dim3(1024), 0, 0, Ad); - memcpyHipD2H(A, Ad, size); - for (int i = 0; i < len; i++) { - assert(A[i] == 2.0f); - } - - hipFree(Ad); -} diff --git a/tests/src/experimental/xcompile/hxxHip.cpp b/tests/src/experimental/xcompile/hxxHip.cpp deleted file mode 100644 index 3677c8616e..0000000000 --- a/tests/src/experimental/xcompile/hxxHip.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#include "gxxHipApi.h" -#include -#include "hip/hip_runtime.h" - -#define LEN 1024 * 1024 -#define SIZE LEN * sizeof(float) - -class memManager; - -template -__global__ void Add(hipLaunchParm lp, T* Ad, T* Bd, T* Cd, size_t Len) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if (tx < Len) { - Cd[tx] = Ad[tx] + Bd[tx]; - } -} - -int main() { - std::vector Vec(3); - for (int i = 0; i < Vec.size(); i++) { - Vec[i] = memManager(SIZE); - } - - for (int i = 0; i < 3; i++) { - Vec[i].setHstPtr(new float[LEN]); - Vec[i].memAlloc(); - } - - for (int i = 0; i < Vec.size() - 1; i++) { - Vec[i].hostMemSet((i + 1) * 1.0f); - Vec[i].H2D(); - } - - hipLaunchKernel(HIP_KERNEL_NAME(Add), dim3(LEN / 1024), dim3(1024), 0, 0, - Vec[0].getDevPtr(), Vec[1].getDevPtr(), Vec[2].getDevPtr(), - LEN); - - Vec[2].D2H(); - assert(Vec[0].getHstPtr()[10] + Vec[1].getHstPtr()[10] == - Vec[2].getHstPtr()[10]); -} diff --git a/tests/src/experimental/xcompile/hxxHipApi.cpp b/tests/src/experimental/xcompile/hxxHipApi.cpp deleted file mode 100644 index 52b1053890..0000000000 --- a/tests/src/experimental/xcompile/hxxHipApi.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#include "gxxHipApi.h" -#include -#include "hip/hip_runtime.h" - -#define LEN 1024 * 1024 -#define SIZE LEN * sizeof(float) - -class memManager; - -int main() { - std::vector Vec(4); - for (int i = 0; i < Vec.size(); i++) { - Vec[i] = memManager(SIZE); - } - - for (int i = 0; i < 4; i++) { - Vec[i].setHstPtr(new float[LEN]); - } - - for (int i = 0; i < 2; i++) { - Vec[i].memAlloc(); - } - - for (int i = 0; i < 2; i++) { - Vec[i].hostMemSet((i + 1) * 1.0f); - Vec[i].H2D(); - } - - Vec[2].setDevPtr(Vec[0].getDevPtr()); - Vec[3].setDevPtr(Vec[1].getDevPtr()); - - for (int i = 2; i < Vec.size(); i++) { - Vec[i].D2H(); - } - - assert(Vec[0].getHstPtr()[10] == Vec[2].getHstPtr()[10]); - assert(Vec[1].getHstPtr()[10] == Vec[3].getHstPtr()[10]); -} diff --git a/tests/src/g++/hipMalloc.cpp b/tests/src/g++/hipMalloc.cpp deleted file mode 100644 index cfb3ba421f..0000000000 --- a/tests/src/g++/hipMalloc.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- * */ - -/* HIT_START - * BUILD_CMD: hipMalloc %cxx -D__HIP_PLATFORM_NVIDIA__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_cxx_nvidia -L/usr/local/cuda/lib64 -lcudart -std=c++11 EXCLUDE_HIP_PLATFORM amd - * BUILD_CMD: hipMalloc %cxx -D__HIP_PLATFORM_AMD__ -I%hip-path/include -I%rocm-path/include %S/%s -Wl,--rpath=%rocm-path/lib %hip-path/lib/libamdhip64.so -o %T/hipMalloc_cxx_amd -std=c++11 EXCLUDE_HIP_PLATFORM nvidia - * TEST: hipMalloc_cxx_nvidia EXCLUDE_HIP_PLATFORM amd - * TEST: hipMalloc_cxx_amd EXCLUDE_HIP_PLATFORM nvidia - * HIT_END - */ - -#include -#include - -int main() { - int* Ad; - hipMalloc((void**)&Ad, 1024); - std::cout<<"PASSED!"< -#include "LaunchKernel.h" - -bool LaunchKernelArg() -{ - dim3 blocks = {1,1,1}; - dim3 threads = {1,1,1}; - - HIPCHECK(hipLaunchKernel(getKernelFunc(mykernel), blocks, threads, NULL, 0, 0)); - - return true; -} - -bool LaunchKernelArg1() -{ - int A = 0; - int *A_d = NULL; - dim3 blocks = {1,1,1}; - dim3 threads = {1,1,1}; - - // Allocate Device memory - HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); - - void* Args[]={&A_d}; - HIPCHECK(hipLaunchKernel(getKernelFunc(mykernel1), blocks, threads, Args, 0, 0)); - - // Get the result back to host memory - HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); - - HIPCHECK(hipFree(A_d)); - - if(A != 333) - return false; - - return true; -} - -bool LaunchKernelArg2() -{ - int A = 0; - int B = 123; - int *A_d = NULL; - int *B_d = NULL; - - dim3 blocks = {1,1,1}; - dim3 threads = {1,1,1}; - - // Allocate Device memory - HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); - - HIPCHECK(hipMalloc((void**)&B_d, sizeof(int))); - - // Copy data from host memory to device memory - HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); - - void* Args[]={&A_d, &B_d}; - HIPCHECK(hipLaunchKernel(getKernelFunc(mykernel2), blocks, threads, Args,0,0)); - - // Get the result back to host memory - HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), 
hipMemcpyDeviceToHost)); - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(B_d)); - - if(A != 123) - return false; - - return true; -} - -bool LaunchKernelArg3() -{ - int A = 321; - int B = 123; - int C = 0; - int *A_d = NULL; - int *B_d = NULL; - int *C_d = NULL; - - dim3 blocks = {1,1,1}; - dim3 threads = {1,1,1}; - - // Allocate Device memory - HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); - - HIPCHECK(hipMalloc((void**)&B_d, sizeof(int))); - - HIPCHECK(hipMalloc((void**)&C_d, sizeof(int))); - - // Copy data from host memory to device memory - HIPCHECK(hipMemcpy(A_d, &A, sizeof(int), hipMemcpyHostToDevice)); - - HIPCHECK(hipMemcpy(B_d, &B, sizeof(int), hipMemcpyHostToDevice)); - - void* Args[]={&A_d, &B_d, &C_d}; - HIPCHECK(hipLaunchKernel(getKernelFunc(mykernel3), blocks, threads, Args,0,0)); - - // Get the result back to host memory - HIPCHECK(hipMemcpy(&C, C_d, sizeof(int), hipMemcpyDeviceToHost)); - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(B_d)); - HIPCHECK(hipFree(C_d)); - - if(C != 444) - return false; - - return true; -} - -bool LaunchKernelArg4() -{ - int A = 0; - int *A_d = NULL; - dim3 blocks = {1,1,1}; - dim3 threads = {1,1,1}; - - // Allocate Device memory - HIPCHECK(hipMalloc((void**)&A_d, sizeof(int))); - - char c = 1; - short s = 10; - int i = 100; - struct things t = {2,20,200}; - - void* Args[]={&A_d, &c, &s, &i, &t}; - HIPCHECK(hipLaunchKernel(getKernelFunc(mykernel4), blocks, threads, Args, 0, 0)); - - // Get the result back to host memory - HIPCHECK(hipMemcpy(&A, A_d, sizeof(int), hipMemcpyDeviceToHost)); - - HIPCHECK(hipFree(A_d)); - - if (A != (c + s + i + t.c + t.s + t.i)) - return false; - - return true; -} - -int main() -{ - if( LaunchKernelArg() && - LaunchKernelArg1() && - LaunchKernelArg2() && - LaunchKernelArg3() && - LaunchKernelArg4()) - { - printf("PASSED!\n"); - } - else - printf("FAILED\n"); -} diff --git a/tests/src/gcc/LaunchKernel.h b/tests/src/gcc/LaunchKernel.h deleted file mode 100644 index 8f2c714465..0000000000 --- 
a/tests/src/gcc/LaunchKernel.h +++ /dev/null @@ -1,43 +0,0 @@ - -/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -struct things { - char c; - short s; - int i; -}; - -typedef enum func{ - mykernel, - mykernel1, - mykernel2, - mykernel3, - mykernel4 -}func; - -extern const void* getKernelFunc(enum func f); - -#ifdef __cplusplus -} -#endif \ No newline at end of file diff --git a/tests/src/gcc/gpu.cpp b/tests/src/gcc/gpu.cpp deleted file mode 100644 index 4e85933b5f..0000000000 --- a/tests/src/gcc/gpu.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * */ - - -#include -#include "LaunchKernel.h" - -extern "C" -{ - -__global__ void kernel() -{ - int a = 20; -} - -__global__ void kernel1(int *a) -{ - *a = 333; -} - -__global__ void kernel2(int *a, int*b) -{ - *a = *b; -} - -__global__ void kernel3(int *a, int*b, int* c) -{ - *c = *a+*b; -} - -__global__ void kernel4(int *a, char c, short s, int i, struct things t) -{ - *a = c + s + i + t.c + t.s + t.i; -} - -const void* funcTable[] = { - (const void*)kernel, - (const void*)kernel1, - (const void*)kernel2, - (const void*)kernel3, - (const void*)kernel4 }; - -const void* getKernelFunc(enum func f){ - return funcTable[f]; - } - -}//extern "C" diff --git a/tests/src/gcc/hipMalloc.c b/tests/src/gcc/hipMalloc.c deleted file mode 100644 index 391ab1c7e6..0000000000 --- a/tests/src/gcc/hipMalloc.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. 
All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * */ - -/* HIT_START - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_NVIDIA__ -I%hip-path/include -I/usr/local/cuda/include %S/%s -o %T/hipMalloc_cc_nvidia -L/usr/local/cuda/lib64 -lcudart EXCLUDE_HIP_PLATFORM amd - * BUILD_CMD: hipMalloc %cc -D__HIP_PLATFORM_AMD__ -I%hip-path/include %S/%s -Wl,--rpath=%hip-path/lib %hip-path/lib/libamdhip64.so -o %T/hipMalloc_cc_amd EXCLUDE_HIP_PLATFORM nvidia - * TEST: hipMalloc_cc_nvidia EXCLUDE_HIP_PLATFORM amd - * TEST: hipMalloc_cc_amd EXCLUDE_HIP_PLATFORM nvidia - * HIT_END - */ - - -#include -#include - -int main() -{ - int *Ad; - hipMalloc((void**)&Ad, 1024); - printf("PASSED!\n"); -} diff --git a/tests/src/hipC.cpp b/tests/src/hipC.cpp deleted file mode 100644 index cc808d4d61..0000000000 --- a/tests/src/hipC.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#include "hip/hip_runtime.h" -#include "test_common.h" -#include - -#define ITER 1 << 20 -#define SIZE 1024 * 1024 * sizeof(int) - -__global__ void Iter(hipLaunchParm lp, int* Ad) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if (tx == 0) { - for (int i = 0; i < ITER; i++) { - Ad[tx] += 1; - } - } -} - -int main() { - int A = 0, *Ad; - hipMalloc((void**)&Ad, SIZE); - hipMemcpy(Ad, &A, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, 0, Ad); - hipMemcpy(&A, Ad, SIZE, hipMemcpyDeviceToHost); - passed(); -} diff --git a/tests/src/hipCKernel.c b/tests/src/hipCKernel.c deleted file mode 100644 index 891165f831..0000000000 --- a/tests/src/hipCKernel.c +++ /dev/null @@ -1,20 +0,0 @@ -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" - -__global__ void Kernel(hipLaunchParm lp, float *Ad){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tx] += Ad[tx-1]; -} - -int main(){ - dim3 dimBlock; - dim3 dimGrid; - dimGrid.x = 1; - dimGrid.y = 1; - dimGrid.z = 1; - dimBlock.x = 1; - dimBlock.y = 1; - dimBlock.z = 1; - float *A; - hipLaunchKernel(HIP_KERNEL_NAME(Kernel), dimGrid, dimBlock, 0, 0, A); -} diff --git a/tests/src/hipEnvVar.cpp b/tests/src/hipEnvVar.cpp deleted file mode 100644 index f1a29bc9ad..0000000000 --- a/tests/src/hipEnvVar.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s test_common.cpp NVCC_OPTIONS -std=c++11 - * HIT_END - */ - -#include -#include -#include -#include "clara/clara.hpp" -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -using namespace std; -using namespace clara; -inline clara::Parser cmdline_parser(bool& help, std::string& env, int &device, bool &retDevCnt) { - return clara::Opt{retDevCnt} - ["-c"] - ("total number of GPUs available") | - - clara::Help{help} | - - clara::Opt{device,"device"} - ["-d"]["--device"] - ("select one GPU and return its pciBusID") | - - clara::Opt{env,"Set Env Value"} - ["-v"]["--EnvValue"] - ("send the list to HIP_VISIBLE_DEVICES env var, syntax -v="); -} - -int main(int argc, char** argv) { - bool help = false; - bool retDevCnt = false; - int c = 0; - int device = INT_MAX; - string env; - - auto cmd = cmdline_parser(help, env, device, retDevCnt); - const auto r = cmd.parse(Args{argc, argv}); - if (!r) { std::cout<<"Valid device must be >= 0"< devCount - 1)) { - printf("Selected device %d is out of bound. Devices on your system are in range %d - %d\n", - device, 0, devCount - 1); - return -1; - } - - if (retDevCnt) { - std::cout << devCount << std::endl; - } - if (device != INT_MAX) { - hipDevice_t deviceT; - hipDeviceGet(&deviceT, device); - - char pciBusId[100]; - memset(pciBusId, 0, 100); - hipDeviceGetPCIBusId(pciBusId, 100, deviceT); - - cout << pciBusId << endl; - } - exit(0); -} diff --git a/tests/src/hipEnvVarDriver.cpp b/tests/src/hipEnvVarDriver.cpp deleted file mode 100644 index bd5de426d6..0000000000 --- a/tests/src/hipEnvVarDriver.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and -associated documentation files (the "Software"), to deal in the Software without restriction, -including without limitation the rights to use, copy, modify, merge, publish, distribute, -sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT -NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -/* HIT_START - * BUILD: %t %s test_common.cpp LINK_OPTIONS -lpthread NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include -#include -#include "test_common.h" - -using namespace std; - -const string directed_dir = string(".") + PATH_SEPERATOR_STR + "directed_tests" + PATH_SEPERATOR_STR + "hipEnvVar"; -const string dir = string(".") + PATH_SEPERATOR_STR + "hipEnvVar"; - -int readHipEnvVar(string flags, char* buff){ - - std::cout << "\nFinding hipEnvVar in " << directed_dir << "...\n"; - FILE* directed_in = popen((directed_dir + flags).c_str(), "r"); - - if(fgets(buff, 512, directed_in) == NULL){ - std::cout << "Finding hipEnvVar in " << dir << "...\n"; - FILE* in = popen((dir + flags).c_str(), "r"); - if(fgets(buff, 512, in) == NULL){ - pclose(directed_in); - pclose(in); - return 1; - } - pclose(in); - } - std::cout << "hipEnvVar Found!\n"; - pclose(directed_in); - return 0; -} - -int getDeviceNumber(bool print_err=true) { - char buff[512]; - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - - if (readHipEnvVar(string(" -c"), buff)){ - strncpy(buff, "1", 512); - if (print_err){ - std::cerr << "The system cannot find hipEnvVar, using 1 as number of devices\n"; - } - } - if (print_err) { - std::cout << buff; - } - return atoi(buff); -} - -// Query the current device ID remotely to hipEnvVar -void getDevicePCIBusNumRemote(int deviceID, char* pciBusID) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - if (readHipEnvVar((" -d " + std::to_string(deviceID)), pciBusID)){ - std::cerr << "The system cannot find hipEnvVar\n"; - } - cout << pciBusID; - return; -} - -// Query the current device ID locally on AMD path -void getDevicePCIBusNum(int deviceID, char* pciBusID) { - hipDevice_t deviceT; - hipDeviceGet(&deviceT, deviceID); - - memset(pciBusID, 0, 512); - hipDeviceGetPCIBusId(pciBusID, 512, deviceT); -} - -int main() { - 
unsetenv(HIP_VISIBLE_DEVICES_STR); - unsetenv(CUDA_VISIBLE_DEVICES_STR); - std::vector devPCINum; - char pciBusID[512]; - // collect the device pci bus ID for all devices - int totalDeviceNum = getDeviceNumber(); - std::cout << "The total number of available devices is " << totalDeviceNum << std::endl - << "Valid index range is 0 - " << totalDeviceNum - 1 << std::endl; - for (int i = 0; i < totalDeviceNum; i++) { - getDevicePCIBusNum(i, pciBusID); - devPCINum.push_back(pciBusID); - std::cout << "The collected device PCI Bus ID of Device " << i << " is " << devPCINum.back() - << std::endl; - } - - // select each of the available devices to be the target device, - // query the returned device pci bus number, check if match the database - for (int i = 0; i < totalDeviceNum; i++) { - setenv("HIP_VISIBLE_DEVICES", (char*)std::to_string(i).c_str(), 1); - setenv("CUDA_VISIBLE_DEVICES", (char*)std::to_string(i).c_str(), 1); - getDevicePCIBusNumRemote(0, pciBusID); - if (devPCINum[i] == pciBusID) { - std::cout << "The returned PciBusID is not correct" << std::endl; - std::cout << "Expected " << devPCINum[i] << ", but get " << pciBusID << endl; - exit(-1); - } else { - continue; - } - } - - // check when set an invalid device number - setenv("HIP_VISIBLE_DEVICES", "1000,0,1", 1); - setenv("CUDA_VISIBLE_DEVICES", "1000,0,1", 1); - assert(getDeviceNumber(false) == 0); - - if (totalDeviceNum > 2) { - setenv("HIP_VISIBLE_DEVICES", "0,1,1000,2", 1); - setenv("CUDA_VISIBLE_DEVICES", "0,1,1000,2", 1); - assert(getDeviceNumber(false) == 2); - - setenv("HIP_VISIBLE_DEVICES", "0,1,2", 1); - setenv("CUDA_VISIBLE_DEVICES", "0,1,2", 1); - assert(getDeviceNumber(false) == 3); - // test if CUDA_VISIBLE_DEVICES will be accepted by the runtime - unsetenv(HIP_VISIBLE_DEVICES_STR); - unsetenv(CUDA_VISIBLE_DEVICES_STR); - setenv("CUDA_VISIBLE_DEVICES", "0,1,2", 1); - assert(getDeviceNumber(false) == 3); - } - - setenv("HIP_VISIBLE_DEVICES", "-100,0,1", 1); - setenv("CUDA_VISIBLE_DEVICES", 
"-100,0,1", 1); - assert(getDeviceNumber(false) == 0); - - std::cout << "PASSED" << std::endl; - return 0; -} \ No newline at end of file diff --git a/tests/src/hiprtc/hiprtcGetLoweredName.cpp b/tests/src/hiprtc/hiprtcGetLoweredName.cpp deleted file mode 100644 index 8367a7bc8e..0000000000 --- a/tests/src/hiprtc/hiprtcGetLoweredName.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* HIT_START - * BUILD: %t %s ../test_common.cpp HIPCC_OPTIONS -lhiprtc NVCC_OPTIONS -lnvrtc - * TEST: %t - * HIT_END - */ -#include - -#include -#include - -#include -#include -#include - - -static constexpr const char gpu_program[]{ -R"( -__device__ int V1; // set from host code -static __global__ void f1(int *result) { *result = V1 + 10; } -namespace N1 { -namespace N2 { -__constant__ int V2; // set from host code -__global__ void f2(int *result) { *result = V2 + 20; } -} -} -template -__global__ void f3(int *result) { *result = sizeof(T); } -)"}; - -int main() -{ - using namespace std; - - hiprtcProgram prog; - hiprtcCreateProgram(&prog, gpu_program, "prog.cu", 0, nullptr, nullptr); - - vector kernel_name_vec; - vector variable_name_vec; - vector variable_initial_value; - vector expected_result; - - kernel_name_vec.push_back("&f1"); - expected_result.push_back(10 + 100); - kernel_name_vec.push_back("N1::N2::f2"); - expected_result.push_back(20 + 200); - kernel_name_vec.push_back("f3"); - expected_result.push_back(sizeof(int)); - kernel_name_vec.push_back("f3"); - expected_result.push_back(sizeof(double)); - - for (auto&& x : kernel_name_vec) hiprtcAddNameExpression(prog, x.c_str()); - - variable_name_vec.push_back("&V1"); - variable_initial_value.push_back(100); - variable_name_vec.push_back("&N1::N2::V2"); - variable_initial_value.push_back(200); - - for (auto&& x : variable_name_vec) hiprtcAddNameExpression(prog, x.c_str()); - - hipDeviceProp_t props; - int device = 0; - hipGetDeviceProperties(&props, device); -#ifdef __HIP_PLATFORM_AMD__ - std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; -#else - std::string sarg = std::string("--gpu-architecture=compute_") - + std::to_string(props.major) + std::to_string(props.minor); -#endif - const char* options[] = { - sarg.c_str() - }; - - hiprtcResult compileResult = hiprtcCompileProgram(prog, 1, options); - - // Obtain compilation log from the program. 
- size_t logSize; - hiprtcGetProgramLogSize(prog, &logSize); - - if (logSize) { - string log(logSize, '\0'); - hiprtcGetProgramLog(prog, &log[0]); - - cout << log << '\n'; - } - - if (compileResult != HIPRTC_SUCCESS) { failed("Compilation failed."); } - - size_t codeSize; - hiprtcGetCodeSize(prog, &codeSize); - - vector code(codeSize); - hiprtcGetCode(prog, code.data()); - - hipModule_t module; -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipInit(0)); - hipCtx_t ctx; - HIPCHECK(hipCtxCreate(&ctx, 0, device)); -#endif - hipModuleLoadData(&module, code.data()); - - hipDeviceptr_t dResult; - int hResult = 0; - hipMalloc((void **)&dResult, sizeof(hResult)); - hipMemcpyHtoD(dResult, &hResult, sizeof(hResult)); - - for (decltype(variable_name_vec.size()) i = 0; i != variable_name_vec.size(); ++i) { - const char* name; - hiprtcGetLoweredName(prog, variable_name_vec[i].c_str(), &name); - - int initial_value = variable_initial_value[i]; - - hipDeviceptr_t variable_addr; - size_t bytes{}; - hipModuleGetGlobal(&variable_addr, &bytes, module, name); - hipMemcpyHtoD(variable_addr, &initial_value, sizeof(initial_value)); - } - - for (decltype(kernel_name_vec.size()) i = 0; i != kernel_name_vec.size(); ++i) { - const char* name; - hiprtcGetLoweredName(prog, kernel_name_vec[i].c_str(), &name); - - hipFunction_t kernel; - hipModuleGetFunction(&kernel, module, name); - - struct { hipDeviceptr_t a_; } args{dResult}; - - auto size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - - hipModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, - config); - - hipMemcpyDtoH(&hResult, dResult, sizeof(hResult)); - - if (expected_result[i] != hResult) { failed("Validation failed."); } - } - - hipFree((void *)dResult); - hipModuleUnload(module); - hiprtcDestroyProgram(&prog); - -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(ctx)); -#endif - passed(); -} diff --git 
a/tests/src/hiprtc/saxpy.cpp b/tests/src/hiprtc/saxpy.cpp deleted file mode 100644 index 241b863341..0000000000 --- a/tests/src/hiprtc/saxpy.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* HIT_START - * BUILD: %t %s ../test_common.cpp HIPCC_OPTIONS -lhiprtc NVCC_OPTIONS -lnvrtc - * TEST: %t - * HIT_END - */ -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -static constexpr auto NUM_THREADS{128}; -static constexpr auto NUM_BLOCKS{32}; - -static constexpr auto saxpy{ -R"( -#include "test_header.h" -#include "test_header1.h" -extern "C" -__global__ -void saxpy(real a, realptr x, realptr y, realptr out, size_t n) -{ - size_t tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < n) { - out[tid] = a * x[tid] + y[tid] ; - } -} -)"}; - -int main() -{ - using namespace std; - - hiprtcProgram prog; - int num_headers = 2; - std::vector header_names; - std::vector header_sources; - header_names.push_back("test_header.h"); - header_names.push_back("test_header1.h"); - header_sources.push_back("#ifndef HIPRTC_TEST_HEADER_H\n#define HIPRTC_TEST_HEADER_H\ntypedef float real;\n#endif //HIPRTC_TEST_HEADER_H\n"); - header_sources.push_back("#ifndef HIPRTC_TEST_HEADER1_H\n#define HIPRTC_TEST_HEADER1_H\ntypedef float* realptr;\n#endif //HIPRTC_TEST_HEADER1_H\n"); - hiprtcCreateProgram(&prog, // prog - saxpy, // buffer - "saxpy.cu", // name - num_headers, // numHeaders - &header_sources[0], // headers - &header_names[0]); // includeNames - - hipDeviceProp_t props; - int device = 0; - hipGetDeviceProperties(&props, device); -#ifdef __HIP_PLATFORM_AMD__ - std::string sarg = std::string("--gpu-architecture=") + props.gcnArchName; -#else - std::string sarg = std::string("--gpu-architecture=compute_") - + std::to_string(props.major) + std::to_string(props.minor); -#endif - const char* options[] = { - sarg.c_str() - }; - - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, options)}; - - size_t logSize; - hiprtcGetProgramLogSize(prog, &logSize); - - if (logSize) { - string log(logSize, '\0'); - hiprtcGetProgramLog(prog, &log[0]); - - cout << log << '\n'; - } - - if (compileResult != HIPRTC_SUCCESS) { 
failed("Compilation failed."); } - - size_t codeSize; - hiprtcGetCodeSize(prog, &codeSize); - - vector code(codeSize); - hiprtcGetCode(prog, code.data()); - - hiprtcDestroyProgram(&prog); - - hipModule_t module; - hipFunction_t kernel; - -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipInit(0)); - hipCtx_t ctx; - HIPCHECK(hipCtxCreate(&ctx, 0, device)); -#endif - hipModuleLoadData(&module, code.data()); - hipModuleGetFunction(&kernel, module, "saxpy"); - - size_t n = NUM_THREADS * NUM_BLOCKS; - size_t bufferSize = n * sizeof(float); - - float a = 5.1f; - unique_ptr hX{new float[n]}; - unique_ptr hY{new float[n]}; - unique_ptr hOut{new float[n]}; - - for (size_t i = 0; i < n; ++i) { - hX[i] = static_cast(i); - hY[i] = static_cast(i * 2); - } - - hipDeviceptr_t dX, dY, dOut; - hipMalloc((void **)&dX, bufferSize); - hipMalloc((void **)&dY, bufferSize); - hipMalloc((void **)&dOut, bufferSize); - hipMemcpyHtoD(dX, hX.get(), bufferSize); - hipMemcpyHtoD(dY, hY.get(), bufferSize); - - struct { - float a_; - hipDeviceptr_t b_; - hipDeviceptr_t c_; - hipDeviceptr_t d_; - size_t e_; - } args{a, dX, dY, dOut, n}; - - auto size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - - hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1, - 0, nullptr, nullptr, config); - hipMemcpyDtoH(hOut.get(), dOut, bufferSize); - - for (size_t i = 0; i < n; ++i) { - if (fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i])* 1e-6) { failed("Validation failed."); } - } - - hipFree((void *)dX); - hipFree((void *)dY); - hipFree((void *)dOut); - - hipModuleUnload(module); - -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(ctx)); -#endif - passed(); -} diff --git a/tests/src/hostcall/hipHostcallFuncCall.cpp b/tests/src/hostcall/hipHostcallFuncCall.cpp deleted file mode 100644 index d7ffc04bbc..0000000000 --- a/tests/src/hostcall/hipHostcallFuncCall.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* 
-Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia - * HIT_END - */ - -#include - -extern "C" __device__ HIP_vector_base::Native_vec_ __ockl_call_host_function( - uint64_t fptr, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6); - -static void callee(uint64_t* output, uint64_t* input) { - output[0] = input[0] + 1; - output[1] = input[1] + input[2]; -} - -__global__ void kernel(uint64_t fptr, uint64_t* retval0, uint64_t* retval1) { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - uint64_t arg0 = (uint64_t)fptr; - uint64_t arg1 = tid; - uint64_t arg2 = 42; - uint64_t arg3 = tid % 23; - uint64_t arg4 = 0; - uint64_t arg5 = 0; - uint64_t arg6 = 0; - uint64_t arg7 = 0; - - longlong2 result = {0, 0}; - if (tid % 71 != 1) { - result.data = __ockl_call_host_function(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - retval0[tid] = result.x; - retval1[tid] = result.y; - } -} - -static bool test() { - uint num_blocks = 5; - uint threads_per_block = 1000; - uint num_threads = num_blocks * threads_per_block; - - void* retval0_void; - HIPCHECK(hipHostMalloc(&retval0_void, 8 * num_threads)); - auto retval0 = reinterpret_cast(retval0_void); - for (uint i = 0; i != num_threads; ++i) { - retval0[i] = 0x23232323; - } - - void* retval1_void; - HIPCHECK(hipHostMalloc(&retval1_void, 8 * num_threads)); - auto retval1 = reinterpret_cast(retval1_void); - for (uint i = 0; i != num_threads; ++i) { - retval1[i] = 0x23232323; - } - - hipLaunchKernelGGL(kernel, dim3(num_blocks), dim3(threads_per_block), 0, 0, (uint64_t)callee, - retval0, retval1); - - hipStreamSynchronize(0); - - for (uint i = 0; i != num_threads; ++i) { - uint64_t value = retval0[i]; - if (i % 71 == 1) { - if (value != 0x23232323) { - printf("failed\n"); - return false; - } - } else { - if (value != i + 1) { - printf("failed\n"); - return false; - } - } - } - - return true; -} - -int main(int argc, char** argv) { - if (!test()) { - return 1; - } - - 
printf("passed\n"); - return 0; -} diff --git a/tests/src/hostcall/hipHostcallPrintThings.cpp b/tests/src/hostcall/hipHostcallPrintThings.cpp deleted file mode 100644 index 02119079c5..0000000000 --- a/tests/src/hostcall/hipHostcallPrintThings.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvidia - * HIT_END - */ - -#include - -// This is NOT a real printf test. It is a test for calling a host function -// which happens to be a wrapper around system printf. - -extern "C" __device__ HIP_vector_base::Native_vec_ __ockl_call_host_function( - ulong fptr, ulong arg0, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5, ulong arg6); - -// FuncCall service function that expects three arguments bundled in the -// request: the format string, and two uint64_t arguments. 
-void print_things_0(ulong* output, ulong* input) { - auto fmt = reinterpret_cast(input); - auto arg0 = input[2]; - auto arg1 = input[3]; - output[0] = fprintf(stdout, fmt, arg0, arg1); -} - -__global__ void kernel0(ulong fptr, ulong* retval) { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - ulong arg0 = fptr; - - const char* str = "(%lu -> %lu)\n"; - ulong arg1 = 0; - for (int ii = 0; ii != 8; ++ii) { - arg1 |= (ulong)str[ii] << (8 * ii); - } - ulong arg2 = 0; - for (int ii = 0; ii != 7; ++ii) { - arg2 |= (ulong)str[ii + 8] << (8 * ii); - } - - ulong arg3 = 42; - ulong arg4 = tid; - ulong arg5 = 0; - ulong arg6 = 0; - ulong arg7 = 0; - - long2 result = {0, 0}; - result.data = __ockl_call_host_function(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - *retval = result.x; -} - -// FuncCall service function that expects two arguments bundled in the request: -// a kernel "name" and a uint64_t thread ID. The format string is built into the -// service function itself. -void print_things_1(ulong* output, const ulong* input) { - auto name = reinterpret_cast(input[0]); - auto tid = input[1]; - output[0] = fprintf(stdout, "kernel: %s; tid: %lu\n", name, tid); -} - -__global__ void kernel1(ulong fptr, ulong name, ulong* retval) { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - ulong arg0 = fptr; - ulong arg1 = name; - ulong arg2 = tid; - ulong arg3 = 0; - ulong arg4 = 0; - ulong arg5 = 0; - ulong arg6 = 0; - ulong arg7 = 0; - - long2 result = {0, 0}; - result.data = __ockl_call_host_function(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - *retval = result.x; -} - -static bool test() { - void* retval_void; - HIPCHECK(hipHostMalloc(&retval_void, 8)); - auto retval = reinterpret_cast(retval_void); - *retval = 0x23232323; - - hipLaunchKernelGGL(kernel0, dim3(1), dim3(1), 0, 0, (ulong)print_things_0, retval); - hipStreamSynchronize(0); - if (*retval != strlen("(42 -> 0)\n")) { - return false; - } - - *retval = 0x23232323; - const char* name = "kernel1"; - 
hipLaunchKernelGGL(kernel1, dim3(1), dim3(1), 0, 0, (ulong)print_things_1, (ulong)name, retval); - hipStreamSynchronize(0); - if (*retval != strlen("kernel: kernel1; tid: 0\n")) { - return false; - } - - return true; -} - -int main(int argc, char** argv) { - if (!test()) { - printf("failed\n"); - return 1; - } - printf("passed\n"); - return 0; -} diff --git a/tests/src/ipc/MultiProcess.h b/tests/src/ipc/MultiProcess.h deleted file mode 100755 index 40b42e5ce2..0000000000 --- a/tests/src/ipc/MultiProcess.h +++ /dev/null @@ -1,157 +0,0 @@ -#pragma once - -#ifdef __unix__ - -#include -#include -#include -#include -#include - -template -struct Shmem { - std::atomic handle_; - std::atomic done_counter_; -}; - -template -struct ShmemMeta { - std::string shmem_name_; - int shmem_fd_; - Shmem* shmem_; -}; - -template -class MultiProcess { -public: - MultiProcess(size_t num_proc) : num_proc_(num_proc) {} - ~MultiProcess(); - - void DebugInfo(pid_t pid); - - pid_t SpawnProcess(bool debug_bkpt); - bool CreateShmem(); - - bool WriteHandleToShmem(T ipc_handle); - bool WaitTillAllChildReads(); - - bool ReadHandleFromShmem(T& ipc_handle); - bool NotifyParentDone(); - -private: - const size_t num_proc_; - bool debug_proc_; - ShmemMeta shmem_meta_obj_; -}; - -// Template Implementations -template -MultiProcess::~MultiProcess() { - if(munmap(shmem_meta_obj_.shmem_, sizeof(Shmem)) < 0) { - std::cout<<"Error Unmapping shared memory "< -void MultiProcess::DebugInfo(pid_t pid) { - const int delay = 1; - - if (pid == 0) { - std::cout<<" Child Process with ID: "< -pid_t MultiProcess::SpawnProcess(bool debug_bkpt) { - if (num_proc_ < 0) { - std::cout<<"Num Process cannot be less than 1"< -bool MultiProcess::CreateShmem() { - if (num_proc_ < 0) { - std::cout<<"Num Process cannot be less than 1"<)) != 0) { - std::cout<<"Cannot FTruncate "<*)mmap(0, sizeof(Shmem), PROT_READ | PROT_WRITE, - MAP_SHARED, shmem_meta_obj_.shmem_fd_, 0); - memset(&shmem_meta_obj_.shmem_->handle_, 0x00, sizeof(T)); 
- shmem_meta_obj_.shmem_->done_counter_ = -1; - - return true; -} - -template -bool MultiProcess::WriteHandleToShmem(T ipc_handle) { - memcpy(&shmem_meta_obj_.shmem_->handle_, &ipc_handle, sizeof(T)); - shmem_meta_obj_.shmem_->done_counter_ = 0; - return true; -} - -template -bool MultiProcess::WaitTillAllChildReads() { - size_t write_count = 0; - while (shmem_meta_obj_.shmem_->done_counter_ != num_proc_) { - ++write_count; - } - return true; -} - -template -bool MultiProcess::ReadHandleFromShmem(T& ipc_handle) { - size_t read_count = 0; - while (shmem_meta_obj_.shmem_->done_counter_ == -1) { - ++read_count; - } - memcpy(&ipc_handle, &shmem_meta_obj_.shmem_->handle_, sizeof(T)); - return true; -} - -template -bool MultiProcess::NotifyParentDone() { - ++shmem_meta_obj_.shmem_->done_counter_; - return true; -} - -#endif /* __unix__ */ diff --git a/tests/src/ipc/hipMultiProcIpcEvent.cpp b/tests/src/ipc/hipMultiProcIpcEvent.cpp deleted file mode 100644 index b06c010a01..0000000000 --- a/tests/src/ipc/hipMultiProcIpcEvent.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS -lrt - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "MultiProcess.h" - -void multi_process(int num_process, bool debug_process) { - -#ifdef __unix__ - - float *A_h, *B_h, *C_h; - float *A_d, *B_d, *C_d; - hipEvent_t start, stop; - size_t Nbytes = N * sizeof(float); - - MultiProcess* mProcess = new MultiProcess(num_process); - mProcess->CreateShmem(); - pid_t pid = mProcess->SpawnProcess(debug_process); - - // Parent Process - if (pid != 0) { - - unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; - if (blocks > 1024) blocks = 1024; - if (blocks == 0) blocks = 1; - - printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, - ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); - printf("iterations=%d\n", iterations); - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - // NULL stream check: - HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess)); - HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess)); - - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - - for (int i = 0; i < iterations; i++) { - //--- START TIMED REGION - long long hostStart = HipTest::get_time(); - // Record the start event - HIPCHECK(hipEventRecord(start, NULL)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - - - HIPCHECK(hipEventRecord(stop, NULL)); - HIPCHECK(hipEventSynchronize(stop)); - HIPCHECK(hipEventQuery(stop)); - long long 
hostStop = HipTest::get_time(); - //--- STOP TIMED REGION - - float eventMs = 1.0f; - // should fail - HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop)); - float hostMs = HipTest::elapsed_time(hostStart, hostStop); - - printf("host_time (gettimeofday) =%6.3fms\n", hostMs); - printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); - printf("\n"); - - } - - hipIpcEventHandle_t ipc_handle; - HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start)); - - mProcess->WriteHandleToShmem(ipc_handle); - mProcess->WaitTillAllChildReads(); - - } else { - hipEvent_t ipc_event; - hipIpcEventHandle_t ipc_handle; - mProcess->ReadHandleFromShmem(ipc_handle); - HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle)); - - HIPCHECK(hipEventSynchronize(ipc_event)); - HIPCHECK(hipEventDestroy(ipc_event)); - mProcess->NotifyParentDone(); - } - - if (pid != 0) { - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - printf("check:\n"); - HipTest::checkVectorADD(A_h, B_h, C_h, N, true); - - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); - delete mProcess; - } - -#endif /* __unix__ */ - -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - multi_process((N < 64) ? N : 64, debug_test); - passed(); -} diff --git a/tests/src/ipc/hipMultiProcIpcMem.cpp b/tests/src/ipc/hipMultiProcIpcMem.cpp deleted file mode 100644 index ec579dfd7b..0000000000 --- a/tests/src/ipc/hipMultiProcIpcMem.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS -lrt - * TEST: %t --N 4 - * HIT_END - */ - -#include "test_common.h" -#include "MultiProcess.h" - -#define NUM_ELEMS 1024 -#define OFFSET 128 - -void multi_process(int num_process, bool debug_process) { - -#ifdef __unix__ - - int* ipc_dptr = nullptr; - int* ipc_hptr = nullptr; - int* ipc_out_dptr = nullptr; - int* ipc_out_hptr = nullptr; - int* ipc_offset_dptr = nullptr; - - MultiProcess* mProcess = new MultiProcess(num_process); - mProcess->CreateShmem(); - pid_t pid = mProcess->SpawnProcess(debug_process); - - // Parent Process - if (pid != 0) { - hipIpcMemHandle_t ipc_handle; - memset(&ipc_handle, 0x00, sizeof(hipIpcMemHandle_t)); - - HIPCHECK(hipMalloc((void**)&ipc_dptr, NUM_ELEMS * sizeof(int))); - // Add offset to the dev_ptr - ipc_offset_dptr = ipc_dptr + OFFSET; - // Get handle for the offsetted device_ptr - HIPCHECK(hipIpcGetMemHandle(&ipc_handle, ipc_offset_dptr)); - - ipc_hptr = new int[NUM_ELEMS]; - for (size_t idx = 0; idx < NUM_ELEMS; ++idx) { - ipc_hptr[idx] = idx; - } - - HIPCHECK(hipMemset(ipc_dptr, 0x00, (NUM_ELEMS * sizeof(int)))); - HIPCHECK(hipMemcpy(ipc_dptr, ipc_hptr, (NUM_ELEMS * sizeof(int)), hipMemcpyHostToDevice)); - - mProcess->WriteHandleToShmem(ipc_handle); - - mProcess->WaitTillAllChildReads(); - - } else { - ipc_out_hptr = new int[NUM_ELEMS]; - memset(ipc_out_hptr, 0x00, (NUM_ELEMS * sizeof(int))); - - hipIpcMemHandle_t ipc_handle; - mProcess->ReadHandleFromShmem(ipc_handle); - // Open handle to get dev_ptr - HIPCHECK(hipIpcOpenMemHandle((void**)&ipc_out_dptr, ipc_handle, hipIpcMemLazyEnablePeerAccess)); - - HIPCHECK(hipMemcpy(ipc_out_hptr, ipc_out_dptr, (NUM_ELEMS * sizeof(int)), - hipMemcpyDeviceToHost)); - for (size_t idx = 0; idx < NUM_ELEMS; ++idx) { - if (ipc_out_hptr[idx] != idx) { - std::cout<<"Failing @ idx: "<< idx << std::endl; - } - } - mProcess->NotifyParentDone(); - HIPCHECK(hipIpcCloseMemHandle(ipc_out_dptr)); - delete[] ipc_out_hptr; - } - - if (pid 
!= 0) { - delete mProcess; - } - -#endif /* __unix__ */ - -} - - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - multi_process((N < 64) ? N : 64, debug_test); - passed(); -} diff --git a/tests/src/kernel/hipDynamicShared.cpp b/tests/src/kernel/hipDynamicShared.cpp deleted file mode 100644 index 4552761d2f..0000000000 --- a/tests/src/kernel/hipDynamicShared.cpp +++ /dev/null @@ -1,156 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -template -__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) { - T tmp; - if (groupElements < batch) - return; - // sdata[tid] += sdata[tid - batch/2] does not work when block size is - // greater than wave size because one wave may complete before another - // wave. 
- if (tid >= batch/2 && tid < groupElements) - tmp = sdata[tid - batch/2]; - __syncthreads(); - if (tid >= batch/2 && tid < groupElements) - sdata[tid] += tmp; - __syncthreads(); -} - -template -__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d, - size_t numElements, size_t groupElements) { - // declare dynamic shared memory - extern __shared__ double sdata0[]; - T* sdata = reinterpret_cast(sdata0); - - size_t gid = (blockIdx.x * blockDim.x + threadIdx.x); - size_t tid = threadIdx.x; - - // initialize dynamic shared memory - if (tid < groupElements) { - sdata[tid] = static_cast(tid); - } - __syncthreads(); - - // prefix sum inside dynamic shared memory - sum<512>(sdata, groupElements, tid); - sum<256>(sdata, groupElements, tid); - sum<128>(sdata, groupElements, tid); - sum<64>(sdata, groupElements, tid); - sum<32>(sdata, groupElements, tid); - sum<16>(sdata, groupElements, tid); - sum<8>(sdata, groupElements, tid); - sum<4>(sdata, groupElements, tid); - sum<2>(sdata, groupElements, tid); - C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements]; -} - -template -void testExternShared(size_t N, unsigned groupElements) { - size_t Nbytes = N * sizeof(T); - - T *A_d, *B_d, *C_d; - T *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = N/threadsPerBlock; - assert(N == blocks * threadsPerBlock); - - // printf("blocks: %d\nthreadsPerBlock: %d\nN: %zu\n", blocks, threadsPerBlock, N); - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - // calculate the amount of dynamic shared memory required - size_t groupMemBytes = groupElements * sizeof(double); - - // launch kernel with dynamic shared memory - hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel), dim3(blocks), dim3(threadsPerBlock), - groupMemBytes, 0, A_d, B_d, C_d, N, groupElements); - - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipMemcpy(C_h, 
C_d, Nbytes, hipMemcpyDeviceToHost)); - - // verify - for (size_t i = 0; i < N; ++i) { - size_t tid = (i % min(threadsPerBlock, groupElements)); - T sumFromSharedMemory = static_cast(tid * (tid + 1) / 2); - T expected = A_h[i] + B_h[i] + sumFromSharedMemory; - if (C_h[i] != expected) { - std::cout << std::fixed << std::setprecision(32); - std::cout << "At " << i << std::endl; - std::cout << " Computed:" << C_h[i] << std::endl; - std::cout << " Expected:" << expected << std::endl; - std::cout << sumFromSharedMemory << std::endl; - std::cout << A_h[i] << std::endl; - std::cout << B_h[i] << std::endl; - - failed("Failed at index:%zu\n", i); - } - } - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - // printf("info: set device to %d\n", p_gpuDevice); - HIPCHECK(hipSetDevice(p_gpuDevice)); - - testExternShared(1024, 4); - testExternShared(1024, 8); - testExternShared(1024, 16); - testExternShared(1024, 32); - testExternShared(1024, 64); - - testExternShared(65536, 4); - testExternShared(65536, 8); - testExternShared(65536, 16); - testExternShared(65536, 32); - testExternShared(65536, 64); - - testExternShared(1024, 4); - testExternShared(1024, 8); - testExternShared(1024, 16); - testExternShared(1024, 32); - testExternShared(1024, 64); - - testExternShared(65536, 4); - testExternShared(65536, 8); - testExternShared(65536, 16); - testExternShared(65536, 32); - testExternShared(65536, 64); - - passed(); -} diff --git a/tests/src/kernel/hipDynamicShared2.cpp b/tests/src/kernel/hipDynamicShared2.cpp deleted file mode 100644 index bca1fff722..0000000000 --- a/tests/src/kernel/hipDynamicShared2.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define LEN (16 * 1024) -#define SIZE (LEN * sizeof(float)) - -__global__ void vectorAdd(float* Ad, float* Bd) { - extern __shared__ float sBd[]; - int tx = threadIdx.x; - for (int i = 0; i < LEN / 64; i++) { - sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f; - Bd[tx + i * 64] = sBd[tx + i * 64]; - } -} - -int main() { - float *A, *B, *Ad, *Bd; - A = new float[LEN]; - B = new float[LEN]; - for (int i = 0; i < LEN; i++) { - A[i] = 1.0f; - B[i] = 1.0f; - } - hipMalloc(&Ad, SIZE); - hipMalloc(&Bd, SIZE); - hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); - - hipError_t ret = hipFuncSetAttribute( - reinterpret_cast(&vectorAdd), - hipFuncAttributeMaxDynamicSharedMemorySize, SIZE); - - if (ret != hipSuccess) { - printf("Failed requesting enough shared memory size(%zu), error: '%s'(%d), ignored!\n", - SIZE, hipGetErrorString(ret), ret); - passed(); - } - - hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd); - HIPCHECK(hipGetLastError()); - hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); - for (int i = 0; i < LEN; i++) { - assert(B[i] > 1.0f && B[i] < 3.0f); - } - passed(); -} diff --git a/tests/src/kernel/hipEmptyKernel.cpp b/tests/src/kernel/hipEmptyKernel.cpp deleted file mode 100644 index 23b8e961cf..0000000000 --- a/tests/src/kernel/hipEmptyKernel.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -__global__ void Empty(int param) {} - -int main() { - hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0); - hipDeviceSynchronize(); - passed(); -} diff --git a/tests/src/kernel/hipExtLaunchKernelGGL.cpp b/tests/src/kernel/hipExtLaunchKernelGGL.cpp deleted file mode 100644 index af40ea318a..0000000000 --- a/tests/src/kernel/hipExtLaunchKernelGGL.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test the Grid_Launch syntax. 
- -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_ext.h" -#include "test_common.h" - -struct _t { - double _a, _b, _c, _d, _e, _f, _g, _h, _i, _j; -}; - -typedef struct _t _T; - -__global__ void sKernel(_T s, double *a) { - *a = s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j; -} - -__global__ void mKernel(char f, short a, int b, double c, short d, int e, double* res) { - *res = a + b + c + d + e + f; -} - -void testMixData() { - double m = 0; - double *d_m; - HIPCHECK(hipMalloc(&d_m, sizeof(double))); - int a = 1, e = 10; - short b = 2, d = 4; - double c = 3.0; - char ff = 10; - hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff, b, a, c, d, e, d_m); - HIPCHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); - if (m != 30.0) { - std::cout << "M is:: " << m << std::endl; - failed("Mismatch"); - } - hipFree(d_m); -} -void testStruct() { - double m = 0; - double *d_m; - HIPCHECK(hipMalloc(&d_m, sizeof(double))); - _T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m); - HIPCHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); - if (m != 55.0) { - std::cout << "M is:: " << m << std::endl; - failed("Mismatch"); - } - hipFree(d_m); -} - -void test(size_t N) { - size_t Nbytes = N * sizeof(int); - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), - dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - 
HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkVectorADD(A_h, B_h, C_h, N); -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - test(N); - testStruct(); - testMixData(); - passed(); -} diff --git a/tests/src/kernel/hipGridLaunch.cpp b/tests/src/kernel/hipGridLaunch.cpp deleted file mode 100644 index 933e92e9b9..0000000000 --- a/tests/src/kernel/hipGridLaunch.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test the Grid_Launch syntax. 
- -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - - -// __device__ maps to __attribute__((hc)) -__device__ int foo(int i) { return i + 1; } - -//--- -// Syntax we would like to support with GRID_LAUNCH enabled: -template -__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N; i += stride) { - double foo = __hiloint2double(A_d[i], B_d[i]); - C_d[i] = __double2loint(foo) + __double2hiint(foo); // A_d[i] + B_d[i] ; - } -} - -int test_gl2(size_t N) { - size_t Nbytes = N * sizeof(int); - - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - - // Full vadd in one large chunk, to get things started: - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N); - - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - return 0; -} - -#if __HIP__ -int test_triple_chevron(size_t N) { - size_t Nbytes = N * sizeof(int); - - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - - // Full vadd in one large chunk, to get things started: - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - vectorADD2<<>>(A_d, B_d, C_d, N); - - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - HIPCHECK(hipDeviceSynchronize()); - - 
HipTest::checkVectorADD(A_h, B_h, C_h, N); - - return 0; -} -#endif - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - test_gl2(N); - -#if __HIP__ - test_triple_chevron(N); -#endif - - passed(); -} diff --git a/tests/src/kernel/hipLanguageExtensions.cpp b/tests/src/kernel/hipLanguageExtensions.cpp deleted file mode 100644 index a8f3badaa5..0000000000 --- a/tests/src/kernel/hipLanguageExtensions.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Collection of code to make sure that various features in the hip kernel language compile. - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" - -#include - -// cudaA - -// Simple tests for variable type qualifiers: -__device__ int deviceVar; - -// TODO-HCC __constant__ not working yet. 
-__constant__ int constantVar1; - -__constant__ __device__ int constantVar2; - -// Test HOST space: -__host__ void foo() { printf("foo!\n"); } - -__device__ __noinline__ int sum1_noinline(int a) { return a + 1; }; -__device__ __forceinline__ int sum1_forceinline(int a) { return a + 1; }; - - -__device__ __host__ float PlusOne(float x) { return x + 1.0; } - -__global__ void MyKernel(const float* a, const float* b, float* c, - unsigned N) { - // KERNELBEGIN; - - unsigned gid = threadIdx.x; - if (gid < N) { - c[gid] = a[gid] + PlusOne(b[gid]); - } - - // KERNELEND; -} - - -void callMyKernel() { - float *a, *b, *c; - const unsigned blockSize = 256; - unsigned N = blockSize; - - hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize), 0, 0, a, b, c, N); -} - - -template -__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) { -// KERNELBEGIN; -#ifdef NOT_YET - int a = __shfl_up(x, 1); -#endif - - float x = 1.0; - float z = sin(x); -#ifdef NOT_YET - float fastZ = __sin(x); -#endif - - __syncthreads(); - - - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N; i += stride) { - C_d[i] = A_d[i] + B_d[i]; - } - - // KERNELEND; -} - - -int main() { - printf("Hello world\n"); - passed(); -} diff --git a/tests/src/kernel/hipLaunchParm.cpp b/tests/src/kernel/hipLaunchParm.cpp deleted file mode 100644 index 1cc7ef9c62..0000000000 --- a/tests/src/kernel/hipLaunchParm.cpp +++ /dev/null @@ -1,962 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -// Memory alignment is broken -// Update: with latest changes the aligment is working fine, hence enabled -#define ENABLE_ALIGNMENT_TEST_SMALL_BAR 1 - -// Packed member atribute broken -#define ENABLE_PACKED_TEST 0 - -// Update: with latest changes struct class object -// from device is working fine, hence enabled -#define ENABLE_CLASS_OBJ_ACCESS 1 - -// accessing dynamic/heap memory from device is broken -#define ENABLE_HEAP_MEMORY_ACCESS 0 - -// Update: with latest changes it's working hence enabled -#define ENABLE_USER_STL 1 - -// Update: with latest changes it's working hence enabled -#define ENABLE_OUT_OF_ORDER_INITIALIZATION 1 - -// Direct initialization of struct broken, -// ip_d9 is a pointer, uint_t*, hipLaunchKernelStruct_h9 = {'c', ip_d9}; -#define ENABLE_DECLARE_INITIALIZATION_POINTER 0 - -// Bit fields are broken -#define ENABLE_BIT_FIELDS 0 - -static const int BLOCK_DIM_SIZE = 512; - -// allocate memory on device and host for result validation -static bool *result_d, *result_h; - -static hipError_t hipMallocError = hipErrorUnknown; -static hipError_t hipHostMallocError = hipErrorUnknown; -static hipError_t hipMemsetError = hipErrorUnknown; - -static void ResultValidation() { - hipMemcpy(result_h, result_d, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost); - - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - HIPASSERT(result_h[k] == true); - } - return; -} - -// Segregating the reset part as it was causing a problem when i put inside -// ResultValidation() function, the memory was not reset correctly for the -// tests which were disabled. -static void ResetValidationMem() { - // reset the memory to false to reuse it. 
- hipMemset(result_d, false, BLOCK_DIM_SIZE); - hipMemset(result_h, false, BLOCK_DIM_SIZE); - return; -} - -// This test is to verify Struct with variables -// support, read from device. -typedef struct hipLaunchKernelStruct1 { - int li; // local int - float lf; // local float - bool result; // local bool -} hipLaunchKernelStruct_t1; - -// This test is to verify struct with padding, read from device -typedef struct hipLaunchKernelStruct2 { - char c1; - long l1; - char c2; - long l2; - bool result; -} hipLaunchKernelStruct_t2; - -// This test is to verify struct with padding, read from device -typedef struct hipLaunchKernelStruct3 { - char bf1; - char bf2; - long l1; - char bf3; - bool result; -} hipLaunchKernelStruct_t3; - -// This test is to verify empty struct -typedef struct hipLaunchKernelStruct4 { - // empty struct, size will be verified from device side,size 1Byte -} hipLaunchKernelStruct_t4; - -// This test is to verify struct with pointer member variable. -typedef struct hipLaunchKernelStruct5 { - char c1; - char* cp; // char pointer -} hipLaunchKernelStruct_t5; - - -// This test is to verify struct with aligned(8), -// right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct6 { - char c1; - short int si; -} __attribute__((aligned(8))) hipLaunchKernelStruct_t6; - -// This test is to verify struct with aligned(16), -// right now it's brokenon hcc & hip-clang -typedef struct hipLaunchKernelStruct7 { - char c1; - short int si; -} __attribute__((aligned(16))) hipLaunchKernelStruct_t7; - -// This test is to verify struct with packed & aligned, -// size should be 4Bytes right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct8 { - char c1; - short int si; - bool b; -}__attribute__((packed, aligned(4))) hipLaunchKernelStruct_t8; - -// This test is to verify struct with packed, no alignment as Sam suggested -// size should be 4Bytes, right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct8A { - char 
c1; - short int si; - bool b; -}__attribute__((packed)) hipLaunchKernelStruct_t8A; - -// This test is to verify struct with alignment, no packing as Sam suggested -// size should be 8Bytes as no packing, right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct8B { - char c1; - short int si; - bool b; -}__attribute__((aligned(8))) hipLaunchKernelStruct_t8B; - -// This test is to verify const struct object -typedef struct hipLaunchKernelStruct9 { - char c1; - uint32_t* ip; // uint pointer -} hipLaunchKernelStruct_t9; - -// This test is to verify struct with stdint types, uintN_t -typedef struct hipLaunchKernelStruct10 { - uint64_t u64; - uint32_t u32; - uint8_t u8; -} hipLaunchKernelStruct_t10; - -// This test is to verify struct with volatile member -typedef struct hipLaunchKernelStruct11 { - int i1; - volatile unsigned int vint; -} hipLaunchKernelStruct_t11; - -// This test is to verify struct with simple class object -class base { - public: - int i = 0; - base() {} -}; -typedef struct hipLaunchKernelStruct12 { - base b; - char c1; -} hipLaunchKernelStruct_t12; - -// This test is to verify struct with __device__ func() attribute -typedef struct hipLaunchKernelStruct13 { - int i1; - __device__ int getvalue() { return i1; } -} hipLaunchKernelStruct_t13; - -// This test is to verify struct with array variable, -// write to from device -typedef struct hipLaunchKernelStruct14 { - int readint; - int writeint[BLOCK_DIM_SIZE]; // will write to this from device -} hipLaunchKernelStruct_t14; - -// This test is to verify struct with dynamic memory, new int -// the heap memory will be accessed from device -typedef struct hipLaunchKernelStruct15 { - char c1; - int* heapmem; // allocated using hipMalloc() -} hipLaunchKernelStruct_t15; - -// This test is to verify simple template struct -template -struct hipLaunchKernelStruct_t16 { - T t1; -}; - -// This test is to verify simple explicity template struct -template struct hipLaunchKernelStruct_t17 {}; 
-template<> // explicit template -struct hipLaunchKernelStruct_t17 { - int t1; -}; - -// This test is to verity write to struct memory using __device__ func() -typedef struct hipLaunchKernelStruct18 { - char c1; - __device__ void setChar(char c) { c1 = c; } - __device__ int getChar() { return c1; } -} hipLaunchKernelStruct_t18; - -// This test is to verity user defined STL, simple stack implementation -typedef struct stackNode { - int data; - stackNode* nextNode = NULL; -} stackNode_t; -typedef struct hipLaunchKernelStruct19 { - stackNode_t* stack = NULL; - unsigned int size_ = 0; - void pushMe(int value) { // not a device function, setting from host - stackNode_t* newNode; - hipMalloc((void**)&newNode, sizeof(stackNode_t)); - hipMemset(&newNode->data, value, sizeof(stackNode_t)); - //newNode->data = value; - ++size_; - if (stack == NULL) { - stack = newNode; - return; - } - stackNode_t* currentHead = stack; - stack = newNode; - stack->nextNode = currentHead; - return; - } - __device__ void popMe() { - stackNode_t* currentHead = stack; - stack = stack->nextNode; - --size_; - // delete currentHead; // no idea why delete not working - return; - } - int stackSize() { - return size_; - } -} hipLaunchKernelStruct_t19; - -// This test is to verify out of order initalizer of struct elements -// and access in-order, from device. 
-typedef struct hipLaunchKernelStruct20 { - char name; - int age; - int rank; -} hipLaunchKernelStruct_t20; - -// This test is to verify bit fields operations -// the size should be 1Bytes -typedef struct hipLaunchKernelStruct21 { - int i : 3; // limiting bits to 3 - int j : 2; // limiting bits to 2 -} hipLaunchKernelStruct_t21; - -// Passing struct to a hipLaunchKernelGGL(), -// read and write into the same struct -__global__ void hipLaunchKernelStructFunc1( - hipLaunchKernelStruct_t1 hipLaunchKernelStruct_, - bool* result_d1) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d1[x] = ((hipLaunchKernelStruct_.li == 1) - && (hipLaunchKernelStruct_.lf == 1.0) - && (hipLaunchKernelStruct_.result == false)); -} - -// Passing struct to a hipLaunchKernelGGL(), checks padding, -// read and write into the same struct -__global__ void hipLaunchKernelStructFunc2( - hipLaunchKernelStruct_t2 hipLaunchKernelStruct_, - bool* result_d2) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d2[x] = ((hipLaunchKernelStruct_.c1 == 'a') - && (hipLaunchKernelStruct_.l1 == 1.0) - && (hipLaunchKernelStruct_.c2 == 'b') - && (hipLaunchKernelStruct_.l2 == 2.0) ); -} - -// Passing struct to a hipLaunchKernelGGL(), checks padding, -// read and write into the same struct -__global__ void hipLaunchKernelStructFunc3( - hipLaunchKernelStruct_t3 hipLaunchKernelStruct_, - bool* result_d3) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d3[x] = ((hipLaunchKernelStruct_.bf1 == 1) - && (hipLaunchKernelStruct_.bf2 == 1) - && (hipLaunchKernelStruct_.l1 == 1.0) - && (hipLaunchKernelStruct_.bf3 == 1) ); -} - -// Passing empty struct to a hipLaunchKernelGGL(), -// check the size of 1Byte, set result_d4 to true if condition met -__global__ void hipLaunchKernelStructFunc4( - hipLaunchKernelStruct_t4 hipLaunchKernelStruct_, - 
bool* result_d4) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d4[x] = (sizeof(hipLaunchKernelStruct_) == 1); -} - -// Passing struct with pointer object to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc5( - hipLaunchKernelStruct_t5 hipLaunchKernelStruct_, - bool* result_d5) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d5[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (*hipLaunchKernelStruct_.cp == 'p')); -} - -// Passing struct which is aligned to 8Byte to a hipLaunchKernelGGL(), -// set the result_d6 to true if condition met -__global__ void hipLaunchKernelStructFunc6( - hipLaunchKernelStruct_t6 hipLaunchKernelStruct_, - bool* result_d6) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - // get the address of the struct - // size_t(p)%8 will be 0 if aligned to 8Byte address space - int *p = (int*)(&hipLaunchKernelStruct_); - result_d6[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%8 ==0)); -} - -// Passing struct which is aligned to 16Byte, -// set the result_d7 to true if condition met -__global__ void hipLaunchKernelStructFunc7( - hipLaunchKernelStruct_t7 hipLaunchKernelStruct_, - bool* result_d7) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - // get the address of the struct - // size_t(p)%16 will be 0 if aligned to 16Byte address space - int *p = (int*)(&hipLaunchKernelStruct_); - result_d7[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%16 ==0) ); -} - -// Passing struct which is packed & aligned to 4Byte, -// set the result_d8 to true if condition met -__global__ void hipLaunchKernelStructFunc8( - hipLaunchKernelStruct_t8 hipLaunchKernelStruct_, - bool* result_d8) { - int x = blockIdx.x * blockDim.x + 
threadIdx.x; - - // set the result to true if the condition met - // get the address of the xth element, struct[x], - // size_t(p)%4 will be 0 if aligned to 4Byte address space - int *p = (int*)(&hipLaunchKernelStruct_); - result_d8[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%4 ==0) - && (sizeof(hipLaunchKernelStruct_) == 4)); -} - -// Passing struct which is packed only, as Sam suggested, should be 4Bytes -// set the result_d8A to true if condition met -__global__ void hipLaunchKernelStructFunc8A( - hipLaunchKernelStruct_t8A hipLaunchKernelStruct_, - bool* result_d8A) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - // this is packed struct - // the address will not be aglined in this case hence condition removed - // only sizeof(hipLaunchKernelStruct_) will be valided - result_d8A[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && (sizeof(hipLaunchKernelStruct_) == 4)); -} - -// Passing struct which is aligned(4) only, as Sam suggested -// , size should be 8Bytes, set the result_d8B to true if condition met -__global__ void hipLaunchKernelStructFunc8B( - hipLaunchKernelStruct_t8B hipLaunchKernelStruct_, - bool* result_d8B) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - // get the address of the xth element, struct[x], - // size_t(p)%4 will be 0 if aligned to 4Byte address space - int *p = (int*)(&hipLaunchKernelStruct_); - result_d8B[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%8 == 0) - && (sizeof(hipLaunchKernelStruct_) == 8)); -} - -// Passing struct with uint pointer object to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc9( - const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_, - bool* result_d9) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition 
met - result_d9[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (*hipLaunchKernelStruct_.ip == 1)); -} - -// Passing struct with stdint types object, uintN_t, to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc10( - hipLaunchKernelStruct_t10 hipLaunchKernelStruct_, - bool* result_d10) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d10[x] = ((hipLaunchKernelStruct_.u64 == UINT64_MAX) - && (hipLaunchKernelStruct_.u32 == 1) - && (hipLaunchKernelStruct_.u8 == UINT8_MAX)); -} - -// Passing struct with volatile member, to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc11( - hipLaunchKernelStruct_t11 hipLaunchKernelStruct_, - bool* result_d11) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d11[x] = ((hipLaunchKernelStruct_.i1 == 1) - && (hipLaunchKernelStruct_.vint == 0)); -} - -// Passing struct with simple class obj, to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc12( - hipLaunchKernelStruct_t12 hipLaunchKernelStruct_, - bool* result_d12) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d12[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.b.i == 0)); -} - -// Passing struct with simple __device__ func(), to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc13( - hipLaunchKernelStruct_t13 hipLaunchKernelStruct_, - bool* result_d13) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d13[x] = ((hipLaunchKernelStruct_.i1 == 1) - && (hipLaunchKernelStruct_.getvalue() == 1)); -} - -// Passing struct with array variable, write to from device -__global__ void hipLaunchKernelStructFunc14( - hipLaunchKernelStruct_t14 hipLaunchKernelStruct_, - bool* result_d14) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - 
hipLaunchKernelStruct_.writeint[x] = 1; - // set the result to true if the condition met - result_d14[x] = ((hipLaunchKernelStruct_.readint == 1) - && (hipLaunchKernelStruct_.writeint[x] == 1)); -} - -// Passing struct with struct with dynamic memory, new int -// the heap memory will be accessed from device -__global__ void hipLaunchKernelStructFunc15( - hipLaunchKernelStruct_t15 hipLaunchKernelStruct_, - bool* result_d15) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d15[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.heapmem[x] == 1)); -} - -// Passing simple template struct -__global__ void hipLaunchKernelStructFunc16( - hipLaunchKernelStruct_t16 hipLaunchKernelStruct_, - bool* result_d16) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d16[x] = (hipLaunchKernelStruct_.t1 == 'c'); -} - -// Passing simple explicit template struct -__global__ void hipLaunchKernelStructFunc17( - hipLaunchKernelStruct_t17 hipLaunchKernelStruct_, - bool* result_d17) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d17[x] = (hipLaunchKernelStruct_.t1 == 1); -} - -// Passing struct and write to struct memory using __device__ func() -__global__ void hipLaunchKernelStructFunc18( - hipLaunchKernelStruct_t18 hipLaunchKernelStruct_, - bool* result_d18) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - hipLaunchKernelStruct_.setChar('c'); - // set the result to true if the condition met - result_d18[x] = (hipLaunchKernelStruct_.getChar() == 'c'); -} - -// Passing simple user defined stack implemenration, using __device__ func() -__global__ void hipLaunchKernelStructFunc19( - hipLaunchKernelStruct_t19 hipLaunchKernelStruct_) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // stack should be empty after the kernel execustion, verify on host side - 
hipLaunchKernelStruct_.popMe(); -} - -// Passing out of order initalized struct, access in-order -__global__ void hipLaunchKernelStructFunc20( - hipLaunchKernelStruct_t20 hipLaunchKernelStruct_, - bool* result_d20) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // accessing struct members in order - result_d20[x] = (hipLaunchKernelStruct_.name == 'A' - // strcmp(hipLaunchKernelStruct_.name, "AMD") -> strcmp is not broken - && hipLaunchKernelStruct_.age == 42 - && hipLaunchKernelStruct_.rank == 2); -} - -// Passing struct with bit fields -__global__ void hipLaunchKernelStructFunc21( - hipLaunchKernelStruct_t21 hipLaunchKernelStruct_, - bool* result_d21) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // accessing struct members in order - result_d21[x] = (hipLaunchKernelStruct_.i == 2 - && hipLaunchKernelStruct_.j == 0 - && (sizeof(hipLaunchKernelStruct_) == 1)); -} - -__global__ void vAdd(float* a) {} - -template -__global__ void myKernel(T1 a, T2 b) {} - - -//--- -// Some wrapper macro for testing: -#define WRAP(...) __VA_ARGS__ - -#define MY_LAUNCH_MACRO(cmd, elapsed, quiet) \ - do { \ - hipDeviceSynchronize(); \ - cmd; \ - hipDeviceSynchronize(); \ - } while (0); - - -#define MY_LAUNCH(command, doTrace, msg) \ - { \ - if (doTrace) printf("TRACE: %s %s\n", msg, #command); \ - command; \ - } - - -#define MY_LAUNCH_WITH_PAREN(command, doTrace, msg) \ - { \ - if (doTrace) printf("TRACE: %s %s\n", msg, #command); \ - (command); \ - } - - -int main() { - hipMallocError = hipMalloc((void**)&result_d, BLOCK_DIM_SIZE*sizeof(bool)); - hipHostMallocError = hipHostMalloc((void**)&result_h, BLOCK_DIM_SIZE*sizeof(bool)); - hipMemsetError = hipMemset(result_d, false, BLOCK_DIM_SIZE); - - // Validating memory & initial value, for result_d, result_h - HIPASSERT(hipMallocError == hipSuccess); - HIPASSERT(hipHostMallocError == hipSuccess); - HIPASSERT(hipMemsetError == hipSuccess); - - // Test: Passing Struct type, check access from device. 
- ResetValidationMem(); - hipLaunchKernelStruct_t1 hipLaunchKernelStruct_h1; - hipLaunchKernelStruct_h1.li = 1; - hipLaunchKernelStruct_h1.lf = 1.0; - hipLaunchKernelStruct_h1.result = false; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc1), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h1, - result_d); - ResultValidation(); - - // Test: Passing Struct type, checks padding - ResetValidationMem(); - hipLaunchKernelStruct_t2 hipLaunchKernelStruct_h2; - hipLaunchKernelStruct_h2.c1 = 'a'; - hipLaunchKernelStruct_h2.l1 = 1.0; - hipLaunchKernelStruct_h2.c2 = 'b'; - hipLaunchKernelStruct_h2.l2 = 2.0; - hipLaunchKernelStruct_h2.result = false; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc2), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h2, - result_d); - ResultValidation(); - - // Test: Passing Struct type, checks padding, assigning integer to a char - ResetValidationMem(); - hipLaunchKernelStruct_t3 hipLaunchKernelStruct_h3; - hipLaunchKernelStruct_h3.bf1 = 1; - hipLaunchKernelStruct_h3.bf2 = 1; - hipLaunchKernelStruct_h3.l1 = 1.0; - hipLaunchKernelStruct_h3.bf3 = 1; - hipLaunchKernelStruct_h3.result = false; - // initialize to false, will be set to - // true if the struct size is 1Byte, from device size - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc3), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h3, - result_d); - ResultValidation(); - - // Test: Passing empty struct - ResetValidationMem(); - hipLaunchKernelStruct_t4 hipLaunchKernelStruct_h4; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc4), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h4, - result_d); - ResultValidation(); - - // Test: Passing struct with pointer object to a hipLaunchKernelGGL() - ResetValidationMem(); - hipLaunchKernelStruct_t5 hipLaunchKernelStruct_h5; - char* cp_d5; // This is passed as pointer to struct member - // allocating memory for char pointer on device - 
HIPCHECK(hipMalloc((void**)&cp_d5, sizeof(char))); - HIPCHECK(hipMemset(cp_d5, 'p', sizeof(char))); - hipLaunchKernelStruct_h5.c1 = 'c'; - hipLaunchKernelStruct_h5.cp = cp_d5; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc5), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h5, - result_d); - ResultValidation(); - - // Test: Passing struct with aligned(8) - ResetValidationMem(); - hipLaunchKernelStruct_t6 hipLaunchKernelStruct_h6; - hipLaunchKernelStruct_h6.c1 = 'c'; - hipLaunchKernelStruct_h6.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc6), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h6, - result_d); - // alignment is broken hence disabled the validation part - #if ENABLE_ALIGNMENT_TEST_SMALL_BAR - ResultValidation(); - #endif - - - // Test: Passing struct with aligned(16) - ResetValidationMem(); - hipLaunchKernelStruct_t7 hipLaunchKernelStruct_h7; - hipLaunchKernelStruct_h7.c1 = 'c'; - hipLaunchKernelStruct_h7.si = 1; - #if ENABLE_ALIGNMENT_TEST_SMALL_BAR // This is broken on small bar - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc7), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h7, - result_d); - ResultValidation(); - #endif - - // Test: Passing struct with packed aligned to 4Bytes - ResetValidationMem(); - hipLaunchKernelStruct_t8 hipLaunchKernelStruct_h8; - hipLaunchKernelStruct_h8.c1 = 'c'; - hipLaunchKernelStruct_h8.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h8, - result_d); - // packed member broken on large and small bar setup. 
- #if ENABLE_PACKED_TEST - ResultValidation(); - #endif - - // Test: Passing struct with packed to 4Bytes - ResetValidationMem(); - hipLaunchKernelStruct_t8A hipLaunchKernelStruct_h8A; - hipLaunchKernelStruct_h8A.c1 = 'c'; - hipLaunchKernelStruct_h8A.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8A), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h8A, - result_d); - // packed member broken on large and small bar setup. - #if ENABLE_PACKED_TEST - ResultValidation(); - #endif - - // Test: Passing struct with aligned(4) to 4Bytes, size is 8Bytes - ResetValidationMem(); - hipLaunchKernelStruct_t8B hipLaunchKernelStruct_h8B; - hipLaunchKernelStruct_h8B.c1 = 'c'; - hipLaunchKernelStruct_h8B.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8B), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h8B, - result_d); - // alignment is broken hence disabled the validation part - #if ENABLE_ALIGNMENT_TEST_SMALL_BAR - ResultValidation(); - #endif - - // Test: Passing const struct object to a hipLaunchKernelGGL() - ResetValidationMem(); - uint32_t* ip_d9; - // allocating memory for char pointer on device - HIPCHECK(hipMalloc((void**)&ip_d9, sizeof(uint32_t))); - HIPCHECK(hipMemset(ip_d9, 1, sizeof(uint32_t))); - // ip_d9 passed as pointer to struct member, struct.ip = &ip_d9 - const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_h9 = {'c', ip_d9}; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc9), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h9, - result_d); - #if ENABLE_DECLARE_INITIALIZATION_POINTER - ResultValidation(); - #endif - - - // Test: Passing struct with uintN_t as member variables - ResetValidationMem(); - hipLaunchKernelStruct_t10 hipLaunchKernelStruct_h10; - hipLaunchKernelStruct_h10.u64 = UINT64_MAX; - hipLaunchKernelStruct_h10.u32 = 1; - hipLaunchKernelStruct_h10.u8 = UINT8_MAX; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc10), - 
dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h10, - result_d); - ResultValidation(); - - - // Test: Passing struct with uintN_t as member variables - ResetValidationMem(); - hipLaunchKernelStruct_t11 hipLaunchKernelStruct_h11; - hipLaunchKernelStruct_h11.i1 = 1; - hipLaunchKernelStruct_h11.vint = 0; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc11), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h11, - result_d); - ResultValidation(); - - // Test: Passing struct with simple class object - ResetValidationMem(); - hipLaunchKernelStruct_t12 hipLaunchKernelStruct_h12; - hipLaunchKernelStruct_h12.c1 = 'c'; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc12), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h12, - result_d); - #if ENABLE_CLASS_OBJ_ACCESS // access class obj from device broken - // Validation part of the struct, hipLaunchKernelStructFunc12 - ResultValidation(); - #endif - - // Test: Passing struct with simple __device__ func() - ResetValidationMem(); - hipLaunchKernelStruct_t13 hipLaunchKernelStruct_h13; - hipLaunchKernelStruct_h13.i1 = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc13), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h13, - result_d); - ResultValidation(); - - // Test: Passing struct with array variable, write to from device - ResetValidationMem(); - hipLaunchKernelStruct_t14 hipLaunchKernelStruct_h14; - hipLaunchKernelStruct_h14.readint = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc14), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h14, - result_d); - ResultValidation(); - - - // Test: Passing struct with heap memory, read to from device - ResetValidationMem(); - hipLaunchKernelStruct_t15 hipLaunchKernelStruct_h15; - hipLaunchKernelStruct_h15.c1 = 'c'; - - #if ENABLE_HEAP_MEMORY_ACCESS // causing page fault here, - // on small bar set - HIPCHECK(hipMalloc(&hipLaunchKernelStruct_h15.heapmem, - 
BLOCK_DIM_SIZE*sizeof(int))); - HIPCHECK(hipMemset(&hipLaunchKernelStruct_h15.heapmem, - 0, BLOCK_DIM_SIZE)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc15), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h15, - result_d); - ResultValidation(); - #endif - - // Test: Passing simple template struct - ResetValidationMem(); - hipLaunchKernelStruct_t16 hipLaunchKernelStruct_h16; - hipLaunchKernelStruct_h16.t1 = 'c'; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc16), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h16, - result_d); - ResultValidation(); - - // Test: Passing simple explicit template struct - ResetValidationMem(); - hipLaunchKernelStruct_t17 hipLaunchKernelStruct_h17; - hipLaunchKernelStruct_h17.t1 = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc17), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h17, - result_d); - ResultValidation(); - - // Test: Passing struct with simple __device__ func() to struct memory - ResetValidationMem(); - hipLaunchKernelStruct_t18 hipLaunchKernelStruct_h18; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc18), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h18, - result_d); - ResultValidation(); - - // Test: Passing user defined stack, - ResetValidationMem(); - hipLaunchKernelStruct_t19 hipLaunchKernelStruct_h19; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc19), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h19); - #if ENABLE_USER_STL - // Validation part of the struct, hipLaunchKernelStructFunc19 - HIPASSERT(hipLaunchKernelStruct_h19.stackSize() == 0); - #endif - - // Test: Passing struct which is initiazed out of order - // accessing same elements in order from device - ResetValidationMem(); - hipLaunchKernelStruct_t20 hipLaunchKernelStruct_h20; - hipLaunchKernelStruct_h20.name = 'A'; - hipLaunchKernelStruct_h20.rank = 2; - hipLaunchKernelStruct_h20.age = 42; - 
bool *result_d20, *result_h20; - #if ENABLE_OUT_OF_ORDER_INITIALIZATION - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc20), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h20, result_d); - ResultValidation(); - #endif - - // Test: Passing struct with bit fields operation - // accessing same elements in order from device - ResetValidationMem(); - hipLaunchKernelStruct_t21 hipLaunchKernelStruct_h21 = - // out of order initalization - {2,0}; - bool *result_d21, *result_h21; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc21), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h21, result_d); - #if ENABLE_BIT_FIELDS - ResultValidation(); - #endif - - // Test: Passing the different hipLaunchParm options: - float* Ad; - hipMalloc((void**)&Ad, 1024); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), size_t(1024), 1, 0, 0, Ad); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), 1024, dim3(1), 0, 0, Ad); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), 1, 0, 0, Ad); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), dim3(1), 0, 0, Ad); - - // Test: Passing macro to hipLaunchKernelGGL -#define KERNEL_CONFIG dim3(1024), dim3(1), 0, 0 - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), KERNEL_CONFIG, Ad); - - // Test: Same thing with templates: - int a; - float b; - hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel), KERNEL_CONFIG, a, b); - -#define TYPE_PARAM_CONFIG int, float - hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel), KERNEL_CONFIG, a, b); - - // Test: Passing hipLaunchKernelGGL inside another macro: - float e0; - MY_LAUNCH_MACRO(hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad), e0, j); - MY_LAUNCH_MACRO(WRAP(hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad)), e0, j); - -#ifdef EXTRA_PARENS_1 - // Don't wrap hipLaunchKernelGGL in extra set of parens: - MY_LAUNCH_MACRO((hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad)), e0, j); -#endif - - MY_LAUNCH(hipLaunchKernelGGL(vAdd, dim3(1024), dim3(1), - 0, 0, 
Ad), true, "firstCall"); - - float* A; - float e1; - MY_LAUNCH_WITH_PAREN(hipMalloc(&A, 100), true, "launch2"); - -#ifdef EXTRA_PARENS_2 - // MY_LAUNCH_WITH_PAREN wraps cmd in () which can cause issues. - MY_LAUNCH_WITH_PAREN(hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad), true, "firstCall"); -#endif - - HIPCHECK(hipHostFree(result_h)); - HIPCHECK(hipFree(result_d)); - - passed(); -} diff --git a/tests/src/kernel/hipLaunchParmFunctor.cpp b/tests/src/kernel/hipLaunchParmFunctor.cpp deleted file mode 100644 index e263d2ba74..0000000000 --- a/tests/src/kernel/hipLaunchParmFunctor.cpp +++ /dev/null @@ -1,425 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp HIPCC_OPTIONS -O3 - * TEST: %t - * HIT_END - */ - -#include "../test_common.h" - -#define test_passed(test_name) printf("%s %s PASSED!%s\n", KGRN, #test_name, KNRM); - -class HipFunctorTests { - public: - // Test that a class functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForSimpleClassFunctor(void); - // Test that a templated class functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForClassTemplateFunctor(void); - // Test that a class functor object ptr can be passed to hiplaunchparam - // and can be used in kernel - void TestForClassObjPtrFunctor(void); - // Test that a class object containing functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForFunctorContainInClassObj(void); - // Test that a stuct functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForSimpleStructFunctor(void); - // Test that a stuct functor object ptr can be passed to hiplaunchparam - // and can be used in kernel - void TestForStructObjPtrFunctor(void); - // Test that a templated struct functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForStructTemplateFunctor(void); - // Test that a struct object containing functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForFunctorContainInStructObj(void); -}; - -static const int BLOCK_DIM_SIZE = 1024; -static const int THREADS_PER_BLOCK = 1; - -// class functor tests - -// Simple doubler Functor -class DoublerFunctor{ - public: - __device__ int operator()(int x) { return x * 2;} -}; - -// simple doubler functor passed to kernel -__global__ void DoublerFunctorKernel( - DoublerFunctor doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForSimpleClassFunctor(void) { - DoublerFunctor doubler; 
- bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIPCHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults); - - // Validation part of TestForSimpleClassFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); -} - -// pointer functor passed to kernel -__global__ void PtrDoublerFunctorKernel( - DoublerFunctor *doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = (*doubler_)(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForClassObjPtrFunctor(void) { - DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)]; - bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIPCHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults); - - // Validation part of TestForClassObjPtrFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for 
(int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); - delete[] ptrdoubler; -} - -class compare { - public: - template - __device__ bool operator()(const T1& v1, const T2& v2) { - return v1 > v2; - } -}; - -// template functor passed to kernel -__global__ void TemplateFunctorKernel( - compare compare_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - deviceResult[x] = compare_(2.2, 2.1); - deviceResult[x] = compare_(2, 1); - deviceResult[x] = compare_('b', 'a'); -} - -void HipFunctorTests::TestForClassTemplateFunctor(void) { - compare comparefunctor; - bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIPCHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults); - - // Validation part of TestForClassTemplateFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); -} - - -// Doubler calculator -class DoublerCalculator { - public: - int a, result; - // fucntor contained in class object - DoublerFunctor doubler; -}; - -// doubler functor conatined in class obj passed to kernel -__global__ void DoublerCalculatorFunctorKernel( - DoublerCalculator doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_.doubler(doubler_.a); - deviceResult[x] 
= (doubler_.result == result); -} - -void HipFunctorTests::TestForFunctorContainInClassObj(void) { - DoublerCalculator Doubler; - bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - Doubler.a = 5; - Doubler.result = 10; - // pass comparefunctor to hipLaunchParm - - HIPCHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults); - - // Validation part of TestForStructTemplateFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); -} - -// Struct functor tests - -// Simple doubler Functor -struct sDoublerFunctor { - public: - __device__ int operator()(int x) { return x * 2;} -}; - - -// simple sturct doubler functor passed to kernel -__global__ void structDoublerFunctorKernel( - sDoublerFunctor doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForSimpleStructFunctor(void) { - sDoublerFunctor doubler; - bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIPCHECK(hipMemcpy(deviceResults, 
hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults); - - // Validation part of TestForSimpleStructFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); -} - -// ptr functor passed to kernel -__global__ void structPtrDoublerFunctorKernel( - sDoublerFunctor *doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = (*doubler_)(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForStructObjPtrFunctor(void) { - sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)]; - bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIPCHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults); - - // Validation part of TestForStructObjPtrFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); - delete[] ptrdoubler; -} - -struct sCompare { - public: - template< typename T1, typename T2 > - __device__ bool operator()(const T1& v1, const T2& v2) { - return v1 > v2; - } -}; - -// template functor passed 
to kernel -__global__ void structTemplateFunctorKernel( - sCompare compare_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - deviceResult[x] = compare_(2.2, 2.1); - deviceResult[x] = compare_(2, 1); - deviceResult[x] = compare_('b', 'a'); -} - -void HipFunctorTests::TestForStructTemplateFunctor(void) { - sCompare comparefunctor; - bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIPCHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - - // pass comparefunctor to hipLaunchKernelGGL - hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults); - - // Validation part of TestForStructTemplateFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); -} - -// Doubler calculator struct -struct sDoublerCalculator { - public: - int a, result; - // fucntor contained in class object - DoublerFunctor doubler; -}; - - - -// doubler functor contained in struct passed to kernel -__global__ void DoublerCalculatorFunctorKernel( - sDoublerCalculator doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_.doubler(doubler_.a); - deviceResult[x] = (doubler_.result == result); -} - -void HipFunctorTests::TestForFunctorContainInStructObj(void) { - sDoublerCalculator Doubler; - bool *deviceResults, *hostResults; - HIPCHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - 
HIPCHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - Doubler.a = 5; - Doubler.result = 10; - HIPCHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - - - // pass comparefunctor to hipLaunchKernelGGL - hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults); - - // Validation part of TestForStructTemplateFunctor - HIPCHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - HIPASSERT(hostResults[k] == true); - HIPCHECK(hipHostFree(hostResults)); - HIPCHECK(hipFree(deviceResults)); -} - -int main() { - HipFunctorTests FunctorTests; - FunctorTests.TestForSimpleClassFunctor(); - test_passed(TestForSimpleClassFunctor); - - FunctorTests.TestForClassObjPtrFunctor(); - test_passed(TestForClassObjPtrFunctor); - - FunctorTests.TestForClassTemplateFunctor(); - test_passed(TestForClassTemplateFunctor); - - FunctorTests.TestForSimpleStructFunctor(); - test_passed(TestForSimpleStructFunctor); - - FunctorTests.TestForStructObjPtrFunctor(); - test_passed(TestForStructObjPtrFunctor); - - FunctorTests.TestForStructTemplateFunctor(); - test_passed(TestForStructTemplateFunctor); - - FunctorTests.TestForFunctorContainInClassObj(); - test_passed(TestForFunctorContainInClassObj); - - FunctorTests.TestForFunctorContainInStructObj(); - test_passed(TestForFunctorContainInStructObj); -} diff --git a/tests/src/kernel/hipPrintfKernel.cpp b/tests/src/kernel/hipPrintfKernel.cpp deleted file mode 100644 index 975dcd6b16..0000000000 --- a/tests/src/kernel/hipPrintfKernel.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#define HIP_ENABLE_PRINTF - -#include "test_common.h" - -__global__ void run_printf() { printf("Hello World\n"); } - -int main() { - int device_count = 0; - hipGetDeviceCount(&device_count); - for (int i = 0; i < device_count; ++i) { - hipSetDevice(i); - hipLaunchKernelGGL(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0); - hipDeviceSynchronize(); - } - passed(); -} diff --git a/tests/src/kernel/hipShflTests.cpp b/tests/src/kernel/hipShflTests.cpp deleted file mode 100644 index 4b37489fad..0000000000 --- a/tests/src/kernel/hipShflTests.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" -#include - -#define WIDTH 4 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -// Device (Kernel) function, it must be void -template -__global__ void matrixTranspose(T* out, T* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - T val = in[x]; - for (int i = 0; i < width; i++) { - for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i); - } -} - -// CPU implementation of matrix transpose -template -void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -void getFactor(int& fact) { fact = 101; } -void getFactor(unsigned int& fact) { fact = static_cast(INT32_MAX)+1; } -void getFactor(float& fact) { fact = 2.5; } -void getFactor(__half& fact) { fact = 2.5; } -void getFactor(double& fact) { fact = 2.5; } -void getFactor(long& fact) { fact = 202; } -void getFactor(unsigned long& fact) { fact = static_cast(__LONG_MAX__)+1; } -void getFactor(long long& fact) { fact = 303; } -void getFactor(unsigned long long& fact) { fact = static_cast(__LONG_LONG_MAX__)+1; } - -template int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { - int errors = 0; - for (int i = 0; i < NUM; i++) { - if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { - errors++; - } - } - return errors; -} - -template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { - int errors = 0; - for (int i = 0; i < NUM; i++) { - if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { - errors++; - } - } - return errors; -} - -template -void init(T* Matrix) { - // initialize the input data - T factor; - getFactor(factor); - for (int i = 0; i 
< NUM; i++) { - Matrix[i] = (T)i + factor; - } -} - -template <> -void init(__half* Matrix) { - // initialize the input data - __half factor; - getFactor(factor); - for (int i = 0; i < NUM; i++) { - Matrix[i] = i + __half2float(factor); - } -} - -template -void runTest() { - T* Matrix; - T* TransposeMatrix; - T* cpuTransposeMatrix; - - T* gpuMatrix; - T* gpuTransposeMatrix; - - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); - - int errors; - - Matrix = (T*)malloc(NUM * sizeof(T)); - TransposeMatrix = (T*)malloc(NUM * sizeof(T)); - cpuTransposeMatrix = (T*)malloc(NUM * sizeof(T)); - - init(Matrix); - - // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(T)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(T)); - - // Memory transfer from host to device - hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice); - - // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0, - gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Memory transfer from device to host - hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost); - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - errors = compare(TransposeMatrix, cpuTransposeMatrix); - double eps = 1.0E-6; - // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - if (errors != 0) { - failed("Mismatch present"); - } -} - -int main() { - runTest(); - runTest(); - runTest(); - runTest(); - runTest<__half>(); - runTest(); - runTest(); - runTest(); - runTest(); - passed(); -} diff --git a/tests/src/kernel/hipShflUpDownTest.cpp b/tests/src/kernel/hipShflUpDownTest.cpp deleted file mode 100644 index 86aa5d6565..0000000000 --- 
a/tests/src/kernel/hipShflUpDownTest.cpp +++ /dev/null @@ -1,185 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" -#include -const int size = 32; - -template -__global__ void shflDownSum(T* a, int size) { - T val = a[threadIdx.x]; - for (int i = size / 2; i > 0; i /= 2) { - val += __shfl_down(val, i, size); - } - a[threadIdx.x] = val; -} - -template -__global__ void shflUpSum(T* a, int size) { - T val = a[threadIdx.x]; - for (int i = size / 2; i > 0; i /= 2) { - val += __shfl_up(val, i, size); - } - a[threadIdx.x] = val; -} - -template -__global__ void shflXorSum(T* a, int size) { - T val = a[threadIdx.x]; - for (int i = size/2; i > 0; i /= 2) - val += __shfl_xor(val, i, size); - a[threadIdx.x] = val; -} - -void getFactor(int& fact) { fact = 101; } -void getFactor(unsigned int& fact) { fact = static_cast(INT32_MAX)+1; } -void getFactor(float& fact) { fact = 2.5; } -void getFactor(double& fact) { fact = 2.5; } -void getFactor(__half& fact) { fact = 2.5; } -void getFactor(long& fact) { fact = 202; } -void getFactor(unsigned long& fact) { fact = static_cast(__LONG_MAX__)+1; } -void getFactor(long long& fact) { fact = 303; } -void getFactor(unsigned long long& fact) { fact = static_cast(__LONG_LONG_MAX__)+1; } - -template T sum(T* a) { - T cpuSum = 0; - T factor; - getFactor(factor); - for (int i = 0; i < size; i++) { - a[i] = i + factor; - cpuSum += a[i]; - } - return cpuSum; -} - -template <> __half sum(__half* a) { - __half cpuSum = 0; - __half factor; - getFactor(factor); - for (int i = 0; i < size; i++) { - a[i] = i + __half2float(factor); - cpuSum = __half2float(cpuSum) + __half2float(a[i]); - } - return cpuSum; -} - -template bool compare(T gpuSum, T cpuSum) { - if (gpuSum != cpuSum) { - return true; - } - return false; -} - -template <> bool compare(__half gpuSum, __half cpuSum) { - if (__half2float(gpuSum) != __half2float(cpuSum)) { - return true; - } - return false; -} - -template -void runTestShflUp() { - const 
int size = 32; - T a[size]; - T cpuSum = sum(a); - T* d_a; - hipMalloc(&d_a, sizeof(T) * size); - hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault); - hipLaunchKernelGGL(shflUpSum, 1, size, 0, 0, d_a, size); - hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault); - if (compare(a[size - 1], cpuSum)) { - hipFree(d_a); - failed("Shfl Up Sum did not match."); - } - hipFree(d_a); -} - -template -void runTestShflDown() { - T a[size]; - T cpuSum = sum(a); - T* d_a; - hipMalloc(&d_a, sizeof(T) * size); - hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault); - hipLaunchKernelGGL(shflDownSum, 1, size, 0, 0, d_a, size); - hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault); - if (compare(a[0], cpuSum)) { - hipFree(d_a); - failed("Shfl Down Sum did not match."); - } - hipFree(d_a); -} - -template -void runTestShflXor() { - T a[size]; - T cpuSum = sum(a); - T* d_a; - hipMalloc(&d_a, sizeof(T) * size); - hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault); - hipLaunchKernelGGL(shflXorSum, 1, size, 0, 0, d_a, size); - hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault); - if (compare(a[0], cpuSum)) { - hipFree(d_a); - failed("Shfl Xor Sum did not match."); - } - hipFree(d_a); -} -int main() { - runTestShflUp(); - runTestShflUp(); - runTestShflUp(); - runTestShflUp(); - runTestShflUp<__half>(); - runTestShflUp(); - runTestShflUp(); - runTestShflUp(); - runTestShflUp(); - - runTestShflDown(); - runTestShflDown(); - runTestShflDown(); - runTestShflDown(); - runTestShflDown<__half>(); - runTestShflDown(); - runTestShflDown(); - runTestShflDown(); - runTestShflDown(); - - runTestShflXor(); - runTestShflXor(); - runTestShflXor(); - runTestShflXor(); - runTestShflXor<__half>(); - runTestShflXor(); - runTestShflXor(); - runTestShflXor(); - runTestShflXor(); - passed(); -} diff --git a/tests/src/kernel/hipTestConstant.cpp b/tests/src/kernel/hipTestConstant.cpp deleted file mode 100644 index 497cb9aa53..0000000000 --- a/tests/src/kernel/hipTestConstant.cpp +++ /dev/null 
@@ -1,62 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -#define HIP_ASSERT(status) assert(status == hipSuccess) - -#define LEN 512 -#define SIZE 2048 - -__constant__ int Value[LEN]; - -__global__ void Get(int* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = Value[tid]; -} - -int main() { - int *A, *B, *Ad; - A = new int[LEN]; - B = new int[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = -1 * i; - B[i] = 0; - } - - HIP_ASSERT(hipMalloc((void**)&Ad, SIZE)); - - HIP_ASSERT(hipMemcpyToSymbol(HIP_SYMBOL(Value), A, SIZE, 0, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(Get, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); - HIP_ASSERT(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); - - for (unsigned i = 0; i < LEN; i++) { - assert(A[i] == B[i]); - } - passed(); -} diff --git a/tests/src/kernel/hipTestGlobalVariable.cpp b/tests/src/kernel/hipTestGlobalVariable.cpp deleted file mode 100644 index d4eeaa5182..0000000000 --- a/tests/src/kernel/hipTestGlobalVariable.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -#define HIP_ASSERT(status) assert(status == hipSuccess) - -#define LEN 512 -#define SIZE 2048 - - __constant__ int ConstantGlobalVar = 123; - - static __global__ void kernel(int* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = ConstantGlobalVar; - } - - void runTestConstantGlobalVar() { - int *A, *Ad; - A = new int[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = 0; - } - - HIP_ASSERT(hipMalloc((void**)&Ad, SIZE)); - hipLaunchKernelGGL(kernel, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); - HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); - - for (unsigned i = 0; i < LEN; i++) { - assert(123 == A[i]); - } - } - - __device__ int GlobalArray[LEN]; - - static __global__ void kernelWrite() { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - GlobalArray[tid] = tid; - } - static __global__ void kernelRead(int* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = GlobalArray[tid]; - } - - void runTestGlobalArray() { - int *A, *Ad; - A = new int[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = 0; - } - - HIP_ASSERT(hipMalloc((void**)&Ad, SIZE)); - hipLaunchKernelGGL(kernelWrite, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0); - hipLaunchKernelGGL(kernelRead, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); - HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); - - for (unsigned i = 0; i < LEN; i++) { - assert(i == A[i]); - } - } - -int main() { - runTestConstantGlobalVar(); - runTestGlobalArray(); - passed(); -} diff --git a/tests/src/kernel/hipTestMallocKernel.cpp b/tests/src/kernel/hipTestMallocKernel.cpp deleted file 
mode 100644 index 56f38c0999..0000000000 --- a/tests/src/kernel/hipTestMallocKernel.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM all - * TEST: %t - * HIT_END - */ - -#include -#include -#include - -#define HIP_ASSERT(status) assert(hipSuccess == status); - -#define NUM 1024 -#define SIZE NUM * 8 - -__global__ void Alloc(uint64_t* Ptr) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ptr[tid] = (uint64_t)malloc(128); -} - -__global__ void Free(uint64_t* Ptr) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - free((void*)Ptr[tid]); -} - -int main() { - uint64_t *hPtr, *dPtr; - hPtr = new uint64_t[NUM]; - for (uint32_t i = 0; i < NUM; i++) { - hPtr[i] = 1; - } - int devCnt; - hipGetDeviceCount(&devCnt); - for (uint32_t i = 0; i < devCnt; i++) { - HIP_ASSERT(hipSetDevice(i)); - HIP_ASSERT(hipMalloc((void**)&dPtr, SIZE)); - HIP_ASSERT(hipMemcpy(dPtr, hPtr, SIZE, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(Alloc, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, dPtr); - HIP_ASSERT(hipMemcpy(hPtr, dPtr, SIZE, hipMemcpyDeviceToHost)); - assert(hPtr[0] != 0); - hipLaunchKernelGGL(Free, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, dPtr); - HIP_ASSERT(hipFree(dPtr)); - for (uint32_t i = 1; i < NUM; i++) { - assert(hPtr[i] == hPtr[i - 1] + 4096); - } - } -} diff --git a/tests/src/kernel/hipTestMemKernel.cpp b/tests/src/kernel/hipTestMemKernel.cpp deleted file mode 100644 index e5606ec2e9..0000000000 --- a/tests/src/kernel/hipTestMemKernel.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -#define LEN8 8 * 4 -#define LEN9 9 * 4 -#define LEN10 10 * 4 -#define LEN11 11 * 4 -#define LEN12 12 * 4 - -__global__ void MemCpy8(uint8_t* In, uint8_t* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memcpy(Out + tid * 8, In + tid * 8, 8); -} - -__global__ void MemCpy9(uint8_t* In, uint8_t* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memcpy(Out + tid * 9, In + tid * 9, 9); -} - -__global__ void MemCpy10(uint8_t* In, uint8_t* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memcpy(Out + tid * 10, In + tid * 10, 10); -} - -__global__ void MemCpy11(uint8_t* In, uint8_t* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memcpy(Out + tid * 11, In + tid * 11, 11); -} - -__global__ void MemCpy12(uint8_t* In, uint8_t* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memcpy(Out + tid * 12, In + tid * 12, 12); -} - -__global__ void MemSet8(uint8_t* In) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memset(In + tid * 8, 1, 8); -} - -__global__ void MemSet9(uint8_t* In) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memset(In + tid * 9, 1, 9); -} - -__global__ void MemSet10(uint8_t* In) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memset(In + tid * 10, 1, 10); -} - -__global__ void MemSet11(uint8_t* In) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memset(In + tid * 11, 1, 11); -} - -__global__ void MemSet12(uint8_t* In) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - memset(In + tid * 12, 1, 12); -} - -int main() { - uint8_t *A, *Ad, *B, *Bd, *C, *Cd; - A = new uint8_t[LEN8]; - B = new uint8_t[LEN8]; - C = new uint8_t[LEN8]; - for (uint32_t i = 0; i < LEN8; i++) { - A[i] = i; - B[i] = 0; - C[i] = 0; - } - hipMalloc((void**)&Ad, LEN8); - hipMalloc((void**)&Bd, LEN8); - hipMalloc((void**)&Cd, LEN8); - hipMemcpy(Ad, A, LEN8, 
hipMemcpyHostToDevice); - hipLaunchKernelGGL(MemCpy8, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Ad, Bd); - hipLaunchKernelGGL(MemSet8, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Cd); - hipMemcpy(B, Bd, LEN8, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, LEN8, hipMemcpyDeviceToHost); - for (uint32_t i = 0; i < LEN8; i++) { - assert(A[i] == B[i]); - assert(C[i] == 1); - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - A = new uint8_t[LEN9]; - B = new uint8_t[LEN9]; - C = new uint8_t[LEN9]; - for (uint32_t i = 0; i < LEN9; i++) { - A[i] = i; - B[i] = 0; - C[i] = 0; - } - hipMalloc((void**)&Ad, LEN9); - hipMalloc((void**)&Bd, LEN9); - hipMalloc((void**)&Cd, LEN9); - hipMemcpy(Ad, A, LEN9, hipMemcpyHostToDevice); - hipLaunchKernelGGL(MemCpy9, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Ad, Bd); - hipLaunchKernelGGL(MemSet9, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Cd); - hipMemcpy(B, Bd, LEN9, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, LEN9, hipMemcpyDeviceToHost); - for (uint32_t i = 0; i < LEN9; i++) { - assert(A[i] == B[i]); - assert(C[i] == 1); - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - A = new uint8_t[LEN10]; - B = new uint8_t[LEN10]; - C = new uint8_t[LEN10]; - for (uint32_t i = 0; i < LEN10; i++) { - A[i] = i; - B[i] = 0; - C[i] = 0; - } - hipMalloc((void**)&Ad, LEN10); - hipMalloc((void**)&Bd, LEN10); - hipMalloc((void**)&Cd, LEN10); - hipMemcpy(Ad, A, LEN10, hipMemcpyHostToDevice); - hipLaunchKernelGGL(MemCpy10, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Ad, Bd); - hipLaunchKernelGGL(MemSet10, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Cd); - hipMemcpy(B, Bd, LEN10, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, LEN10, hipMemcpyDeviceToHost); - for (uint32_t i = 0; i < LEN10; i++) { - assert(A[i] == B[i]); - assert(C[i] == 1); - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - A = new uint8_t[LEN11]; - B = new uint8_t[LEN11]; - C = new uint8_t[LEN11]; - for 
(uint32_t i = 0; i < LEN11; i++) { - A[i] = i; - B[i] = 0; - C[i] = 0; - } - hipMalloc((void**)&Ad, LEN11); - hipMalloc((void**)&Bd, LEN11); - hipMalloc((void**)&Cd, LEN11); - hipMemcpy(Ad, A, LEN11, hipMemcpyHostToDevice); - hipLaunchKernelGGL(MemCpy11, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Ad, Bd); - hipLaunchKernelGGL(MemSet11, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Cd); - hipMemcpy(B, Bd, LEN11, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, LEN11, hipMemcpyDeviceToHost); - for (uint32_t i = 0; i < LEN11; i++) { - assert(A[i] == B[i]); - assert(C[i] == 1); - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - A = new uint8_t[LEN12]; - B = new uint8_t[LEN12]; - C = new uint8_t[LEN12]; - for (uint32_t i = 0; i < LEN12; i++) { - A[i] = i; - B[i] = 0; - C[i] = 0; - } - hipMalloc((void**)&Ad, LEN12); - hipMalloc((void**)&Bd, LEN12); - hipMalloc((void**)&Cd, LEN12); - hipMemcpy(Ad, A, LEN12, hipMemcpyHostToDevice); - hipLaunchKernelGGL(MemCpy12, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Ad, Bd); - hipLaunchKernelGGL(MemSet12, dim3(2, 1, 1), dim3(2, 1, 1), 0, 0, Cd); - hipMemcpy(B, Bd, LEN12, hipMemcpyDeviceToHost); - hipMemcpy(C, Cd, LEN12, hipMemcpyDeviceToHost); - for (uint32_t i = 0; i < LEN12; i++) { - assert(A[i] == B[i]); - assert(C[i] == 1); - } - - delete[] A; - delete[] B; - delete[] C; - hipFree(Ad); - hipFree(Bd); - hipFree(Cd); - - passed(); -} diff --git a/tests/src/kernel/inline_asm_vadd.cpp b/tests/src/kernel/inline_asm_vadd.cpp deleted file mode 100644 index c3ca940a9a..0000000000 --- a/tests/src/kernel/inline_asm_vadd.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and -associated documentation files (the "Software"), to deal in the Software without restriction, -including without limitation the rights to use, copy, modify, merge, publish, distribute, -sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT -NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - - -#include - -// hip header file -#include "hip/hip_runtime.h" - -#define NUM 1024 - -#define THREADS_PER_BLOCK_X 4 - -// Device (Kernel) function, it must be void -// hipLaunchParm provides the execution configuration -__global__ void vadd_asm(float* out, float* in) { - int i = blockDim.x * blockIdx.x + threadIdx.x; - -#ifdef __HIP_PLATFORM_NVIDIA__ - asm volatile("add.f32 %0,%1,%2;" : "=f"(out[i]) : "f"(in[i]), "f"(out[i])); -#endif - -#ifdef __HIP_PLATFORM_AMD__ - asm volatile("v_add_f32_e32 %0, %1, %2" : "=v"(out[i]) : "v"(in[i]), "v"(out[i])); -#endif -} - -// CPU implementation of Vector Result -void addCPUReference(float* output, float* input) { - for (unsigned int j = 0; j < NUM; j++) { - output[j] = input[j] + output[j]; - } -} - -int main() { - float* VectorA; - float* ResultVector; - float* VectorB; - - float* gpuVector; - float* gpuResultVector; - - int i; - int errors; - - VectorA = (float*)malloc(NUM * sizeof(float)); - ResultVector = (float*)malloc(NUM * sizeof(float)); - VectorB = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - VectorA[i] = (float)i * 10.0f; - VectorB[i] = (float)i * 30.0f; - } - - // allocate the memory on the device side - hipMalloc((void**)&gpuVector, NUM * sizeof(float)); - hipMalloc((void**)&gpuResultVector, NUM * sizeof(float)); - - // Memory transfer from host to device - hipMemcpy(gpuVector, VectorA, NUM * sizeof(float), hipMemcpyHostToDevice); - hipMemcpy(gpuResultVector, VectorB, NUM * sizeof(float), hipMemcpyHostToDevice); - - // Lauching kernel from host - hipLaunchKernelGGL(vadd_asm, dim3(NUM / THREADS_PER_BLOCK_X), dim3(THREADS_PER_BLOCK_X), 0, 0, - gpuResultVector, gpuVector); - - // Memory transfer from device to host - hipMemcpy(ResultVector, gpuResultVector, NUM * sizeof(float), hipMemcpyDeviceToHost); - - // CPU Result computation - addCPUReference(VectorB, VectorA); - - // verify the results 
- errors = 0; - double eps = 1.0E-3; - for (i = 0; i < NUM; i++) { - if (std::abs(ResultVector[i] - VectorB[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - hipFree(gpuVector); - hipFree(gpuResultVector); - - hipDeviceReset(); - - // free the resources on host side - free(VectorA); - free(ResultVector); - free(VectorB); - - return errors; -} diff --git a/tests/src/kernel/inline_asm_vmac.cpp b/tests/src/kernel/inline_asm_vmac.cpp deleted file mode 100644 index d9fddede73..0000000000 --- a/tests/src/kernel/inline_asm_vmac.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include - -// hip header file -#include "hip/hip_runtime.h" - -#define NUM 1024 - -#define THREADS_PER_BLOCK_X 4 - -// Device (Kernel) function, it must be void -// hipLaunchParm provides the execution configuration -__global__ void vmac_asm(hipLaunchParm lp, float* out, float* in) { - int i = blockDim.x * blockIdx.x + threadIdx.x; - - asm volatile("v_mac_f32_e32 %0, %2, %3" : "=v"(out[i]) : "0"(out[i]), "v"(a), "v"(in[i])); -} - -// CPU implementation of saxpy -void CPUReference(float* output, float* input) { - for (unsigned int j = 0; j < NUM; j++) { - output[j] = a * input[j] + output[j]; - } -} - -int main() { - float* VectorA; - float* ResultVector; - float* VectorB; - - float* gpuVector; - float* gpuResultVector; - - const float a = 10.0f int i; - int errors; - - VectorA = (float*)malloc(NUM * sizeof(float)); - ResultVector = (float*)malloc(NUM * sizeof(float)); - VectorB = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - VectorA[i] = (float)i * 10.0f; - VectorB[i] = (float)i * 30.0f; - } - - // allocate the memory on the device side - hipMalloc((void**)&gpuVector, NUM * sizeof(float)); - hipMalloc((void**)&gpuResultVector, NUM * sizeof(float)); - - // Memory transfer from host to device - hipMemcpy(gpuVector, VectorA, NUM * sizeof(float), hipMemcpyHostToDevice); - hipMemcpy(gpuResultVector, VectorB, NUM * sizeof(float), hipMemcpyHostToDevice); - - // Lauching kernel from host - hipLaunchKernel(vmac_asm, dim3(NUM / THREADS_PER_BLOCK_X), dim3(THREADS_PER_BLOCK_X), 0, 0, - gpuResultVector, gpuVector); - - // Memory transfer from device to host - hipMemcpy(ResultVector, gpuResultVector, NUM * sizeof(float), hipMemcpyDeviceToHost); - - // CPU Result computation - addCPUReference(VectorB, VectorA); - - // verify the results - errors = 0; - double eps = 1.0E-3; - for (i = 0; i < NUM; i++) { - if (std::abs(ResultVector[i] - VectorB[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d 
errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - hipFree(gpuVector); - hipFree(gpuResultVector); - - hipDeviceReset(); - - // free the resources on host side - free(VectorA); - free(ResultVector); - free(VectorB); - - return errors; -} diff --git a/tests/src/kernel/launch_bounds.cpp b/tests/src/kernel/launch_bounds.cpp deleted file mode 100644 index 94cb5dbe07..0000000000 --- a/tests/src/kernel/launch_bounds.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// Test launch bounds and initialization conditions. 
- -#include "hip/hip_runtime.h" -#include "test_common.h" - -int p_blockSize = 256; - - -__global__ void __launch_bounds__(256, 2) - myKern(hipLaunchParm lp, int* C, const int* A, int N, int xfactor) { - int tid = (blockIdx.x * blockDim.x + threadIdx.x); - - if (tid < N) { - C[tid] = A[tid]; - } -}; - - -void parseMyArguments(int argc, char* argv[]) { - int more_argc = HipTest::parseStandardArguments(argc, argv, false); - // parse args for this test: - for (int i = 1; i < more_argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, "--blockSize")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &p_blockSize)) { - failed("Bad peerDevice argument"); - } - } else { - failed("Bad argument '%s'", arg); - } - }; -}; - - -int main(int argc, char* argv[]) { - parseMyArguments(argc, argv); - - size_t Nbytes = N * sizeof(int); - - int *A_d, *C_d, *A_h, *C_h; - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - - A_h = (int*)malloc(Nbytes); - C_h = (int*)malloc(Nbytes); - - for (int i = 0; i < N; i++) { - A_h[i] = i * 10; - C_h[i] = 0x0; - } - - int blocks = N / p_blockSize; - printf("running with N=%zu p_blockSize=%d blocks=%d\n", N, p_blockSize, blocks); - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipGetLastError()); - - hipLaunchKernel(myKern, dim3(blocks), dim3(p_blockSize), 0, 0, C_d, A_d, N, 0); - -#ifdef __HIP_PLATFORM_NVIDIA__ - cudaFuncAttributes attrib; - cudaFuncGetAttributes(&attrib, myKern); - printf("binaryVersion = %d\n", attrib.binaryVersion); - printf("cacheModeCA = %d\n", attrib.cacheModeCA); - printf("constSizeBytes = %zu\n", attrib.constSizeBytes); - printf("localSizeBytes = %zud\n", attrib.localSizeBytes); - printf("maxThreadsPerBlock = %d\n", attrib.maxThreadsPerBlock); - printf("numRegs = %d\n", attrib.numRegs); - printf("ptxVersion = %d\n", attrib.ptxVersion); - printf("sharedSizeBytes = %zud\n", attrib.sharedSizeBytes); -#endif - - HIPCHECK(hipDeviceSynchronize()); - - 
HIPCHECK(hipGetLastError()); - - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - HIPCHECK(hipDeviceSynchronize()); - - for (int i = 0; i < N; i++) { - int goldVal = i * 10; - if (C_h[i] != goldVal) { - failed("mismatch at index:%d computed:%02d, gold:%02d\n", i, (int)C_h[i], (int)goldVal); - } - } - - passed(); -}; diff --git a/tests/src/nvcc/Device/hipChooseDevice.cpp b/tests/src/nvcc/Device/hipChooseDevice.cpp deleted file mode 100644 index 0a37173e79..0000000000 --- a/tests/src/nvcc/Device/hipChooseDevice.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include "test_common.h" - -int main() { - int dev; - hipDeviceProp_t prop; - HIP_PRINT_STATUS(hipChooseDevice(&dev, &prop)); - HIP_PRINT_STATUS(hipChooseDevice(0, &prop)); - HIP_PRINT_STATUS(hipChooseDevice(0, 0)); -} diff --git a/tests/src/nvcc/Device/hipDeviceGetAttribute.cpp b/tests/src/nvcc/Device/hipDeviceGetAttribute.cpp deleted file mode 100644 index 35a9739409..0000000000 --- a/tests/src/nvcc/Device/hipDeviceGetAttribute.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "test_common.h" - -int main() { - int val; - hipDeviceAttribute_t attr = - hipDeviceAttributeMaxThreadsPerBlock; ///< Maximum number of threads per block. 
- - HIP_PRINT_STATUS(hipDeviceGetAttribute(NULL, attr, 0)); - HIP_PRINT_STATUS(hipDeviceGetAttribute(&val, attr, 0)); - HIP_PRINT_STATUS(hipDeviceGetAttribute(NULL, attr, -1)); - HIP_PRINT_STATUS(hipDeviceGetAttribute(&val, attr, -1)); - attr = hipDeviceAttribute_t(91); - - HIP_PRINT_STATUS(hipDeviceGetAttribute(NULL, attr, 0)); - HIP_PRINT_STATUS(hipDeviceGetAttribute(&val, attr, 0)); - HIP_PRINT_STATUS(hipDeviceGetAttribute(NULL, attr, -1)); - HIP_PRINT_STATUS(hipDeviceGetAttribute(&val, attr, -1)); -} diff --git a/tests/src/nvcc/Device/hipDeviceGetCacheConfig.cpp b/tests/src/nvcc/Device/hipDeviceGetCacheConfig.cpp deleted file mode 100644 index 6bb7b3b9e9..0000000000 --- a/tests/src/nvcc/Device/hipDeviceGetCacheConfig.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include "test_common.h" - -int main() { - hipFuncCache_t pCacheConfig; - HIP_PRINT_STATUS(hipDeviceGetCacheConfig(&pCacheConfig)); - HIP_PRINT_STATUS(hipDeviceGetCacheConfig(NULL)); -} diff --git a/tests/src/nvcc/Device/hipDeviceGetLimit.cpp b/tests/src/nvcc/Device/hipDeviceGetLimit.cpp deleted file mode 100644 index e3e7965ba5..0000000000 --- a/tests/src/nvcc/Device/hipDeviceGetLimit.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include "test_common.h" - -int main() { - hipLimit_t lim = hipLimitMallocHeapSize; - HIP_PRINT_STATUS(hipDeviceGetLimit(NULL, lim)); -} diff --git a/tests/src/nvcc/Device/hipDeviceGetSharedMemConfig.cpp b/tests/src/nvcc/Device/hipDeviceGetSharedMemConfig.cpp deleted file mode 100644 index 62b1dc787d..0000000000 --- a/tests/src/nvcc/Device/hipDeviceGetSharedMemConfig.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include "test_common.h" - -int main() { - hipSharedMemConfig_t config; - HIP_PRINT_STATUS(hipDeviceGetSharedMemConfig(NULL)); - HIP_PRINT_STATUS(hipDeviceGetSharedMemConfig(&config)); -} diff --git a/tests/src/nvcc/Device/hipGetDevice.cpp b/tests/src/nvcc/Device/hipGetDevice.cpp deleted file mode 100644 index 6afb82b94d..0000000000 --- a/tests/src/nvcc/Device/hipGetDevice.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "test_common.h" - -int main() { - HIP_PRINT_STATUS(hipGetDevice(NULL)); - HIP_PRINT_STATUS(hipGetDevice(0)); -} diff --git a/tests/src/nvcc/Device/hipGetDeviceCount.cpp b/tests/src/nvcc/Device/hipGetDeviceCount.cpp deleted file mode 100644 index 403350e822..0000000000 --- a/tests/src/nvcc/Device/hipGetDeviceCount.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "test_common.h" - -int main() { HIP_PRINT_STATUS(hipGetDeviceCount(NULL)); } diff --git a/tests/src/nvcc/Device/hipGetDeviceProperties.cpp b/tests/src/nvcc/Device/hipGetDeviceProperties.cpp deleted file mode 100644 index aa2cadd192..0000000000 --- a/tests/src/nvcc/Device/hipGetDeviceProperties.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "test_common.h" - -int main() { - hipDeviceProp_t prop; - HIP_PRINT_STATUS(hipGetDeviceProperties(&prop, -1)); - int cnt; - hipGetDeviceCount(&cnt); - HIP_PRINT_STATUS(hipGetDeviceProperties(&prop, cnt + 1)); - HIP_PRINT_STATUS(hipGetDeviceProperties(NULL, 0)); -} diff --git a/tests/src/nvcc/Device/hipSetDevice.cpp b/tests/src/nvcc/Device/hipSetDevice.cpp deleted file mode 100644 index 5f6c9e2d3b..0000000000 --- a/tests/src/nvcc/Device/hipSetDevice.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "test_common.h" - -int main() { - HIP_PRINT_STATUS(hipSetDevice(-1)); - int count; - hipGetDeviceCount(&count); - HIP_PRINT_STATUS(hipSetDevice(count + 1)); -} diff --git a/tests/src/nvcc/Device/hipSetDeviceFlags.cpp b/tests/src/nvcc/Device/hipSetDeviceFlags.cpp deleted file mode 100644 index 5b3c35e214..0000000000 --- a/tests/src/nvcc/Device/hipSetDeviceFlags.cpp +++ /dev/null @@ -1,26 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "test_common.h" - -int main() { - HIP_PRINT_STATUS(hipSetDeviceFlags(-1)); - HIP_PRINT_STATUS(hipSetDeviceFlags(11)); -} diff --git a/tests/src/p2p/hipPeerToPeer_simple.cpp b/tests/src/p2p/hipPeerToPeer_simple.cpp deleted file mode 100644 index 1f86fe4eb5..0000000000 --- a/tests/src/p2p/hipPeerToPeer_simple.cpp +++ /dev/null @@ -1,462 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for memset. -// Also serves as a template for other tests. - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * TEST: %t --memcpyWithPeer - * TEST: %t --mirrorPeers - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -bool p_memcpyWithPeer = false; // use the peer device for the P2P copy -bool p_mirrorPeers = - false; // in addition to mapping current to peer space, map peer to current space. -int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. - - -int g_currentDevice; -int g_peerDevice; - -void help(char* argv[]) { - printf("usage: %s [OPTIONS]\n", argv[0]); - printf(" --memcpyWithPeer : Perform memcpy with peer.\n"); - printf( - " --mirrorPeers : Mirror memory onto both default device and peerdevice. 
If 0, memory " - "is mapped only on the default device.\n"); - printf(" --peerDevice N : Set peer device.\n"); -}; - - -static hipError_t myHipMemcpy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind, - hipStream_t stream, bool async) { - if (async) { - hipError_t e = hipMemcpyAsync(dest, src, sizeBytes, kind, stream); - // HIPCHECK(hipStreamSynchronize(stream)); - return (e); - } else { - return hipMemcpy(dest, src, sizeBytes, kind); - }; -} - - -void parseMyArguments(int argc, char* argv[]) { - int more_argc = HipTest::parseStandardArguments(argc, argv, false); - // parse args for this test: - for (int i = 1; i < more_argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, "--help")) { - help(argv); - exit(-1); - } else if (!strcmp(arg, "--memcpyWithPeer")) { - p_memcpyWithPeer = true; - } else if (!strcmp(arg, "--mirrorPeers")) { - p_mirrorPeers = true; - } else if (!strcmp(arg, "--peerDevice")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &p_peerDevice)) { - failed("Bad peerDevice argument"); - } - } else { - failed("Bad argument '%s'", arg); - } - }; -}; - -void syncBothDevices() { - int saveDevice; - HIPCHECK(hipGetDevice(&saveDevice)); - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipSetDevice(saveDevice)); -} - - -// Sets globals g_currentDevice, g_peerDevice -void setupPeerTests() { - int deviceCnt; - - HIPCHECK(hipGetDeviceCount(&deviceCnt)); - - g_currentDevice = p_gpuDevice; - g_peerDevice = (p_peerDevice == -1) ? 
((g_currentDevice + 1) % deviceCnt) : p_peerDevice; - - printf("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, g_currentDevice, g_peerDevice, - deviceCnt); - - // Must be on a multi-gpu system: - assert(g_currentDevice != g_peerDevice); - - int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_currentDevice, g_peerDevice)); - printf("dev#%d canAccessPeer:#%d=%d\n", g_currentDevice, g_peerDevice, canAccessPeer); - - assert(canAccessPeer); - - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipDeviceReset()); - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipDeviceReset()); -} - -//--- -// Test which enables peer2peer first, then allocates the memory. -void enablePeerFirst(bool useAsyncCopy) { - printf("\n==testing: %s useAsyncCopy=%d\n", __func__, useAsyncCopy); - - setupPeerTests(); - - // Always enable g_currentDevice to see the allocations on peerDevice. - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); - - if (p_mirrorPeers) { - // Mirror peers allows the peer device to see the allocations on currentDevice. 
- int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); - assert(canAccessPeer); - - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); - } - - size_t Nbytes = N * sizeof(char); - - char *A_d0, *A_d1; - char* A_h; - - A_h = (char*)malloc(Nbytes); - - // allocate and initialize memory on device0 - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipMalloc(&A_d0, Nbytes)); - HIPCHECK(hipMemset(A_d0, memsetval, Nbytes)); - // hipDeviceSynchronize as hipMemset is asynchronous when destination memory is device memory - HIPCHECK(hipDeviceSynchronize()); - - // allocate and initialize memory on peer device - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipMalloc(&A_d1, Nbytes)); - HIPCHECK(hipMemset(A_d1, 0x13, Nbytes)); - // hipDeviceSynchronize as hipMemset is asynchronous when destination memory is device memory - HIPCHECK(hipDeviceSynchronize()); - - // Device0 push to device1, using P2P: - // NOTE : if p_mirrorPeers=0 and p_memcpyWithPeer=1, then peer device does not have mapping for - // A_d1 and we need to use a - // a host staging copy for the P2P access. - HIPCHECK(hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice)); - HIPCHECK(myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault, 0 /*stream*/, - useAsyncCopy)); // This is P2P copy. 
- - // Copy data back to host: - // Have to wait for previous operation to finish, since we are switching to another one: - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0 /*stream*/, useAsyncCopy)); - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipSetDevice(g_currentDevice)); - - // Check host data: - for (int i = 0; i < N; i++) { - if (A_h[i] != memsetval) { - failed("mismatch at index:%d computed:0x%02x, golden memsetval:0x%02x\n", i, - (int)A_h[i], (int)memsetval); - } - } - - printf("==done: %s useAsyncCopy:%d\n\n", __func__, useAsyncCopy); -} - - -//--- -// Test which allocated memory first, then enables peer2peer. -// Enabling peer needs to scan all allocated memory and enable peer access. -void allocMemoryFirst(bool useAsyncCopy) { - printf("\n==testing: %s useAsyncCopy=%d\n", __func__, useAsyncCopy); - - setupPeerTests(); - - size_t Nbytes = N * sizeof(char); - - char *A_d0, *A_d1; - char* A_h; - - A_h = (char*)malloc(Nbytes); - - //--- - // allocate and initialize memory on device0 - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipMalloc(&A_d0, Nbytes)); - HIPCHECK(hipMemset(A_d0, memsetval, Nbytes)); - // hipDeviceSynchronize as hipMemset is asynchronous when destination memory is device memory - HIPCHECK(hipDeviceSynchronize()); - - // allocate and initialize memory on peer device - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipMalloc(&A_d1, Nbytes)); - HIPCHECK(hipMemset(A_d1, 0x13, Nbytes)); - // hipDeviceSynchronize as hipMemset is asynchronous when destination memory is device memory - HIPCHECK(hipDeviceSynchronize()); - - - //--- - // Enable peer access, for memory already allocated: - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); - - if (p_mirrorPeers) { - int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); - assert(canAccessPeer); - - 
HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); - } - - - //--- - // Copies to test functionality: - // Device0 push to device1, using P2P: - HIPCHECK(hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice)); - HIPCHECK(myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault, 0 /*stream*/, useAsyncCopy)); - - syncBothDevices(); // TODO - remove me, should handle this in implementation. - - // Copy data back to host: - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0 /*stream*/, useAsyncCopy)); - - syncBothDevices(); // TODO - remove me, should handle this in implementation. - - - //--- - // Check host data: - for (int i = 0; i < N; i++) { - if (A_h[i] != memsetval) { - failed("mismatch at index:%d computed:0x%02x, golden memsetval:0x%02x\n", i, - (int)A_h[i], (int)memsetval); - } - } - printf("==done: %s useAsyncCopy=%d\n\n", __func__, useAsyncCopy); -} - - -//--- -// Test which tests peer H2D copy - ie: copy-engine=1, dst=1, src=0 (Host) -// A_d0 is pinned host on dev0 (this) -// A_d1 is device memory on dev1 (peer) -// -void testPeerHostToDevice(bool useAsyncCopy) { - printf("\n==testing: %s useAsyncCopy=%d\n", __func__, useAsyncCopy); - - setupPeerTests(); - - // Always enable g_currentDevice to see the allocations on peerDevice. - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); - - if (p_mirrorPeers) { - // Mirror peers allows the peer device to see the allocations on currentDevice. 
- int canAccessPeer; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); - assert(canAccessPeer); - - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); - } - - size_t Nbytes = N * sizeof(char); - - char *A_host_d0, *A_d1; - char* A_h; - - A_h = (char*)malloc(Nbytes); - - // allocate and initialize memory on device0 - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipHostMalloc(&A_host_d0, Nbytes)); - HIPCHECK(hipMemset(A_host_d0, memsetval, Nbytes)); - - // allocate and initialize memory on peer device - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(hipMalloc(&A_d1, Nbytes)); - HIPCHECK(hipMemset(A_d1, 0x13, Nbytes)); - // hipDeviceSynchronize as hipMemset is asynchronous when destination memory is device memory - HIPCHECK(hipDeviceSynchronize()); - - bool firstAsyncCopy = useAsyncCopy; /*TODO - should be useAsyncCopy*/ - - syncBothDevices(); - - - // Device0 push to device1, using P2P: - // NOTE : if p_mirrorPeers=0 and p_memcpyWithPeer=1, then peer device does not have mapping for - // A_d1 and we need to use a - // a host staging copy for the P2P access. - if (p_memcpyWithPeer) { - // p_memcpyWithPeer=1 case is HostToDevice. - // if p_mirrorPeers = 1, this is accelerated copy over PCIe. - // if p_mirrorPeers = 0, this should fall back to host (because peer can't see A_host_d0) - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(myHipMemcpy(A_d1, A_host_d0, Nbytes, hipMemcpyHostToDevice, 0 /*stream*/, - firstAsyncCopy)); // This is P2P copy. - } else { - // p_memcpyWithPeer=0 case is HostToDevice. - // if p_mirrorPeers = 1, this is accelerated copy over PCIe. - // if p_mirrorPeers = 0, this should fall back to host (because device0 can't see A_d1) - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(myHipMemcpy(A_d1, A_host_d0, Nbytes, hipMemcpyHostToDevice, 0 /*stream*/, - firstAsyncCopy)); // This is P2P copy. 
- } - - syncBothDevices(); - - // Copy data back to host: - HIPCHECK(hipSetDevice(g_peerDevice)); - HIPCHECK(myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0 /*stream*/, useAsyncCopy)); - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipDeviceSynchronize()); - - // Check host data: - for (int i = 0; i < N; i++) { - if (A_h[i] != memsetval) { - failed("mismatch at index:%d computed:0x%02x, golden memsetval:0x%02x\n", i, - (int)A_h[i], (int)memsetval); - } - } - - printf("==done: %s useAsyncCopy:%d\n\n", __func__, useAsyncCopy); -} - - -void simpleNegative() { - printf("\n==testing: %s\n", __func__); - - setupPeerTests(); - - int deviceId; - HIPCHECK(hipGetDevice(&deviceId)); - - //--- - //-- self is not a peer - int canAccessPeer; - hipError_t e = hipDeviceCanAccessPeer(&canAccessPeer, deviceId, deviceId); - HIPASSERT(e == hipSuccess); // no error returned, it doesn't hurt to ask. - HIPASSERT(canAccessPeer == 0); // but self is not a peer. - - e = hipSuccess; - //--- - // Enable same device twice in a row: - HIPCHECK(hipSetDevice(g_currentDevice)); - HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); - e = (hipDeviceEnablePeerAccess(g_peerDevice, 0)); - HIPASSERT(e == hipErrorPeerAccessAlreadyEnabled); - - //--- - // try disabling twice in a row - HIPCHECK(hipDeviceDisablePeerAccess(g_peerDevice)); - e = (hipDeviceDisablePeerAccess(g_peerDevice)); - HIPASSERT(e == hipErrorPeerAccessNotEnabled); - - - // More tests here: - printf("==done: %s\n\n", __func__); -} - - -int main(int argc, char* argv[]) { - int ret_code = 0; - - do { - int gpuCount; - HIPCHECK(hipGetDeviceCount(&gpuCount)); - if (gpuCount < 2) { - printf("P2P application requires atleast 2 gpu devices\n"); - if (hip_skip_tests_enabled()) { - ret_code = hip_skip_retcode(); - } - break; //break from do while(0). 
- } - - int canAccessPeer; - for (int dev_idx = 0; dev_idx < (gpuCount-1); ++dev_idx) { - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, dev_idx, dev_idx + 1)); - if (canAccessPeer == 0) { - printf("P2P Access not available between GPUs %d and %d \n", dev_idx, dev_idx + 1); - if (hip_skip_tests_enabled()) { - ret_code = hip_skip_retcode(); - } - break; // break from for loop. - } - } - - if (canAccessPeer == 0) { - break; //break from do while(0). - } - - // Run the test case scenarios - parseMyArguments(argc, argv); - if (p_tests & 0x100) { - testPeerHostToDevice(false /*useAsyncCopy*/); - } - testPeerHostToDevice(true /*useAsyncCopy*/); - - if (p_tests & 0x1) { - enablePeerFirst(false /*useAsyncCopy*/); - } - - if (p_tests & 0x2) { - allocMemoryFirst(false /*useAsyncCopy*/); - } - - if (p_tests & 0x4) { - simpleNegative(); - } - - if (p_tests & 0x8) { - enablePeerFirst(true /*useAsyncCopy*/); - } - if (p_tests & 0x10) { - allocMemoryFirst(true /*useAsyncCopy*/); - } - } while (0); - - if (ret_code == 0 || ret_code == hip_skip_retcode()) { - passed(); - } - - return ret_code; -} diff --git a/tests/src/printf/hipPrintfAltForms.cpp b/tests/src/printf/hipPrintfAltForms.cpp deleted file mode 100644 index 5185b76785..0000000000 --- a/tests/src/printf/hipPrintfAltForms.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" - -__global__ void test_kernel() { - printf("%#o\n", 042); - printf("%#x\n", 0x42); - printf("%#X\n", 0x42); - printf("%#08x\n", 0x42); - printf("%#f\n", -123.456); -#ifdef __HIP_PLATFORM_AMD__ - printf("%#F\n", 123.456); -#else - printf("%#f\n", 123.456); // In Cuda, printf only supports "%cdiouxXpeEfgGaAs" -#endif - printf("%#e\n", 123.456); - printf("%#E\n", -123.456); - printf("%#g\n", -123.456); - printf("%#G\n", 123.456); - printf("%#a\n", 123.456); - printf("%#A\n", -123.456); - printf("%#.8x\n", 0x42); - printf("%#16.8x\n", 0x42); - printf("%-#16.8x\n", 0x42); -} - -int main(int argc, char **argv) { - std::string reference(R"here(042 -0x42 -0X42 -0x000042 --123.456000 -123.456000 -1.234560e+02 --1.234560E+02 --123.456 -123.456 -0x1.edd2f1a9fbe77p+6 --0X1.EDD2F1A9FBE77P+6 -0x00000042 - 0x00000042 -0x00000042 -)here"); - - CaptureStream capture(stdout); - - capture.Begin(); - hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); - hipStreamSynchronize(0); - capture.End(); - - std::string device_output = capture.getData(); - - HIPASSERT(device_output == reference); - passed(); -} diff --git a/tests/src/printf/hipPrintfBasic.cpp b/tests/src/printf/hipPrintfBasic.cpp deleted file mode 100644 index 6169dd01e4..0000000000 --- a/tests/src/printf/hipPrintfBasic.cpp +++ /dev/null @@ -1,327 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" -#include -#include - -// Global string constants don't work inside device functions, so we -// use a macro to repeat the declaration in host and device contexts. -DECLARE_DATA(); - -__global__ void kernel_uniform0(int *retval) { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - retval[tid] = printf("Hello World\n"); // In Hip-Rocclr, printf returns number of characters printed. - // In Cuda, printf returns the number of arguments parsed. 
-} - -static void test_uniform0(int *retval, uint num_blocks, - uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_uniform0, dim3(num_blocks), dim3(threads_per_block), - 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - HIPASSERT(retval[ii] == strlen("Hello World\n")); -#else - HIPASSERT(retval[ii] == 0); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - HIPASSERT(linecount.size() == 1); - HIPASSERT(linecount["Hello World"] == num_threads); -} - -__global__ void kernel_uniform1(int *retval) { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - retval[tid] = printf("Six times Eight is %d\n", 42); -} - -static void test_uniform1(int *retval, uint num_blocks, - uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_uniform1, dim3(num_blocks), dim3(threads_per_block), - 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - HIPASSERT(retval[ii] == strlen("Six times Eight is 42") + 1); -#else - HIPASSERT(retval[ii] == 1); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - HIPASSERT(linecount.size() == 1); - HIPASSERT(linecount["Six times Eight is 42"] == num_threads); -} - -__global__ void 
kernel_divergent0(int *retval) { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - retval[tid] = printf("Thread ID: %d\n", tid); -} - -static void test_divergent0(int *retval, uint num_blocks, - uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_divergent0, dim3(num_blocks), - dim3(threads_per_block), 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != 10; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - HIPASSERT(retval[ii] == 13); -#else - HIPASSERT(retval[ii] == 1); -#endif - } - - for (uint ii = 10; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - HIPASSERT(retval[ii] == 14); -#else - HIPASSERT(retval[ii] == 1); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::vector threadIds; - for (std::string line; std::getline(dataStream, line);) { - auto pos = line.find(':'); - HIPASSERT(line.substr(0, pos) == "Thread ID"); - threadIds.push_back(std::stoul(line.substr(pos + 2))); - } - - std::sort(threadIds.begin(), threadIds.end()); - HIPASSERT(threadIds.size() == num_threads); - HIPASSERT(threadIds.back() == num_threads - 1); -} - -__global__ void kernel_divergent1(int *retval) { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid % 2) { - retval[tid] = printf("Hello World\n"); - } else { - retval[tid] = -1; - } -} - -static void test_divergent1(int *retval, uint num_blocks, - uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_divergent1, dim3(num_blocks), - dim3(threads_per_block), 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { - 
if (ii % 2) { -#ifdef __HIP_PLATFORM_AMD__ - HIPASSERT(retval[ii] == strlen("Hello World\n")); -#else - HIPASSERT(retval[ii] == 0); -#endif - } else { - HIPASSERT(retval[ii] == -1); - } - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - HIPASSERT(linecount.size() == 1); - HIPASSERT(linecount["Hello World"] == num_threads / 2); -} - -__global__ void kernel_series(int *retval) { - DECLARE_DATA(); - - const uint tid = threadIdx.x + blockIdx.x * blockDim.x; - int result = 0; - result += printf("%s\n", msg_long1); - result += printf("%s\n", msg_short); - result += printf("%s\n", msg_long2); - retval[tid] = result; -} - -static void test_series(int *retval, uint num_blocks, uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_series, dim3(num_blocks), dim3(threads_per_block), - 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - HIPASSERT(retval[ii] == - strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3); -#else - HIPASSERT(retval[ii] == 3); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - HIPASSERT(linecount.size() == 3); - HIPASSERT(linecount[msg_long1] == num_threads); - HIPASSERT(linecount[msg_long2] == num_threads); - HIPASSERT(linecount[msg_short] == num_threads); -} - -__global__ void kernel_divergent_loop() { - const uint tid = threadIdx.x + blockIdx.x * blockDim.x; - int result = 0; - - for (int i = 0; i <= tid; ++i) { - printf("%d\n", i); - } -} - -static 
void test_divergent_loop(uint num_blocks, uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - - capture.Begin(); - hipLaunchKernelGGL(kernel_divergent_loop, dim3(num_blocks), dim3(threads_per_block), - 0, 0); - hipStreamSynchronize(0); - capture.End(); - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map count; - while (true) { - int i; - dataStream >> i; - if (dataStream.fail()) - break; - count[i]++; - } - - HIPASSERT(count.size() == num_threads); - for (int i = 0; i != num_threads; ++i) { - HIPASSERT(count[i] == num_threads - i); - } -} - -int main() { - uint num_blocks = 1; - uint threads_per_block = 64; - uint num_threads = num_blocks * threads_per_block; - - void *retval_void; - HIPCHECK(hipHostMalloc(&retval_void, 4 * num_threads)); - auto retval = reinterpret_cast(retval_void); - - test_uniform0(retval, num_blocks, threads_per_block); - test_uniform1(retval, num_blocks, threads_per_block); - test_divergent0(retval, num_blocks, threads_per_block); - test_divergent1(retval, num_blocks, threads_per_block); - test_series(retval, num_blocks, threads_per_block); - test_divergent_loop(num_blocks, threads_per_block); - - passed(); -} diff --git a/tests/src/printf/hipPrintfFlags.cpp b/tests/src/printf/hipPrintfFlags.cpp deleted file mode 100644 index c6d3c958ec..0000000000 --- a/tests/src/printf/hipPrintfFlags.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" - -__global__ void test_kernel() { - printf("%08d\n", 42); - printf("%08i\n", -42); - printf("%08u\n", 42); - printf("%08g\n", 123.456); - printf("%0+8d\n", 42); - printf("%+d\n", -42); - printf("%+08d\n", 42); - printf("%-8s\n", "xyzzy"); - printf("% i\n", -42); - printf("%-16.8d\n", 42); - printf("%16.8d\n", 42); -} - -int main(int argc, char **argv) { - std::string reference(R"here(00000042 --0000042 -00000042 -0123.456 -+0000042 --42 -+0000042 -xyzzy --42 -00000042 - 00000042 -)here"); - - CaptureStream capture(stdout); - - capture.Begin(); - hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); - hipStreamSynchronize(0); - capture.End(); - - std::string device_output = capture.getData(); - - HIPASSERT(device_output == reference); - passed(); -} diff --git a/tests/src/printf/hipPrintfManyDevices.cpp b/tests/src/printf/hipPrintfManyDevices.cpp deleted file mode 100644 index 004757cac3..0000000000 --- a/tests/src/printf/hipPrintfManyDevices.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" - -DECLARE_DATA(); - -__global__ void print_things() { - DECLARE_DATA(); - - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - const char *msg[] = {msg_short, msg_long1, msg_long2}; - - printf("%s\n", msg[tid % 3]); - if (tid % 3 == 0) - printf("%s\n", msg_short); - printf("%s\n", msg[(tid + 1) % 3]); - printf("%s\n", msg[(tid + 2) % 3]); -} - -size_t get_things_size(uint threads_per_device, uint num_devices) { - DECLARE_DATA(); - const char *msg[] = {msg_short, msg_long1, msg_long2}; - uint num_threads = threads_per_device * num_devices; - size_t size = 0; - - for(auto str: msg) { - size += strlen(str) + 1; - } - - size *= num_threads; - size += ((threads_per_device + 2) / 3) * num_devices * (strlen(msg_short) + 1); - - return size; -} - -int main() { - uint num_blocks = 14; - uint threads_per_block = 250; - uint threads_per_device = num_blocks * threads_per_block; - - CaptureStream capture(stdout); - - int num_devices = 0; - hipGetDeviceCount(&num_devices); -#ifdef __HIP_PLATFORM_NVIDIA__ - // By default, Cuda has different printf ring buffer size in different GPUs(or ENVs). - // For example, A100 has 7M, Quadro RTX 5000 has 1.5M, GeForce RTX 2070 Supper has 1.3M in tests. - // We have to detect, compare and set it - size_t size = get_things_size(threads_per_device, num_devices); - size_t size_expected = size * 4; // Cuda printf buffer format is unknown, but test shows 4 times can work here. 
- size_t size_current = 0; - HIPCHECK(hipDeviceGetLimit(&size_current, hipLimitPrintfFifoSize)); - printf("things size = %zu, expected %zu, current %zu\n", size, size_expected, size_current); - - if(size_current < size_expected) { - HIPCHECK(hipDeviceSetLimit(hipLimitPrintfFifoSize, size_expected)); - } -#endif - capture.Begin(); - for (int i = 0; i != num_devices; ++i) { - hipSetDevice(i); - hipLaunchKernelGGL(print_things, dim3(num_blocks), dim3(threads_per_block), - 0, 0); - hipDeviceSynchronize(); - } - capture.End(); - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - uint num_threads = threads_per_device * num_devices; - HIPASSERT(linecount.size() == 3); - HIPASSERT(linecount[msg_long1] == num_threads); - HIPASSERT(linecount[msg_long2] == num_threads); - HIPASSERT(linecount[msg_short] == - num_threads + ((threads_per_device + 2) / 3) * num_devices); - passed(); -} diff --git a/tests/src/printf/hipPrintfManyTypes.cpp b/tests/src/printf/hipPrintfManyTypes.cpp deleted file mode 100644 index c7d88d47d7..0000000000 --- a/tests/src/printf/hipPrintfManyTypes.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "hipPrintfUtil.h" -#include "../test_common.h" - -__global__ void test_kernel_int(const char* format, int val) { - printf(format, val); -} - - -__global__ void test_kernel_float(const char* format, float val) { - printf(format, val); -} - - -__global__ void test_kernel_float_limits(int TestNum) { - if (TestNum == 0) { - printf("%f", 1.0f/0.0f); - } - if (TestNum == 1) { - printf("%f", sqrt(-1.0f)); - } - if (TestNum == 2) { - printf("%f", acos(2.0f)); - } -} - -__global__ void test_kernel_octal(const char* format, const unsigned long int val) { - printf(format, val); -} - -__global__ void test_kernel_unsigned(const char* format, const unsigned long int val) { - printf(format, val); -} - -__global__ void test_kernel_char(int TestNum) { - if (TestNum == 0) { - printf("%4c", '1'); - } - if (TestNum == 1) { - printf("%-4c", '1'); - } - if (TestNum == 2) { - printf("%c", 66); - } -} - -__global__ void test_kernel_string(int TestNum) { - if (TestNum == 0) { - printf("%4s", "foo"); - } - if (TestNum == 1) { - printf("%.1s", "foo"); - } - if (TestNum == 2) { - printf("%s","%%"); - } -} - -void BuildCorrectOutput(std::vector* CorrectBuff, size_t TestId) { - for (auto ¶ms:allTestCase[TestId]->_genParameters) { - char refResult[256]; - if (allTestCase[TestId]->printFN == NULL) - continue; - allTestCase[TestId]->printFN(params, refResult, 256); - CorrectBuff->push_back(refResult); - } -} - -bool TestKernel(const unsigned int TestId, const unsigned int TestNum) { - - CaptureStream capture(stdout); - - 
capture.Begin(); - - switch (allTestCase[TestId]->_type) { - case TYPE_INT: { - const char* Format; - size_t FormatLen = std::strlen(allTestCase[TestId]->_genParameters[TestNum].genericFormat); - hipMalloc((void**)&Format, sizeof(char)*FormatLen); - hipMemcpyHtoD((void*)Format, (void*)allTestCase[TestId]->_genParameters[TestNum].genericFormat , sizeof(char)*FormatLen); - hipLaunchKernelGGL(test_kernel_int, dim3(1), dim3(1), 0, 0, Format, - atoi(allTestCase[TestId]->_genParameters[TestNum].dataRepresentation)); - break; - } - case TYPE_FLOAT: { - const char* Format; - size_t FormatLen = std::strlen(allTestCase[TestId]->_genParameters[TestNum].genericFormat); - hipMalloc((void**)&Format, sizeof(char)*FormatLen); - hipMemcpyHtoD((void*)Format, (void*)allTestCase[TestId]->_genParameters[TestNum].genericFormat , sizeof(char)*FormatLen); - float val = strtof(allTestCase[TestId]->_genParameters[TestNum].dataRepresentation, NULL); - hipLaunchKernelGGL(test_kernel_float, dim3(1), dim3(1), 0, 0, Format, val); - break; - } - case TYPE_FLOAT_LIMITS: { - hipLaunchKernelGGL(test_kernel_float_limits, dim3(1), dim3(1), 0, 0, TestNum); - break; - } - case TYPE_OCTAL: { - const char* Format; - size_t FormatLen = std::strlen(allTestCase[TestId]->_genParameters[TestNum].genericFormat); - hipMalloc((void**)&Format, sizeof(char)*FormatLen); - hipMemcpyHtoD((void*)Format, (void*)allTestCase[TestId]->_genParameters[TestNum].genericFormat , sizeof(char)*FormatLen); - const unsigned long int data = strtoul(allTestCase[TestId]->_genParameters[TestNum].dataRepresentation, NULL, 10); - hipLaunchKernelGGL(test_kernel_octal, dim3(1), dim3(1), 0, 0, Format, data); - break; - } - case TYPE_UNSIGNED: { - const char* Format; - size_t FormatLen = std::strlen(allTestCase[TestId]->_genParameters[TestNum].genericFormat); - hipMalloc((void**)&Format, sizeof(char)*FormatLen); - hipMemcpyHtoD((void*)Format, (void*)allTestCase[TestId]->_genParameters[TestNum].genericFormat , sizeof(char)*FormatLen); - const 
unsigned long int data = strtoul(allTestCase[TestId]->_genParameters[TestNum].dataRepresentation, NULL, 10); - hipLaunchKernelGGL(test_kernel_unsigned, dim3(1), dim3(1), 0, 0, Format, data); - break; - } - case TYPE_HEXADEC: { - const char* Format; - size_t FormatLen = std::strlen(allTestCase[TestId]->_genParameters[TestNum].genericFormat); - hipMalloc((void**)&Format, sizeof(char)*FormatLen); - hipMemcpyHtoD((void*)Format, (void*)allTestCase[TestId]->_genParameters[TestNum].genericFormat , sizeof(char)*FormatLen); - const unsigned long int data = strtoul(allTestCase[TestId]->_genParameters[TestNum].dataRepresentation, NULL, 0); - hipLaunchKernelGGL(test_kernel_unsigned, dim3(1), dim3(1), 0, 0, Format, data); - break; - } - case TYPE_CHAR: { - hipLaunchKernelGGL(test_kernel_char, dim3(1), dim3(1), 0, 0, TestNum); - break; - } - case TYPE_STRING: { - hipLaunchKernelGGL(test_kernel_string, dim3(1), dim3(1), 0, 0, TestNum); - break; - } - default: { - return false; - } - } - - hipDeviceSynchronize(); - capture.End(); - std::string device_output = capture.getData(); - char* exp; - //Exponenent representation - if ((exp = std::strstr((char*)device_output.c_str(),"E+")) != NULL || (exp = std::strstr((char*)device_output.c_str(),"e+")) != NULL - || (exp = std::strstr((char*)device_output.c_str(),"E-")) != NULL || (exp = std::strstr((char*)device_output.c_str(),"e-")) != NULL) { - - char correctExp[3]={0}; - std::strncpy(correctExp,exp,2); - - char* eCorrectBuffer = strstr((char*)allTestCase[TestId]->_correctBuffer[TestNum].c_str(),correctExp); - if (eCorrectBuffer == NULL) { - return false; - } - eCorrectBuffer+=2; - exp += 2; - //Exponent always contains at least two digits - if (strlen(exp) < 2) { - return false; - } - //Skip leading zeros in the exponent - while (*exp == '0') { - ++exp; - } - while (*eCorrectBuffer == '0') { - ++eCorrectBuffer; - } - if (std::strcmp(eCorrectBuffer,exp)) { - return false; - } - } - if 
(!std::strcmp(allTestCase[TestId]->_correctBuffer[TestNum].c_str(),"inf")) { - if (!std::strcmp(device_output.c_str(),"inf")||!std::strcmp(device_output.c_str(),"infinity") - || !std::strcmp(device_output.c_str(),"1.#INF00")&&std::strcmp(device_output.c_str(),"Inf")) { - return true; - } - } - if (!std::strcmp(allTestCase[TestId]->_correctBuffer[TestNum].c_str(),"nan") - || !std::strcmp(allTestCase[TestId]->_correctBuffer[TestNum].c_str(),"-nan")) { - if (!std::strcmp(device_output.c_str(),"nan")||!std::strcmp(device_output.c_str(),"-nan") - || !std::strcmp(device_output.c_str(),"1.#IND00")||!std::strcmp(device_output.c_str(),"-1.#IND00") - || !std::strcmp(device_output.c_str(),"NaN")||!std::strcmp(device_output.c_str(),"nan(ind)") - || !std::strcmp(device_output.c_str(),"nan(snan)")||!std::strcmp(device_output.c_str(),"-nan(ind)")) { - return true; - } - } - std::cout<_correctBuffer[TestNum]<_correctBuffer[TestNum].c_str())) { - return false; - } - return true; -} - -#define MAX_TYPES 8 -int main(int argc, char **argv) { - - const char* arg = argv[1]; - bool TestpPass; - if (!std::strcmp(arg,"--All") || !std::strcmp(arg," ")) { - for (int i=0; i < TYPE_COUNT; i++) { - BuildCorrectOutput(&allTestCase[i]->_correctBuffer, i); - for (int j=0; j < allTestCase[i]->numOfTests; j++) { - TestpPass = TestKernel(i,j); - HIPASSERT(TestpPass == true); - } - } - } else { - char* input[] = {"--INT", "--FLOAT", "--FLOAT_LIMITS", "--OCTAL", - "--UNSIGNED", "--HEXADEC", "--CHAR", "--STRING"}; - for (int i=0; i < MAX_TYPES; i++) { - if (!std::strcmp(arg, input[i])) { - BuildCorrectOutput(&allTestCase[i]->_correctBuffer, i); - for (int j=0; j < allTestCase[i]->numOfTests; j++) { - TestpPass = TestKernel(i,j); - HIPASSERT(TestpPass == true); - } - } - } - } - passed(); -} diff --git a/tests/src/printf/hipPrintfManyWaves.cpp b/tests/src/printf/hipPrintfManyWaves.cpp deleted file mode 100644 index 3bcb7fb4c1..0000000000 --- a/tests/src/printf/hipPrintfManyWaves.cpp +++ /dev/null @@ 
-1,382 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" -#include -#include - -// Global string constants don't work inside device functions, so we -// use a macro to repeat the declaration in host and device contexts. -DECLARE_DATA(); - -__global__ void kernel_mixed0(int *retval) { - DECLARE_DATA(); - - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - - // Three strings passed as divergent values to the same hostcall. 
- const char *msg; - switch (tid % 3) { - case 0: - msg = msg_short; - break; - case 1: - msg = msg_long1; - break; - case 2: - msg = msg_long2; - break; - } - - retval[tid] = printf("%s\n", msg); -} - -static void test_mixed0(int *retval, uint num_blocks, uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_mixed0, dim3(num_blocks), dim3(threads_per_block), - 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - switch (ii % 3) { - case 0: - HIPASSERT(retval[ii] == strlen(msg_short) + 1); - break; - case 1: - HIPASSERT(retval[ii] == strlen(msg_long1) + 1); - break; - case 2: - HIPASSERT(retval[ii] == strlen(msg_long2) + 1); - break; - } -#else - HIPASSERT(retval[ii] == 1); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - HIPASSERT(linecount.size() == 3); - HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3); - HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3); - HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3); -} - -__global__ void kernel_mixed1(int *retval) { - DECLARE_DATA(); - - const uint tid = threadIdx.x + blockIdx.x * blockDim.x; - - // Three strings passed to divergent hostcalls. 
- switch (tid % 3) { - case 0: - retval[tid] = printf("%s\n", msg_short); - break; - case 1: - retval[tid] = printf("%s\n", msg_long1); - break; - case 2: - retval[tid] = printf("%s\n", msg_long2); - break; - } -} - -static void test_mixed1(int *retval, uint num_blocks, uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_mixed1, dim3(num_blocks), dim3(threads_per_block), - 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - switch (ii % 3) { - case 0: - HIPASSERT(retval[ii] == strlen(msg_short) + 1); - break; - case 1: - HIPASSERT(retval[ii] == strlen(msg_long1) + 1); - break; - case 2: - HIPASSERT(retval[ii] == strlen(msg_long2) + 1); - break; - } -#else - HIPASSERT(retval[ii] == 1); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - HIPASSERT(linecount.size() == 3); - HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3); - HIPASSERT(linecount[msg_long1] == (num_threads + 1) / 3); - HIPASSERT(linecount[msg_long2] == (num_threads + 0) / 3); -} - -__global__ void kernel_mixed2(int *retval) { - DECLARE_DATA(); - - const uint tid = threadIdx.x + blockIdx.x * blockDim.x; - - // Three different strings. All workitems print all three, but - // in different orders. 
- const char *msg[] = {msg_short, msg_long1, msg_long2}; - retval[tid] = - printf("%s%s%s\n", msg[tid % 3], msg[(tid + 1) % 3], msg[(tid + 2) % 3]); -} - -static void test_mixed2(int *retval, uint num_blocks, uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_mixed2, dim3(num_blocks), dim3(threads_per_block), - 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - HIPASSERT(retval[ii] == - strlen(msg_short) + strlen(msg_long1) + strlen(msg_long2) + 1); -#else - HIPASSERT(retval[ii] == 3); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - std::string str1 = - std::string(msg_short) + std::string(msg_long1) + std::string(msg_long2); - std::string str2 = - std::string(msg_long1) + std::string(msg_long2) + std::string(msg_short); - std::string str3 = - std::string(msg_long2) + std::string(msg_short) + std::string(msg_long1); - - HIPASSERT(linecount.size() == 3); - HIPASSERT(linecount[str1] == (num_threads + 2) / 3); - HIPASSERT(linecount[str2] == (num_threads + 1) / 3); - HIPASSERT(linecount[str3] == (num_threads + 0) / 3); -} - -__global__ void kernel_mixed3(int *retval) { - DECLARE_DATA(); - - const uint tid = threadIdx.x + blockIdx.x * blockDim.x; - int result = 0; - - result += printf("%s\n", msg_long1); - if (tid % 3 == 0) { - result += printf("%s\n", msg_short); - } - result += printf("%s\n", msg_long2); - - retval[tid] = result; -} - -size_t get_mixed3_size(uint num_threads) { - DECLARE_DATA(); - const char *msg[] = {msg_long1, msg_long2}; - size_t size = 0; - - for(auto str: msg) { - size += strlen(str) + 1; - } - - size *= 
num_threads; - size += ((num_threads + 2) / 3) * (strlen(msg_short) + 1); - - return size; -} - -static void test_mixed3(int *retval, uint num_blocks, uint threads_per_block) { - CaptureStream capture(stdout); - - uint num_threads = num_blocks * threads_per_block; - for (uint i = 0; i != num_threads; ++i) { - retval[i] = 0x23232323; - } - - capture.Begin(); - hipLaunchKernelGGL(kernel_mixed3, dim3(num_blocks), dim3(threads_per_block), - 0, 0, retval); - hipStreamSynchronize(0); - capture.End(); - - for (uint ii = 0; ii != num_threads; ++ii) { -#ifdef __HIP_PLATFORM_AMD__ - if (ii % 3 == 0) { - HIPASSERT(retval[ii] == - strlen(msg_long1) + strlen(msg_short) + strlen(msg_long2) + 3); - } else { - HIPASSERT(retval[ii] == strlen(msg_long1) + strlen(msg_long2) + 2); - } -#else - HIPASSERT(retval[ii] == (ii % 3 ? 2 : 3)); -#endif - } - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::map linecount; - for (std::string line; std::getline(dataStream, line);) { - linecount[line]++; - } - - HIPASSERT(linecount.size() == 3); - HIPASSERT(linecount[msg_long1] == num_threads); - HIPASSERT(linecount[msg_long2] == num_threads); - HIPASSERT(linecount[msg_short] == (num_threads + 2) / 3); -} - -__global__ void kernel_numbers() { - uint tid = threadIdx.x + blockIdx.x * blockDim.x; - for (uint i = 0; i != 7; ++i) { - uint base = tid * 21 + i * 3; - printf("%d %d %d\n", base, base + 1, base + 2); - } -} - -size_t get_numbers_size(uint num_threads) { - char buf[100] = { 0 }; - size_t size = 0; - for (uint tid = 0; tid < num_threads; tid++) { - for (uint i = 0; i != 7; ++i) { - uint base = tid * 21 + i * 3; - size += snprintf(buf, 100, "%d %d %d\n", base, base + 1, base + 2); - } - } - return size; -} - -static void test_numbers(uint num_blocks, uint threads_per_block) { - CaptureStream capture(stdout); - uint num_threads = num_blocks * threads_per_block; - - capture.Begin(); - hipLaunchKernelGGL(kernel_numbers, dim3(num_blocks), 
dim3(threads_per_block), - 0, 0); - hipStreamSynchronize(0); - capture.End(); - - std::string data = capture.getData(); - std::stringstream dataStream; - dataStream << data; - - std::vector points; - while (true) { - uint i; - dataStream >> i; - if (dataStream.fail()) - break; - points.push_back(i); - } - - std::sort(points.begin(), points.end()); - points.erase(std::unique(points.begin(), points.end()), points.end()); - HIPASSERT(points.size() == 21 * num_threads); - HIPASSERT(points.back() == 21 * num_threads - 1); -} - -int main(int argc, char **argv) { - uint num_blocks = 150; - uint threads_per_block = 250; - uint num_threads = num_blocks * threads_per_block; -#ifdef __HIP_PLATFORM_NVIDIA__ - // By default, Cuda has different printf ring buffer size in different GPUs(or ENVs). - // For example, A100 has 7M, Quadro RTX 5000 has 1.5M, GeForce RTX 2070 Supper has 1.3M in tests. - // We have to detect, compare and set it - size_t size_mixed3 = get_mixed3_size(num_threads); - size_t size_numbers = get_numbers_size(num_threads); - size_t size_max = size_mixed3 >= size_numbers ? size_mixed3 : size_numbers; // Max size - size_t size_expected = size_max * 10; // Cuda printf buffer format is unknown, but test shows 10 times can work here. 
- size_t size_current = 0; - HIPCHECK(hipDeviceGetLimit(&size_current, hipLimitPrintfFifoSize)); - printf("size_mixed3 = %zu, size_numbers = %zu\n", size_mixed3, size_numbers); - printf("max size = %zu, expected %zu, current %zu\n", size_max, size_expected, size_current); - - if(size_current < size_expected) { - HIPCHECK(hipDeviceSetLimit(hipLimitPrintfFifoSize, size_expected)); - } -#endif - void *retval_void; - HIPCHECK(hipHostMalloc(&retval_void, 4 * num_threads)); - auto retval = reinterpret_cast(retval_void); - - test_mixed0(retval, num_blocks, threads_per_block); - test_mixed1(retval, num_blocks, threads_per_block); - test_mixed2(retval, num_blocks, threads_per_block); - test_mixed3(retval, num_blocks, threads_per_block); - test_numbers(num_blocks, threads_per_block); - passed(); -} diff --git a/tests/src/printf/hipPrintfSpecifiers.cpp b/tests/src/printf/hipPrintfSpecifiers.cpp deleted file mode 100644 index b3f62f4c86..0000000000 --- a/tests/src/printf/hipPrintfSpecifiers.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" - -__global__ void test_kernel() { - const char *N = nullptr; - const char *s = "hello world"; - printf("xyzzy\n"); - printf("%%\n"); - printf("hello %% world\n"); - printf("%%s\n"); - // Two special tests to make sure that the compiler pass correctly - // skips over a '%%' without affecting the logic for locating - // string arguments. - printf("%%s%p\n", (void *)0xf01dab1eca55e77e); - printf("%%c%s\n", "xyzzy"); - printf("%c%c%c\n", 's', 'e', 'p'); - printf("%d\n", -42); - printf("%u\n", 42); - printf("%f\n", 123.456); -#ifdef __HIP_PLATFORM_AMD__ - printf("%F\n", -123.456); -#else - printf("%f\n", -123.456); -#endif - printf("%e\n", -123.456); - printf("%E\n", 123.456); - printf("%g\n", 123.456); - printf("%G\n", -123.456); - printf("%c\n", 'x'); - printf("%s\n", N); - printf("%p\n", N); -#ifdef __HIP_PLATFORM_AMD__ - printf("%.*f %*.*s %p\n", 8, 3.14159, 8, 5, s, (void *)0xf01dab1eca55e77e); -#else - // In Cuda, printf doesn't support %.*, %*.* - printf("%.8f %8.5s %p\n", 3.14159, s, (void *)0xf01dab1eca55e77e); -#endif -} - -int main(int argc, char **argv) { -#ifdef __HIP_PLATFORM_NVIDIA__ - std::string reference(R"here(xyzzy -% -hello % world -%s -%s0xf01dab1eca55e77e -%cxyzzy -sep --42 -42 -123.456000 --123.456000 --1.234560e+02 -1.234560E+02 -123.456 --123.456 -x -(null) -(nil) -3.14159000 hello 0xf01dab1eca55e77e -)here"); -#elif !defined(_WIN32) - std::string reference(R"here(xyzzy -% -hello % world -%s -%s0xf01dab1eca55e77e -%cxyzzy -sep --42 -42 -123.456000 --123.456000 --1.234560e+02 -1.234560E+02 -123.456 --123.456 -x - -(nil) -3.14159000 hello 0xf01dab1eca55e77e -)here"); 
-#else - std::string reference(R"here(xyzzy -% -hello % world -%s -%sF01DAB1ECA55E77E -%cxyzzy -sep --42 -42 -123.456000 --123.456000 --1.234560e+02 -1.234560E+02 -123.456 --123.456 -x - -0000000000000000 -3.14159000 hello F01DAB1ECA55E77E -)here"); -#endif - - CaptureStream capture(stdout); - - capture.Begin(); - hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); - hipStreamSynchronize(0); - capture.End(); - - std::string device_output = capture.getData(); - - HIPASSERT(device_output == reference); - passed(); -} diff --git a/tests/src/printf/hipPrintfStar.cpp b/tests/src/printf/hipPrintfStar.cpp deleted file mode 100644 index 990c6af173..0000000000 --- a/tests/src/printf/hipPrintfStar.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" - -__global__ void test_kernel() { - printf("%*d\n", 16, 42); -#ifdef __HIP_PLATFORM_AMD__ - printf("%.*d\n", 8, 42); - printf("%*.*d\n", -16, 8, 42); - printf("%*.*f %s * %.*s\n", 16, 8, 123.456, "hello", 5, "worldxyz"); -#else - // In Cuda, printf doesn't support %.*, %*.* - printf("%.8d\n", 42); - printf("%-16.8d\n", 42); - printf("%16.8f %s * %.5s\n", 123.456, "hello", "worldxyz"); -#endif -} - -int main(int argc, char **argv) { - std::string reference(R"here( 42 -00000042 -00000042 - 123.45600000 hello * world -)here"); - - CaptureStream capture(stdout); - - capture.Begin(); - hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); - hipStreamSynchronize(0); - capture.End(); - - std::string device_output = capture.getData(); - - HIPASSERT(device_output == reference); - passed(); -} diff --git a/tests/src/printf/hipPrintfUtil.h b/tests/src/printf/hipPrintfUtil.h deleted file mode 100644 index 2a642948e8..0000000000 --- a/tests/src/printf/hipPrintfUtil.h +++ /dev/null @@ -1,513 +0,0 @@ -/* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef _PRINTFUTIL_H_ -#define _PRINTFUTIL_H_ - -#include "printf_common.h" -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" - -#include -#include -#include - -#include -#include // std::string -#include // std::cout -#include // std::stringstream, std::stringbu -#include -#include - -static std::vector correctBufferInt; -static std::vector correctBufferFloat; -static std::vector correctBufferOctal; -static std::vector correctBufferUnsigned; -static std::vector correctBufferHexadecimal; - -enum PrintfTestType { - TYPE_INT, - TYPE_FLOAT, - TYPE_FLOAT_LIMITS, - TYPE_OCTAL, - TYPE_UNSIGNED, - TYPE_HEXADEC, - TYPE_CHAR, - TYPE_STRING, - TYPE_VECTOR, - TYPE_ADDRESS_SPACE, - TYPE_COUNT -}; - -typedef enum { - kuchar = 0, - kchar = 1, - kushort = 2, - kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kulong = 7, - klong = 8, - kdouble = 9, - kvector = 10, - kTypeCount // always goes last -} Type; - -struct printDataGenParameters { - const char* genericFormat; - const char* dataRepresentation; - const char* vectorFormatFlag; - const char* vectorFormatSpecifier; - const char* dataType; - const char* vectorSize; - const char* addrSpaceArgumentTypeQualifier; - const char* addrSpaceVariableTypeQualifier; - const char* addrSpaceParameter; - const char* addrSpacePAdd; -}; - -struct testCase { - enum PrintfTestType _type; //(data)type for test - std::vector& _correctBuffer; //look-up table for correct results for printf - std::vector& _genParameters; //auxiliary data to build the code for kernel source - void (*printFN)(printDataGenParameters&, - char*, - const size_t); //function pointer for generating reference results - Type dataType; //the data type that will be printed during reference 
result generation (used for setting rounding mode) - int numOfTests; // num of test performed based on genparam array elements -}; - -static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) { - snprintf(refResult, refSize, params.genericFormat, atoi(params.dataRepresentation)); -} - -static void floatRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) { - snprintf(refResult, refSize, params.genericFormat, strtof(params.dataRepresentation, NULL)); -} - -static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) { - const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10); - snprintf(refResult, refSize, params.genericFormat, data); -} - -static void unsignedRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) { - const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10); - snprintf(refResult, refSize, params.genericFormat, data); -} - -static void hexRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize) { - const unsigned long int data = strtoul(params.dataRepresentation, NULL, 0); - snprintf(refResult, refSize, params.genericFormat, data); -} - -//================================== - -// int - -//================================== - -//------------------------------------------------------ - -// [string] format | [string] int-data representation | - -//------------------------------------------------------ - -std::vector printIntGenParameters = { - //(Minimum)Five-wide,default(right)-justified - {"%5d","10"}, - //(Minimum)Five-wide,left-justified - {"%-5d","10"}, - //(Minimum)Five-wide,default(right)-justified,zero-filled - {"%05d","10"}, - //(Minimum)Five-wide,default(right)-justified,with sign - {"%+5d","10"}, - //(Minimum)Five-wide ,left-justified,with sign - {"%-+5d","10"}, - //(Minimum)Five-digit(zero-filled in absent digits),default(right)-justified - 
{"%.5i","100"}, - //(Minimum)Six-wide,Five-digit(zero-filled in absent digits),default(right)-justified - {"%6.5i","100"}, - //0 and - flag both apper ==>0 is ignored,left-justified,capital I - {"%-06i","100"}, - //(Minimum)Six-wide,Five-digit(zero-filled in absent digits),default(right)-justified - {"%06.5i","100"} -}; - -testCase testCaseInt = { - TYPE_INT, - correctBufferInt, - printIntGenParameters, - intRefBuilder, - kint, - 9 -}; - - -//============================================== - -// float - -//============================================== - - - -//-------------------------------------------------------- - -// [string] format | [string] float-data representation | - -//-------------------------------------------------------- - -std::vector printFloatGenParameters = { - //Default(right)-justified - {"%f","10.3456"}, - //One position after the decimal,default(right)-justified - {"%.1f","10.3456"}, - //Two positions after the decimal,default(right)-justified - {"%.2f","10.3456"}, - //(Minimum)Eight-wide,three positions after the decimal,default(right)-justified - {"%8.3f","10.3456"}, - //(Minimum)Eight-wide,two positions after the decimal,zero-filled,default(right)-justified - {"%08.2f","10.3456"}, - //(Minimum)Eight-wide,two positions after the decimal,left-justified - {"%-8.2f","10.3456"}, - //(Minimum)Eight-wide,two positions after the decimal,with sign,default(right)-justified - {"%+8.2f","-10.3456"}, - //Zero positions after the decimal([floor]rounding),default(right)-justified - {"%.0f","0.1"}, - //Zero positions after the decimal([ceil]rounding),default(right)-justified - {"%.0f","0.6"}, - //Zero-filled,default positions number after the decimal,default(right)-justified - {"%0f","0.6"}, - //Double argument representing floating-point,used by f style,default(right)-justified - {"%4g","12345.6789"}, - //Double argument representing floating-point,used by e style,default(right)-justified - {"%4.2g","12345.6789"}, - //Double argument representing 
floating-point,used by f style,default(right)-justified - {"%4G","0.0000023"}, - //Double argument representing floating-point,used by e style,default(right)-justified - {"%4G","0.023"}, - //Double argument representing floating-point,with exponent,left-justified,default(right)-justified - {"%-#20.15e","789456123.0"}, - //Double argument representing floating-point,with exponent,left-justified,with sign,capital E,default(right)-justified ???? - {"%+#21.15E","789456123.0"}, - //Double argument representing floating-point,in [-]xh.hhhhpAd style - {"%.6a","0.1"}, - //(Minimum)Ten-wide,Double argument representing floating-point,in xh.hhhhpAd style,default(right)-justified - {"%10.2a","9990.235"}, -}; - -//--------------------------------------------------------- - -//Test case for float | - -//--------------------------------------------------------- - -testCase testCaseFloat = { - TYPE_FLOAT, - correctBufferFloat, - printFloatGenParameters, - floatRefBuilder, - kfloat, - 18 -}; - - -//============================================== - -// float limits - -//============================================== - - - -//-------------------------------------------------------- - -// [string] format | [string] float-data representation | - -//-------------------------------------------------------- - - -std::vector printFloatLimitsGenParameters = { - //Infinity (1.0/0.0) - {"%f","1.0f/0.0f"}, - //NaN - {"%f","sqrt(-1.0f)"}, - //NaN - {"%f","acos(2.0f)"} -}; -//-------------------------------------------------------- - -// Lookup table - [string]float-correct buffer | - -//-------------------------------------------------------- - -std::vector correctBufferFloatLimits = { - "inf", - "-nan", - "nan" -}; - -//--------------------------------------------------------- - -//Test case for float | - -//--------------------------------------------------------- - -testCase testCaseFloatLimits = { - TYPE_FLOAT_LIMITS, - correctBufferFloatLimits, - printFloatLimitsGenParameters, - NULL, - 
kfloat, - 3 -}; - -//========================================================= - -// octal - -//========================================================= - - - -//--------------------------------------------------------- - -// [string] format | [string] octal-data representation | - -//--------------------------------------------------------- - -std::vector printOctalGenParameters = { - //Default(right)-justified - {"%o","10"}, - //Five-digit,default(right)-justified - {"%.5o","10"}, - //Default(right)-justified,increase precision - {"%#o","100000000"}, - //(Minimum)Four-wide,Five-digit,0-flag ignored(because of precision),default(right)-justified - {"%04.5o","10"} -}; - -//------------------------------------------------------- - -//Test case for octal | - -//------------------------------------------------------- - -testCase testCaseOctal = { - TYPE_OCTAL, - correctBufferOctal, - printOctalGenParameters, - octalRefBuilder, - kulong, - 4 -}; - - - -//========================================================= - -// unsigned - -//========================================================= - - - -//--------------------------------------------------------- - -// [string] format | [string] unsined-data representation | - -//--------------------------------------------------------- - -std::vector printUnsignedGenParameters = { - //Default(right)-justified - {"%u","10"}, -}; - -//------------------------------------------------------- - -//Test case for octal | - -//------------------------------------------------------- - -testCase testCaseUnsigned = { - TYPE_UNSIGNED, - correctBufferUnsigned, - printUnsignedGenParameters, - unsignedRefBuilder, - kulong, - 1 -}; - - - -//======================================================= - -// hexadecimal - -//======================================================= - - - -//-------------------------------------------------------------- - -// [string] format | [string] hexadecimal-data representation | - 
-//-------------------------------------------------------------- - -std::vector printHexadecimalGenParameters = { - //Add 0x,low x,default(right)-justified - {"%#x","0xABCDEF"}, - //Add 0x,capital X,default(right)-justified - {"%#X","0xABCDEF"}, - //Not add 0x,if zero,default(right)-justified - {"%#X","0"}, - //(Minimum)Eight-wide,default(right)-justified - {"%8x","399"}, - //(Minimum)Four-wide,zero-filled,default(right)-justified - {"%04x","399"} -}; - -//-------------------------------------------------------------- - -//Test case for hexadecimal | - -//-------------------------------------------------------------- - -testCase testCaseHexadecimal = { - TYPE_HEXADEC, - correctBufferHexadecimal, - printHexadecimalGenParameters, - hexRefBuilder, - kulong, - 5 -}; - - - -//============================================================= - -// char - -//============================================================= - - - -//----------------------------------------------------------- - -// [string] format | [string] string-data representation | - -//----------------------------------------------------------- - -std::vector printCharGenParameters = { - //Four-wide,zero-filled,default(right)-justified - {"%4c","'1'"}, - //Four-wide,left-justified - {"%-4c","\'1\'"}, - //(unsigned) int argument,default(right)-justified - {"%c","66"} -}; - -//--------------------------------------------------------- - -// Lookup table -[string] char-correct buffer | - -//--------------------------------------------------------- - -std::vector correctBufferChar = { - " 1", - "1 ", - "B", -}; - - - - -//---------------------------------------------------------- - -//Test case for char | - -//---------------------------------------------------------- - -testCase testCaseChar = { - TYPE_CHAR, - correctBufferChar, - printCharGenParameters, - NULL, - kchar, - 3 -}; - -//========================================================== - -// string - 
-//========================================================== - - - -//-------------------------------------------------------- - -// [string]format | [string] string-data representation | - -//-------------------------------------------------------- - -std::vector printStringGenParameters = { - //(Minimum)Four-wide,zero-filled,default(right)-justified - {"%4s","\"foo\""}, - //One-digit(precision ignored),left-justified - {"%.1s","\"foo\""}, - //%% specification - {"%s","\"%%\""}, -}; - -//--------------------------------------------------------- - -// Lookup table -[string] string-correct buffer | - -//--------------------------------------------------------- - -std::vector correctBufferString = { - " foo", - "f", - "%%", -}; - - -//--------------------------------------------------------- - -//Test case for string | - -//--------------------------------------------------------- - -testCase testCaseString = { - TYPE_STRING, - correctBufferString, - printStringGenParameters, - NULL, - kchar, - 3 -}; - -std::vector allTestCase = {&testCaseInt, &testCaseFloat, &testCaseFloatLimits, &testCaseOctal, &testCaseUnsigned, - &testCaseHexadecimal, &testCaseChar, &testCaseString}; -#endif \ No newline at end of file diff --git a/tests/src/printf/hipPrintfWidthPrecision.cpp b/tests/src/printf/hipPrintfWidthPrecision.cpp deleted file mode 100644 index 6dca46178e..0000000000 --- a/tests/src/printf/hipPrintfWidthPrecision.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "printf_common.h" - -__global__ void test_kernel() { - printf("%16d\n", 42); - printf("%.8d\n", 42); - printf("%16.5d\n", -42); - printf("%.8x\n", 0x42); - printf("%.8o\n", 042); - printf("%16.8e\n", 12345.67891); - printf("%16.8f\n", -12345.67891); - printf("%16.8g\n", 12345.67891); - printf("%8.4e\n", -12345.67891); - printf("%8.4f\n", 12345.67891); - printf("%8.4g\n", 12345.67891); - printf("%4.2f\n", 12345.67891); - printf("%.1f\n", 12345.67891); - printf("%.5s\n", "helloxyz"); -} - -int main(int argc, char **argv) { - std::string reference(R"here( 42 -00000042 - -00042 -00000042 -00000042 - 1.23456789e+04 - -12345.67891000 - 12345.679 --1.2346e+04 -12345.6789 -1.235e+04 -12345.68 -12345.7 -hello -)here"); - - CaptureStream capture(stdout); - - capture.Begin(); - hipLaunchKernelGGL(test_kernel, dim3(1), dim3(1), 0, 0); - hipStreamSynchronize(0); - capture.End(); - - std::string device_output = capture.getData(); - - HIPASSERT(device_output == reference); - passed(); -} diff --git a/tests/src/printf/printf_common.h b/tests/src/printf/printf_common.h deleted file mode 100644 index d8cce83f0d..0000000000 --- a/tests/src/printf/printf_common.h +++ /dev/null @@ -1,163 +0,0 @@ -#ifndef COMMON_H -#define COMMON_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(_WIN32) -#include -#else -#include -#include -#endif - -#if defined(_WIN32) -class CaptureStream { -private: - FILE* stream; - int fdPipe[2]; - int fd; - - static constexpr size_t bufferSize = 25 * 1024 * 1024; - -public: - CaptureStream(FILE *original) { - stream = original; - - if (pipe(fdPipe, bufferSize, O_TEXT) != 0) { - fprintf(stderr, "pipe(3) failed with error %d\n", errno); - assert(false); - } - - if ((fd = dup(fileno(stream))) == -1) { - fprintf(stderr, "dup(1) failed with error %d\n", errno); - assert(false); - } - } - - ~CaptureStream() { 
- close(fd); - close(fdPipe[1]); - close(fdPipe[0]); - } - - void Begin() { - fflush(stream); - - if (dup2(fdPipe[1], fileno(stream)) == -1) { - fprintf(stderr, "dup2(2) failed with error %d\n", errno); - assert(false); - } - - setvbuf(stream, NULL, _IONBF, 0); - } - - void End() { - if (dup2(fd, fileno(stream)) == -1) { - fprintf(stderr, "dup2(2) failed with error %d\n", errno); - assert(false); - } - } - - std::string getData() { - std::string data; - data.resize(bufferSize); - - int numRead = read(fdPipe[0], const_cast(data.c_str()), bufferSize); - data[numRead] = '\0'; - - data.resize(strlen(data.c_str())); - data.shrink_to_fit(); - - return data; - } -}; -#else -struct CaptureStream { - int saved_fd; - int orig_fd; - int temp_fd; - - char tempname[13] = "mytestXXXXXX"; - - CaptureStream(FILE *original) { - orig_fd = fileno(original); - saved_fd = dup(orig_fd); - - if ((temp_fd = mkstemp(tempname)) == -1) { - error(0, errno, "Error"); - assert(false); - } - } - - void Begin() { - fflush(nullptr); - if (dup2(temp_fd, orig_fd) == -1) { - error(0, errno, "Error"); - assert(false); - } - if (close(temp_fd) != 0) { - error(0, errno, "Error"); - assert(false); - } - } - - void End() { - fflush(nullptr); - if (dup2(saved_fd, orig_fd) == -1) { - error(0, errno, "Error"); - assert(false); - } - if (close(saved_fd) != 0) { - error(0, errno, "Error"); - assert(false); - } - } - - std::string getData() { - std::ifstream tmpFileStream(tempname); - std::stringstream strStream; - strStream << tmpFileStream.rdbuf(); - return strStream.str(); - } - - ~CaptureStream() { - if (remove(tempname) != 0) { - error(0, errno, "Error"); - assert(false); - } - } - - // Truncate the file up to size if we don't want too long log - void Truncate(size_t size) { - struct stat sb = { 0 }; - if (::stat(tempname, &sb) == -1) { - failed("failed lstat(%s) with error: %s \n", tempname, ::strerror(errno)); - return; - } - if (sb.st_size > size) { - if (::truncate(tempname, static_cast(size)) == -1) { 
- failed("failed truncate(%s) with error: %s \n", tempname, ::strerror(errno)); - return; - } - } - } -}; -#endif - -#define DECLARE_DATA() \ - const char *msg_short = "Carpe diem."; \ - const char *msg_long1 = "Lorem ipsum dolor sit amet, consectetur nullam. " \ - "In mollis imperdiet nibh nec ullamcorper."; \ - const char *msg_long2 = "Curabitur nec metus sit amet augue vehicula " \ - "ultrices ut id leo. Lorem ipsum dolor sit amet, " \ - "consectetur adipiscing elit amet."; - -#endif diff --git a/tests/src/runtimeApi/cooperativeGrps/api_failure_tests.cpp b/tests/src/runtimeApi/cooperativeGrps/api_failure_tests.cpp deleted file mode 100644 index 14dfb7464e..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/api_failure_tests.cpp +++ /dev/null @@ -1,286 +0,0 @@ -/* - Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
-*/ -// Test Description: -/*The general idea of the application is to test how Cooperative Groups kernel -launches work when launching too many warps to the target device. This test -first queries the nominal warp size of the target device. It then walks through -block sizes from 1 thread, 1 warp, 2 warps, ... `maximum_warps_in_a_block`. For -each of these, it queries the maximum number of blocks that can fit in each SM. -It then queries the number of SMs on the target device. This will yield a -calculation for the maximum number of blocks that can be co-scheduled on this -device. - -The Cooperative Groups API says that users should not launch more than this -many warps (or blocks, etc.) to the target device. This test first tires to -launch 2x as many blcoks, to confirm that the runtime prevents such a launch -by returning a proper error value (`hipErrorCooperativeLaunchTooLarge`). - -It then ensures that trying to launch too large of a kernel invocation does -not break the GPU by launching a kernel with exactly the maximum number of -blocks. 
- -Finally, we run the same test for a block size that is larger than the maximum -allowed by the device, to ensure that this case is properly detected by the -runtime and that nothing breaks.*/ - - - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - - -#include -#include -#include "test_common.h" - - -static inline void hipCheckAndFail(hipError_t errval, - const char *file, int line) { - hipError_t last_err = hipGetLastError(); - if (errval != hipSuccess) { - std::cerr << "hip error: " << hipGetErrorString(errval); - std::cerr << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - failed("\n"); - } - if (last_err != errval) { - std::cerr << "Error: the return value of a function was not the same "; - std::cerr << "as the value returned by hipGetLastError()" << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - std::cerr << " Function returned: " << hipGetErrorString(errval); - std::cerr << " (" << errval << ")" << std::endl; - std::cerr << "hipGetLastError() returned: " << hipGetErrorString(last_err); - std::cerr << " (" << last_err << ")" << std::endl; - failed("\n"); - } -} -#define hipCheckErr(errval) \ - do { hipCheckAndFail((errval), __FILE__, __LINE__); } while (0) - -static inline bool hipCheckExpected(hipError_t errval, - hipError_t expected_err, const char *file, int line) { - hipError_t last_err = hipGetLastError(); - if (errval != expected_err) { - std::cerr << "hip error: " << hipGetErrorString(errval); - std::cerr << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - return false; - } - if (last_err != errval) { - std::cerr << "Error: the return value of a function was not the same "; - std::cerr << "as the value returned by hipGetLastError()" << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - std::cerr << " Function returned: " << hipGetErrorString(errval); - std::cerr << " (" << errval << ")" << 
std::endl; - std::cerr << "hipGetLastError() returned: " << hipGetErrorString(last_err); - std::cerr << " (" << last_err << ")" << std::endl; - return false; - } - return true; -} - -static bool cooperative_groups_support(int device_id) { - hipError_t err; - int cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&cooperative_attribute, - hipDeviceAttributeCooperativeLaunch, device_id)); - if (!cooperative_attribute) { - std::cerr << "Cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return false; - } - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeLaunch == 0) { - std::cerr << "Cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return false; - } - return true; -} - -__global__ void test_kernel(long long *array) { - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - array[rank] += clock64(); -} - -__global__ void test_kernel_gfx11(long long *array) { -#ifdef __HIP_PLATFORM_AMD__ - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - array[rank] += wall_clock64(); -#endif -} - -int main(int argc, char** argv) { - hipError_t err; - int device_num, FailFlag = 0; - // Alocate the host input buffer, and two device-focused buffers that we - // will use for our test. - unsigned int *dev_array[2]; - HIPCHECK(hipGetDeviceCount(&device_num)); - for (int dev = 0; dev < device_num; ++dev) { - /*************************************************************************/ - /* Test whether target device supports cooperative groups ****************/ - HIPCHECK(hipSetDevice(dev)); - if (!cooperative_groups_support(dev)) { - std::cout << "Skipping the test with Pass result.\n"; - passed(); - } - - /*************************************************************************/ - /* Create the streams we will use in this test. 
**************************/ - hipStream_t streams[2]; - for (int i = 0; i < 2; i++) { - HIPCHECK(hipStreamCreate(&streams[i])); - } - - /*************************************************************************/ - /* We will try to launch more waves than the GPU can fit. ***************/ - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, dev)); - int warp_size = device_properties.warpSize; - int num_sms = device_properties.multiProcessorCount; - int max_num_threads = device_properties.maxThreadsPerBlock; - - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - // Check single-thread block, all numbers of warps, then too-large block - for (int block_size = 0; block_size <= (max_num_threads + warp_size); - block_size += warp_size) { - if (block_size == 0) { - block_size = 1; - } - int max_blocks_per_sm; - // Calculate the device occupancy to know how many blocks can be run. - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&max_blocks_per_sm, - test_kernel_used, block_size, 0, hipOccupancyDefault)); - - if ((block_size > max_num_threads) && (max_blocks_per_sm != 0)) { - std::cerr << "ERROR! Occupancy API indicated that we can have >0 "; - std::cerr << "blocks in a kernel when the block size is too large "; - std::cerr << "to work on the device." << std::endl; - std::cerr << "This is incorrect, and could possibly lead users "; - std::cerr << "to try to launch kernels that will fail." << std::endl; - //failed("\n"); - FailFlag = 1; - break; - } - - int desired_blocks = max_blocks_per_sm * num_sms; - bool expect_fail = false; - if (desired_blocks == 0) { - desired_blocks = 1; - expect_fail = true; - } - - /**********************************************************************/ - /* Set up data to pass into the kernel ********************************/ - - for (int i = 0; i < 2; i++) { - int test_size; - // Case where we expect to fail at launch. 
- if (i == 0) { - test_size = 2 * desired_blocks; - } else { - test_size = desired_blocks; - } - HIPCHECK(hipMalloc(reinterpret_cast(&dev_array[i]), - test_size * block_size * sizeof(long long))); - HIPCHECK(hipMemsetAsync(dev_array[i], 0, - test_size * block_size * sizeof(long long), - streams[i])); - } - - HIPCHECK(hipDeviceSynchronize()); - - /***********************************************************************/ - /* Launch the kernels **************************************************/ - void *coop_params[2][1]; - for (int i = 0; i < 2; i++) { - coop_params[i][0] = reinterpret_cast(&dev_array[i]); - } - - err = hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - 2 * desired_blocks, block_size, - coop_params[0], 0, streams[0]); - - hipError_t expect_to_see; - if (expect_fail) { - expect_to_see = hipErrorInvalidConfiguration; - } else { - expect_to_see = hipErrorCooperativeLaunchTooLarge; - } - if (!hipCheckExpected(err, expect_to_see, __FILE__, __LINE__)) { - std::cerr << "ERROR! Tried to launch a cooperative kernel with "; - std::cerr << "too many warps." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << hipGetErrorString(expect_to_see); - std::cerr << " (" << expect_to_see << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - break; - } - - HIPCHECK(hipDeviceSynchronize()); - err = hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), desired_blocks, - block_size, coop_params[1], 0, streams[1]); - - if (expect_fail) { - expect_to_see = hipErrorInvalidConfiguration; - } else { - expect_to_see = hipSuccess; - } - if (!hipCheckExpected(err, expect_to_see, __FILE__, __LINE__)) { - std::cerr << "ERROR! 
Tried to launch a cooperative kernel "; - std::cerr << "with a normal size, but a block size of "; - std::cerr << desired_blocks << std::endl; - std::cerr << "This SHOULD have returned "; - std::cerr << hipGetErrorString(expect_to_see); - std::cerr << " (" << expect_to_see << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - break; - } - - HIPCHECK(hipDeviceSynchronize()); - - if (block_size == 1) { - block_size = 0; - } - for (int m = 0; m < 2; ++m) { - HIPCHECK(hipFree(dev_array[m])); - } - } - for (int m = 0; m < 2; ++m) { - HIPCHECK(hipStreamDestroy(streams[m])); - } - if (FailFlag == 1) { - for (int m = 0; m < 2; ++m) { - HIPCHECK(hipFree(dev_array[m])); - } - failed("\n"); - } - } - passed(); -} diff --git a/tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_down.cpp b/tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_down.cpp deleted file mode 100644 index aa7b00e290..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_down.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test Description: -/* This test implements sum reduction kernel, first with each threads own rank - as input and comparing the sum with expected sum output derieved from n(n-1)/2 - formula. - This sample tests functionality of intrinsics provided by thread_block_tile type, - shfl_down and shfl_xor. -*/ - -#include "test_common.h" -#include -#include -#include - -using namespace cooperative_groups; - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) -#define WAVE_SIZE 32 - -__device__ int reduction_kernel_shfl_down(coalesced_group const& g, int val) { - int sz = g.size(); - - for (int i = sz / 2; i > 0; i >>= 1) { - val += g.shfl_down(val, i); - } - - // Choose the 0'th indexed thread that holds the reduction value to return - if (g.thread_rank() == 0) { - return val; - } - // Rest of the threads return no useful values - else { - return -1; - } -} - -__global__ void kernel_shfl_down (int * dPtr, int *dResults, int lane_delta, int cg_sizes) { - int id = threadIdx.x + blockIdx.x * blockDim.x; - - if (id % cg_sizes == 0) { - coalesced_group const& g = coalesced_threads(); - int rank = g.thread_rank(); - int val = dPtr[rank]; - dResults[rank] = g.shfl_down(val, lane_delta); - return; - } -} - -__global__ void kernel_cg_group_partition(int* result, unsigned int tileSz, int cg_sizes) { - - int id = threadIdx.x + blockIdx.x * blockDim.x; - if (id % cg_sizes == 0) { - coalesced_group threadBlockCGTy = coalesced_threads(); - int input, outputSum, expectedSum; - - // Choose a leader thread to print the results - if (threadBlockCGTy.thread_rank() == 0) { - printf(" Creating %d groups, of tile size %d threads:\n\n", - (int)threadBlockCGTy.size() / tileSz, tileSz); - } - - threadBlockCGTy.sync(); - - 
coalesced_group tiledPartition = tiled_partition(threadBlockCGTy, tileSz); - int threadRank = tiledPartition.thread_rank(); - - input = tiledPartition.thread_rank(); - - // (n-1)(n)/2 - expectedSum = ((tileSz - 1) * tileSz / 2); - - outputSum = reduction_kernel_shfl_down(tiledPartition, input); - - if (tiledPartition.thread_rank() == 0) { - printf( - " Sum of all ranks 0..%d in this tiledPartition group using shfl_down is %d (expected " - "%d)\n", - tiledPartition.size() - 1, outputSum, expectedSum); - result[threadBlockCGTy.thread_rank() / (tileSz)] = outputSum; - } - return; - } -} - -void verifyResults(int* ptr, int expectedResult, int numTiles) { - for (int i = 0; i < numTiles; i++) { - if (ptr[i] != expectedResult) { - printf(" Results do not match! "); - } - } -} - -void compareResults(int* cpu, int* gpu, int size) { - for (unsigned int i = 0; i < size / sizeof(int); i++) { - if (cpu[i] != gpu[i]) { - printf(" results do not match."); - } - } -} - -void printResults(int* ptr, int size) { - for (int i = 0; i < size; i++) { - std::cout << ptr[i] << " "; - } - std::cout << '\n'; -} - -static void test_group_partition(unsigned int tileSz) { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = 32; - - std::vector cg_sizes = {1, 2, 3}; - for (auto i : cg_sizes) { - - int numTiles = ((blockSize * threadsPerBlock) / i) / tileSz; - int expectedSum = ((tileSz - 1) * tileSz / 2); - int* expectedResult = new int[numTiles]; - - // numTiles = 0 when partitioning is possible. The below statement is to avoid - // out-of-bounds error and still evaluate failure case. - numTiles = (numTiles == 0) ? 
1 : numTiles; - - for (int i = 0; i < numTiles; i++) { - expectedResult[i] = expectedSum; - } - - int* dResult = NULL; - int* hResult = NULL; - - hipHostMalloc(&hResult, numTiles * sizeof(int), hipHostMallocDefault); - memset(hResult, 0, numTiles * sizeof(int)); - - hipMalloc(&dResult, numTiles * sizeof(int)); - - - // Launch Kernel - hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dResult, tileSz, i); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - - - hipMemcpy(hResult, dResult, sizeof(int) * numTiles, hipMemcpyDeviceToHost); - - verifyResults(hResult, expectedSum, numTiles); - - // Free all allocated memory on host and device - hipFree(dResult); - hipFree(hResult); - delete[] expectedResult; - - printf("\n...PASSED.\n\n"); - } -} - -static void test_shfl_down() { - - std::vector cg_sizes = {1, 2, 3}; - for (auto i : cg_sizes) { - - hipError_t err; - int blockSize = 1; - int threadsPerBlock = WAVE_SIZE; - - int totalThreads = blockSize * threadsPerBlock; - int group_size = totalThreads / i; - int group_size_in_bytes = group_size * sizeof(int); - - int* hPtr = NULL; - int* dPtr = NULL; - int* dResults = NULL; - int lane_delta = rand() % group_size; - std::cout << "Testing coalesced_groups shfl_down with lane_delta " << lane_delta << "and group size " - << WAVE_SIZE << '\n' << std::endl; - - int arrSize = blockSize * threadsPerBlock * sizeof(int); - - hipHostMalloc(&hPtr, arrSize); - // Fill up the array - for (int i = 0; i < WAVE_SIZE; i++) { - hPtr[i] = rand() % 1000; - } - - int* cpuResultsArr = (int*)malloc(group_size_in_bytes); - for (int i = 0; i < group_size; i++) { - cpuResultsArr[i] = (i + lane_delta >= group_size) ? 
hPtr[i] : hPtr[i + lane_delta]; - } - //printf("Array passed to GPU for computation\n"); - //printResults(hPtr, WAVE_SIZE); - hipMalloc(&dPtr, group_size_in_bytes); - hipMalloc(&dResults, group_size_in_bytes); - - hipMemcpy(dPtr, hPtr, group_size_in_bytes, hipMemcpyHostToDevice); - // Launch Kernel - hipLaunchKernelGGL(kernel_shfl_down, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i); - hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - //printf("GPU results: \n"); - //printResults(hPtr, WAVE_SIZE); - //printf("Printing cpu to be verified array\n"); - //printResults(cpuResultsArr, WAVE_SIZE); - - compareResults(hPtr, cpuResultsArr, group_size_in_bytes); - std::cout << "Results verified!\n"; - - hipFree(hPtr); - hipFree(dPtr); - free(cpuResultsArr); - } -} - - - -int main() { - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - return 0; - } - - // Test shfl_down with random group sizes - for (int i = 0; i < 100; i++) { - test_shfl_down(); - } - - std::cout << "Testing static tiled_partition for different tile sizes using shfl_down" - << std::endl; - - int testNo = 1; - std::vector tileSizes = {2, 4, 8, 16, 32}; - for (auto i : tileSizes) { - std::cout << "TEST " << testNo << ":" << '\n' << std::endl; - test_group_partition(i); - testNo++; - } - - passed(); -} diff --git a/tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_up.cpp b/tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_up.cpp deleted file mode 100644 index 62a05ec244..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/coalesced_groups_shfl_up.cpp +++ /dev/null @@ -1,260 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -// Test Description: -/* This test implements prefix sum(scan) kernel, first with each threads own rank - as input and comparing the sum with expected serial summation output on CPU. - - This sample tests functionality of intrinsics provided by coalesced_group, - shfl_up. -*/ -#include "test_common.h" -#include -#include -#include - -using namespace cooperative_groups; - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) -#define WAVE_SIZE 32 -__device__ int prefix_sum_kernel(coalesced_group const& g, int val) { - int sz = g.size(); - for (int i = 1; i < sz; i <<= 1) { - int temp = g.shfl_up(val, i); - - if (g.thread_rank() >= i) { - val += temp; - } - } - return val; -} - -__global__ void kernel_shfl_up (int * dPtr, int *dResults, int lane_delta, int cg_sizes) { - int id = threadIdx.x + blockIdx.x * blockDim.x; - - if (id % cg_sizes == 0) { - coalesced_group g = coalesced_threads(); - int rank = g.thread_rank(); - int val = dPtr[rank]; - dResults[rank] = g.shfl_up(val, lane_delta); - return; - } - -} - -__global__ void kernel_cg_group_partition(int* dPtr, unsigned int tileSz, int cg_sizes) { - - int id = threadIdx.x + blockIdx.x * blockDim.x; - if (id % cg_sizes == 0) { - coalesced_group threadBlockCGTy = coalesced_threads(); - int input, outputSum; - - // we pass its own thread rank as inputs - input = threadBlockCGTy.thread_rank(); - - // Choose a leader thread to print the results - if (threadBlockCGTy.thread_rank() == 0) { - printf(" Creating %d groups, of tile size %d threads:\n\n", - (int)threadBlockCGTy.size() / tileSz, tileSz); - } - - threadBlockCGTy.sync(); - - coalesced_group tiledPartition = tiled_partition(threadBlockCGTy, tileSz); - - input = tiledPartition.thread_rank(); - - outputSum = prefix_sum_kernel(tiledPartition, input); - - // Update the result array with the corresponsing prefix sum - dPtr[threadBlockCGTy.thread_rank()] = outputSum; - return; - } -} - -void serialScan(int* ptr, int size) { - // Fill up the array - for (int i = 0; i < 
size; i++) { - ptr[i] = i; - } - - int acc = 0; - for (int i = 0; i < size; i++) { - acc = acc + ptr[i]; - ptr[i] = acc; - } -} - -void printResults(int* ptr, int size) { - for (int i = 0; i < size; i++) { - std::cout << ptr[i] << " "; - } - std::cout << '\n'; -} - -void verifyResults(int* cpu, int* gpu, int size) { - for (unsigned int i = 0; i < size / sizeof(int); i++) { - if (cpu[i] != gpu[i]) { - printf(" Results do not match."); - } - } -} - -static void test_group_partition(unsigned tileSz) { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = WAVE_SIZE; - - int* hPtr = NULL; - int* dPtr = NULL; - int* cpuPrefixSum = NULL; - - std::vector cg_sizes = {1, 2, 3}; - for (auto i : cg_sizes) { - - int arrSize = blockSize * threadsPerBlock * sizeof(int); - - hipHostMalloc(&hPtr, arrSize); - hipMalloc(&dPtr, arrSize); - - // Launch Kernel - hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dPtr, tileSz, i); - hipMemcpy(hPtr, dPtr, arrSize, hipMemcpyDeviceToHost); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - - cpuPrefixSum = new int[tileSz]; - serialScan(cpuPrefixSum, tileSz); - //std::cout << "\nPrefix sum results on CPU\n"; - //printResults(cpuPrefixSum, tileSz); - - //std::cout << "\nPrefix sum results on GPU\n"; - //printResults(hPtr, tileSz); - std::cout << "\n"; - verifyResults(hPtr, cpuPrefixSum, tileSz); - std::cout << "Results verified!\n"; - - delete[] cpuPrefixSum; - hipFree(hPtr); - hipFree(dPtr); - } -} - -static void test_shfl_up() { - - std::vector cg_sizes = {1, 2, 3}; - for (auto i : cg_sizes) { - - hipError_t err; - int blockSize = 1; - - int threadsPerBlock = WAVE_SIZE; - int totalThreads = blockSize * threadsPerBlock; - int group_size = totalThreads / i; - int group_size_in_bytes = group_size * sizeof(int); - - int* hPtr = NULL; - int* dPtr = NULL; - int* dResults = NULL; 
- int lane_delta = (rand() % group_size); - - std::cout << "Testing coalesced_groups shfl_up with lane_delta " << lane_delta - << " and group size " << WAVE_SIZE << '\n' << std::endl; - - int arrSize = blockSize * threadsPerBlock * sizeof(int); - - hipHostMalloc(&hPtr, arrSize); - // Fill up the array - for (int i = 0; i < WAVE_SIZE; i++) { - hPtr[i] = rand() % 1000; - } - //printResults(hPtr, WAVE_SIZE); - - int* cpuResultsArr = (int*)malloc(group_size_in_bytes); - for (int i = 0; i < group_size; i++) { - cpuResultsArr[i] = (i <= (lane_delta - 1)) ? hPtr[i] : hPtr[i - lane_delta]; - } - - //printf("Printing cpu results arr\n"); - //printResults(cpuResultsArr, WAVE_SIZE); - - hipMalloc(&dPtr, group_size_in_bytes); - hipMalloc(&dResults, group_size_in_bytes); - - hipMemcpy(dPtr, hPtr, group_size_in_bytes, hipMemcpyHostToDevice); - // Launch Kernel - hipLaunchKernelGGL(kernel_shfl_up, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i); - hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - //printf("GPU computation array :\n"); - //printResults(hPtr, WAVE_SIZE); - - verifyResults(hPtr, cpuResultsArr, group_size_in_bytes); - std::cout << "Results verified!\n"; - - hipFree(hPtr); - hipFree(dPtr); - free(cpuResultsArr); - } -} - -int main() { - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - return 0; - } - - for (int i = 0; i < 100; i++) { - test_shfl_up(); - } - - std::cout << "Testing coalesced_groups partitioning and shfl_up" << '\n' << std::endl; - - int testNo = 1; - std::vector tileSizes = {2, 4, 8, 16, 32}; - for (auto i : tileSizes) { - std::cout << "TEST " << testNo << ":" << '\n' << std::endl; - test_group_partition(i); - testNo++; - } - passed(); -} - -/* Kogge-Stone algorithm */ \ No newline at end of file diff --git a/tests/src/runtimeApi/cooperativeGrps/cooperative_streams.cpp b/tests/src/runtimeApi/cooperativeGrps/cooperative_streams.cpp deleted file mode 100644 index aa32bb4a3e..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/cooperative_streams.cpp +++ /dev/null @@ -1,465 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -// Test Description: -/* -The general idea of the application is to test how Cooperative Groups kernel -launches to a stream interact with other kernels being launched to different -streams. - -For example: the HIP runtime will force cooperative kernel launches to run -serially, even if they are launched to different streams. However, -cooperative kernel launches can run in parallel with regular kernels that -are launched to other streams. This limitation is so that the cooperative -kernels do not conflict with one another for resources and potentially -deadlock the system. - -As such, this benchmark tests three situations: - - 1. Launching a cooperative kernel by itself to stream[0] - 2. Launching two cooperative kernels in parallel to stream[0] and stream[1] - 3. Launching two cooperative kernels in parallel to stream[0] and stream[1] - and launching a third non-cooperative kernel to stream[2] - -We time how long it takes to run each of these benchmarks and print it as -the output of the benchmark. The kernels themselves are just useless time- -wasting code so that the kernel takes a meaningful amount of time on the -GPU before it exits. We only launch a single wavefront for each kernel, so -any serialization should not be because of GPU occupancy concerns. - -If test #2 takes roughly twice as long as #1, that implies that cooperative -kernels are properly serialized with each other by the runtime. - -If test #3 takes the same amount of time as test #2, that implies that -regular kernels can properly run in parallel with cooperative kernels. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST_NAMED: %t cooperative_streams_least_capacity --tests 0x0 - * TEST_NAMED: %t cooperative_streams_half_capacity --tests 0x1 - * TEST_NAMED: %t cooperative_streams_full_capacity --tests 0x2 - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -static inline void hipCheckAndFail(hipError_t errval, - const char *file, int line) { - hipError_t last_err = hipGetLastError(); - if (errval != hipSuccess) { - std::cerr << "hip error: " << hipGetErrorString(errval); - std::cerr << std::endl; - std::cerr << "Location: " << file << ":" << line << std::endl; - failed(" "); - } - if (last_err != errval) { - std::cerr << "Error: the return value of a function was not the same "; - std::cerr << "as the value returned by hipGetLastError()" << std::endl; - std::cerr << "Location: " << file << ":" << line << std::endl; - std::cerr << "Function returned: " << hipGetErrorString(errval); - std::cerr << " (" << errval << ")" << std::endl; - std::cerr << "hipGetLastError() returned: " << hipGetErrorString(last_err); - std::cerr << " (" << last_err << ")" << std::endl; - failed(" "); - } -} -#define hipCheckErr(errval) \ - do { hipCheckAndFail((errval), __FILE__, __LINE__); } while (0) - -__global__ void test_kernel(uint32_t loops, unsigned long long *array, long long totalTicks) { - cooperative_groups::thread_block tb = cooperative_groups::this_thread_block(); - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - - for (int i = 0; i < loops; i++) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. 
- last_clock = cur_clock; - } while(time_diff < totalTicks); - tb.sync(); - array[rank] += clock64(); - } -} - -__global__ void test_kernel_gfx11(uint32_t loops, unsigned long long *array, long long totalTicks) { -#ifdef __HIP_PLATFORM_AMD__ - cooperative_groups::thread_block tb = cooperative_groups::this_thread_block(); - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - - for (int i = 0; i < loops; i++) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < totalTicks); - tb.sync(); - array[rank] += wall_clock64(); - } -#endif -} - -template -bool verifyLeastCapacity(T& single_kernel_time, T& double_kernel_time, T& triple_kernel_time) -{ -#ifdef __HIP_PLATFORM_AMD__ - // hipLaunchCooperativeKernel() follows serialization policy on AMD devices - // Test that the two cooperative kernels took roughly twice as long as the one - if (double_kernel_time < 1.8 * single_kernel_time || - double_kernel_time > 2.2 * single_kernel_time ) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Two cooperative kernels launched at the same "; - std::cerr << "time did not take roughly twice as long as a single "; - std::cerr << "cooperative kernel." << std::endl; - std::cerr << "Were they truly serialized?" << std::endl; - return false; - } -#else - // hipLaunchCooperativeKernel() doesn't follow serialization policy on NV devices - // Test that the two cooperative kernels took roughly as long as the one - if (double_kernel_time < 0.8 * single_kernel_time || - double_kernel_time > 1.2 * single_kernel_time ) { - std::cerr << "ERROR!" 
<< std::endl; - std::cerr << "Two cooperative kernels launched at the same "; - std::cerr << "time did not take roughly as long as a single "; - std::cerr << "cooperative kernel." << std::endl; - return false; - } -#endif - - // Test that the three kernels together took roughly as long as the two - // cooperative kernels. - if (triple_kernel_time > 1.1 * double_kernel_time) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Launching a normal kernel in parallel with two "; - std::cerr << "back-to-back cooperative kernels still ended up taking "; - std::cerr << "more than 10% longer than the two cooperative kernels "; - std::cerr << "alone." << std::endl; - return false; - } - return true; -} - -template -bool verifyHalfCapacity(T& single_kernel_time, T& double_kernel_time, T& triple_kernel_time) -{ - // Test that the two cooperative kernels took roughly twice as long as the one - if (double_kernel_time < 1.8 * single_kernel_time || - double_kernel_time > 2.2 * single_kernel_time ) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Two cooperative kernels launched at the same "; - std::cerr << "time did not take roughly twice as long as a single "; - std::cerr << "cooperative kernel." << std::endl; - return false; - } - - // Test that the three kernels together took roughly as long as the two - // cooperative kernels. - if (triple_kernel_time > 1.1 * double_kernel_time) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Launching a normal kernel in parallel with two "; - std::cerr << "back-to-back cooperative kernels still ended up taking "; - std::cerr << "more than 10% longer than the two cooperative kernels "; - std::cerr << "alone." 
<< std::endl; - return false; - } - return true; -} - -template -bool verifyFullCapacity(T& single_kernel_time, T& double_kernel_time, T& triple_kernel_time) -{ - // Test that the two cooperative kernels took roughly twice as long as the one - if (double_kernel_time < 1.7 * single_kernel_time || - double_kernel_time > 2.3 * single_kernel_time ) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Two cooperative kernels launched at the same "; - std::cerr << "time did not take roughly twice as long as a single "; - std::cerr << "cooperative kernel." << std::endl; - return false; - } - - // Test that the three kernels together took roughly 1.9 times as long as the two - // cooperative kernels. If the first 2 kernels run very fast, the third - // won't share much time with the second kernel. - if (triple_kernel_time > 1.9 * double_kernel_time) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Launching a normal kernel in parallel with two "; - std::cerr << "back-to-back cooperative kernels still ended up taking "; - std::cerr << "more than 90% longer than the two cooperative kernels "; - std::cerr << "alone." 
<< std::endl; - return false; - } - return true; -} - -template -bool verify(int tests, T &single_kernel_time, T &double_kernel_time, - T &triple_kernel_time) { - switch (tests) { - case 0: - return verifyLeastCapacity(single_kernel_time, double_kernel_time, - triple_kernel_time); - case 1: - return verifyHalfCapacity(single_kernel_time, double_kernel_time, - triple_kernel_time); - case 2: - return verifyFullCapacity(single_kernel_time, double_kernel_time, - triple_kernel_time); - default: - return false; - } -} - -int main(int argc, char** argv) { - p_tests = 1; // Default for half capacity - HipTest::parseStandardArguments(argc, argv, true); - - if (p_tests < 0 || p_tests > 2) { - printf("--tests 0: test least capacity\n"); - printf(" 1: test half capacity\n"); - printf(" 2: test full capacity\n"); - failed("Wrong p_tests %d\n", p_tests); - } - hipError_t err = hipSuccess; - /*************************************************************************/ - int device_num = 0, loops = 1000; - bool FailFlag = false; - /* Create the streams we will use in this test. **************************/ - hipStream_t streams[3]; - // Alocate the host input buffer, and two device-focused buffers that we - // will use for our test. 
- unsigned long long *dev_array[3]; - HIPCHECK(hipGetDeviceCount(&device_num)); - for (int dev = 0; dev < device_num; ++dev) { - /*************************************************************************/ - HIPCHECK(hipSetDevice(dev)); - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, dev)); - - /* Test whether target device supports cooperative groups ****************/ - if (device_properties.cooperativeLaunch == 0) { - std::cout << "Cooperative group support not available in device " << dev << std::endl; - continue; - } - - /* We will launch enough waves to fill up all of the GPU *****************/ - int warp_size = device_properties.warpSize; - int num_sms = device_properties.multiProcessorCount; - long long totalTicks = device_properties.clockRate ; - int max_blocks_per_sm = 0; - // Calculate the device occupancy to know how many blocks can be run. - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks_per_sm, test_kernel_used, - warp_size, 0)); - int max_active_blocks = max_blocks_per_sm * num_sms; - int coop_blocks = 0; - int reg_blocks = 0; - - switch (p_tests) { - case 0: - // 1 block - coop_blocks = 1; - reg_blocks = 1; - break; - case 1: - // Half capacity - // To make sure the second kernel launched by hipLaunchCooperativeKernel - // is invoked after the first kernel finished - coop_blocks = max_active_blocks / 2 + 1; - // To make sure the third kernel launched by hipLaunchKernelGGL is invoked - // concurrently with the second kernel - reg_blocks = max_active_blocks - coop_blocks; - break; - case 2: - // Full capacity - coop_blocks = max_active_blocks; - reg_blocks = max_active_blocks; - break; - default: - failed("wrong p_tests %d", p_tests); - } - std::cout << "p_tests: " << p_tests << std::endl; - std::cout << "Device: " << dev << std::endl; - std::cout << "Device name: " << device_properties.name << std::endl; - std::cout << 
"clockRate: " << device_properties.clockRate << " khz" <(&dev_array[i]), - max_active_blocks * warp_size * sizeof(long long))); - HIPCHECK(hipMemsetAsync(dev_array[i], 0, max_active_blocks * warp_size * sizeof(long long), - streams[i])); - } - - HIPCHECK(hipDeviceSynchronize()); - - /*************************************************************************/ - /* Launch the kernels ****************************************************/ - void *coop_params[3][3]; - for (int i = 0; i < 3; i++) { - coop_params[i][0] = reinterpret_cast(&loops); - coop_params[i][1] = reinterpret_cast(&dev_array[i]); - coop_params[i][2] = reinterpret_cast(&totalTicks); - } - - hipEvent_t single_start0, single_end0; - hipEvent_t single_start, single_end; - hipEvent_t double_start, double_end; - hipEvent_t triple_start, triple_end; - - HIPCHECK(hipEventCreate(&single_start0)); - HIPCHECK(hipEventCreate(&single_end0)); - HIPCHECK(hipEventCreate(&single_start)); - HIPCHECK(hipEventCreate(&single_end)); - HIPCHECK(hipEventCreate(&double_start)); - HIPCHECK(hipEventCreate(&double_end)); - HIPCHECK(hipEventCreate(&triple_start)); - HIPCHECK(hipEventCreate(&triple_end)); - - // Verify over capacity - HIPCHECK_API(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - max_active_blocks + 1, warp_size, - coop_params[0], 0, streams[0]), - hipErrorCooperativeLaunchTooLarge); - - std::cout << "Launching an initial single cooperative kernel..." << std::endl; - // We need exclude the the initial launching as it will need time to load code obj. - HIPCHECK(hipEventRecord(single_start0, 0)); - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - max_active_blocks, warp_size, - coop_params[0], 0, streams[0])); - HIPCHECK(hipEventRecord(single_end0, 0)); - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a single cooperative kernel..." 
<< std::endl; - HIPCHECK(hipEventRecord(single_start, 0)); - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - coop_blocks, warp_size, - coop_params[0], 0, streams[0])); - HIPCHECK(hipEventRecord(single_end, 0)); - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching 2 cooperative kernels to different streams..."; - std::cout << std::endl; - HIPCHECK(hipEventRecord(double_start, 0)); - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - coop_blocks, warp_size, - coop_params[0], 0, streams[0])); - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - coop_blocks, warp_size, - coop_params[1], 0, streams[1])); - HIPCHECK(hipEventRecord(double_end, 0)); - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching 2 cooperative kernels and 1 normal kernel..."; - std::cout << std::endl; - HIPCHECK(hipEventRecord(triple_start, 0)); - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - coop_blocks, warp_size, - coop_params[0], 0, streams[0])); - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - coop_blocks, warp_size, - coop_params[1], 0, streams[1])); - hipLaunchKernelGGL(test_kernel_used, dim3(reg_blocks), dim3(warp_size), - 0, streams[2], loops, dev_array[2], totalTicks); - HIPCHECK(hipEventRecord(triple_end, 0)); - HIPCHECK(hipDeviceSynchronize()); - - float single_kernel_time0; - float single_kernel_time; - float double_kernel_time; - float triple_kernel_time; - - HIPCHECK(hipEventElapsedTime(&single_kernel_time0, single_start0, single_end0)); - HIPCHECK(hipEventElapsedTime(&single_kernel_time, single_start, single_end)); - HIPCHECK(hipEventElapsedTime(&double_kernel_time, double_start, double_end)); - HIPCHECK(hipEventElapsedTime(&triple_kernel_time, triple_start, triple_end)); - - std::cout << "Initial single kernel took:" << std::endl; - std::cout << " " << (int)single_kernel_time0; - std::cout << " milli-seconds" << std::endl; - std::cout << 
std::endl; - std::cout << "A single kernel took:" << std::endl; - std::cout << " " << (int)single_kernel_time; - std::cout << " milli-seconds" << std::endl; - std::cout << std::endl; - std::cout << "Two cooperative kernels that could run together took:"; - std::cout << std::endl; - std::cout << " " << (int)double_kernel_time; - std::cout << " milli-seconds" << std::endl; - std::cout << std::endl; - std::cout << "Two coop kernels and a third regular kernel took:"; - std::cout << std::endl << " "; - std::cout << (int)triple_kernel_time; - std::cout << " milli-seconds" << std::endl; - - std::cout << "Testing whether these times make sense.." << std::endl; - - FailFlag = !verify(p_tests, single_kernel_time, - double_kernel_time, triple_kernel_time); - - - HIPCHECK(hipEventDestroy(single_start0)); - HIPCHECK(hipEventDestroy(single_end0)); - HIPCHECK(hipEventDestroy(single_start)); - HIPCHECK(hipEventDestroy(single_end)); - HIPCHECK(hipEventDestroy(double_start)); - HIPCHECK(hipEventDestroy(double_end)); - HIPCHECK(hipEventDestroy(triple_start)); - HIPCHECK(hipEventDestroy(triple_end)); - - for (int k = 0; k < 3; ++k) { - HIPCHECK(hipFree(dev_array[k])); - HIPCHECK(hipStreamDestroy(streams[k])); - } - if (FailFlag) { - failed("Failed to exit "); - } - } - passed(); -} diff --git a/tests/src/runtimeApi/cooperativeGrps/grid_group_data_sharing.cpp b/tests/src/runtimeApi/cooperativeGrps/grid_group_data_sharing.cpp deleted file mode 100644 index 2974852fde..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/grid_group_data_sharing.cpp +++ /dev/null @@ -1,303 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test Description: -/*The general idea of the application is to create a buffer of width N. N is a -command line parameter, and the user will need to make sure that we can fit -two buffers of N unsigned integers onto the target GPU at the same time. - -We then launch a fixed number of warps to the GPU. This number is calculated -to fill the GPU with as many warps as can simultaneously run on the GPU. -The threads in these warps then walk over two arrays. First, values from -A[offset] are added into B[offset]. After all of A is added into all of B -in this element-wise manner, all of the waves barrier with one another. - -After the barrier, the waves start adding values from B[mirror_offset] into -A[offset]. Mirror offset means that the wave that is writing into A[7] is -reading from B[7 before the last value]. This was probably written by a -different thread before the barrier. 
- -After going through this loop a certain number of times, the kernel ends and -we read the arrays back out and recalculate this algorithm serially on the -CPU. We compare the serial version to the version that has inter-thread data -sharing and barriers and ensure they result in the same answer. - -If they do have the same answer, then we can pretty confidently say that -writing from thread X and then hitting a barrier allows thread Y to see the -values.*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ -#include -#include -#include "test_common.h" - -static inline void hipCheckAndFail(hipError_t errval, - const char *file, int line) { - hipError_t last_err = hipGetLastError(); - if (errval != hipSuccess) { - std::cerr << "hip error: " << hipGetErrorString(errval); - std::cerr << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - exit(errval); - } - if (last_err != errval) { - std::cerr << "Error: the return value of a function was not the same "; - std::cerr << "as the value returned by hipGetLastError()" << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - std::cerr << " Function returned: " << hipGetErrorString(errval); - std::cerr << " (" << errval << ")" << std::endl; - std::cerr << "hipGetLastError() returned: " << hipGetErrorString(last_err); - std::cerr << " (" << last_err << ")" << std::endl; - failed("\n"); - } -} -#define hipCheckErr(errval)\ - do { hipCheckAndFail((errval), __FILE__, __LINE__); } while (0) - -static int cooperative_groups_support(int device_id) { - hipError_t err; - - int cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&cooperative_attribute, - hipDeviceAttributeCooperativeLaunch, device_id)); - if (!cooperative_attribute) { - std::cerr << "Cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - hipDeviceProp_t device_properties; - 
HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeLaunch == 0) { - std::cerr << "Cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - return 1; -} - -static int verify_coop_arrays(unsigned int loops, unsigned int *host_input, - unsigned int *first_array, - unsigned int *second_array, - unsigned int array_len) { - unsigned int *host_first_array = host_input; - unsigned int *host_second_array = (unsigned int*)calloc(array_len, - sizeof(int)); - - for (int i = 0; i < loops; i++) { - for (int offset = 0; offset < array_len; offset++) { - host_second_array[offset] += host_first_array[offset]; - } - - for (int offset = 0; offset < array_len; offset++) { - unsigned int swizzle_offset = array_len - offset - 1; - host_first_array[offset] += host_second_array[swizzle_offset]; - } - } - - for (int i = 0; i < array_len; i++) { - if (host_first_array[i] != first_array[i]) { - std::cerr << "Test failure!" << std::endl; - std::cerr << " host_first_array[" << i << "] contains the "; - std::cerr << "value " << host_first_array[i] << std::endl; - std::cerr << " GPU first_array[" << i << "] contains the "; - std::cerr << "value " << first_array[i] << std::endl; - return -1; - } - if (host_second_array[i] != second_array[i]) { - std::cerr << "Test failure!" << std::endl; - std::cerr << " host_second_array[" << i << "] contains the "; - std::cerr << "value " << host_second_array[i] << std::endl; - std::cerr << " GPU second_array[" << i << "] contains the "; - std::cerr << "value " << second_array[i] << std::endl; - return -1; - } - } - - std::cout << "Coop test appears to work properly!" 
<< std::endl; - free(host_second_array); - return 0; -} - -__global__ void -coop_kernel(unsigned int *first_array, unsigned int *second_array, - unsigned int loops, unsigned int array_len) { - cooperative_groups::grid_group grid = cooperative_groups::this_grid(); - unsigned int rank = grid.thread_rank(); - unsigned int grid_size = grid.size(); - - for (int i = 0; i < loops; i++) { - // The goal of this loop is to directly add in values from - // array one into array two, on a per-wave basis. - for (int offset = rank; offset < array_len; offset += grid_size) { - second_array[offset] += first_array[offset]; - } - - grid.sync(); - - // The goal of this loop is to pull data the "mirror" lane in - // array two and add it back into array one. This causes inter- - // thread swizzling. - for (int offset = rank; offset < array_len; offset += grid_size) { - unsigned int swizzle_offset = array_len - offset - 1; - first_array[offset] += second_array[swizzle_offset]; - } - - grid.sync(); - } -} - -int main(int argc, char** argv) { - hipError_t err; - /*************************************************************************/ - /* Parse the command line parameters *************************************/ - // Arguments to pull out of the command line. 
- int device_num = 0, loops = 2, width = 4096, flag = 0; - HIPCHECK(hipGetDeviceCount(&device_num)); - for (int dev = 0; dev < device_num; ++dev) { - std::cout << "Device number: " << dev << std::endl; - std::cout << "Loops: " << loops << std::endl; - std::cout << "Width: " << width << std::endl; - - /*************************************************************************/ - /* Test whether target device supports cooperative groups ****************/ - HIPCHECK(hipSetDevice(dev)); - - if (!cooperative_groups_support(dev)) { - std::cout << "Skipping the test with Pass result.\n"; - passed(); - } - - /*************************************************************************/ - /* We will launch enough waves to fill up all of the GPU *****************/ - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, dev)); - - int warp_size = device_properties.warpSize; - int num_sms = device_properties.multiProcessorCount; - - std::cout << "Device name: " << device_properties.name << std::endl; - std::cout << std::endl; - - // Calculate the device occupancy to know how many blocks can be run. - int max_blocks_per_sm; - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks_per_sm, - coop_kernel, - warp_size, 0)); - - int total_blocks = max_blocks_per_sm * num_sms; - - /*************************************************************************/ - /* Create the streams we will use in this test. **************************/ - hipStream_t streams[2]; - for (int i = 0; i < 2; i++) { - HIPCHECK(hipStreamCreate(&streams[i])); - } - - /*************************************************************************/ - /* Set up data to pass into the kernel ***********************************/ - - // Alocate the host input buffer, and two device-focused buffers that we - // will use for our test. 
- unsigned int *input_buffer = (unsigned int*)calloc(width, - sizeof(unsigned int)); - for (int i = 0; i < width; i++) { - input_buffer[i] = i; - } - - unsigned int *first_dev_array; - HIPCHECK(hipMalloc(reinterpret_cast(&first_dev_array), - width * sizeof(unsigned int))); - - HIPCHECK(hipMemcpyAsync(first_dev_array, input_buffer, - width * sizeof(unsigned int), - hipMemcpyHostToDevice, streams[0])); - - unsigned int *second_dev_array; - HIPCHECK(hipMalloc(reinterpret_cast(&second_dev_array), - width * sizeof(unsigned int))); - HIPCHECK(hipMemsetAsync(second_dev_array, 0, width * sizeof(unsigned int), - streams[0])); - - /*************************************************************************/ - /* Launch the kernels ****************************************************/ - std::cout << "Launching a cooperative kernel with " << total_blocks; - std::cout << " thread blocks, each with " << warp_size << " threads"; - std::cout << std::endl; - - void *coop_params[4]; - coop_params[0] = reinterpret_cast(&first_dev_array); - coop_params[1] = reinterpret_cast(&second_dev_array); - coop_params[2] = reinterpret_cast(&loops); - coop_params[3] = reinterpret_cast(&width); - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(coop_kernel), - total_blocks, warp_size, coop_params, - 0, streams[0])); - - /*************************************************************************/ - /* Read back the buffers and print out their data ************************/ - unsigned int *first_array = (unsigned int*)calloc(width, - sizeof(unsigned int)); - unsigned int *second_array = (unsigned int*)calloc(width, - sizeof(unsigned int)); - HIPCHECK(hipMemcpyAsync(first_array, first_dev_array, - width * sizeof(unsigned int), - hipMemcpyDeviceToHost, streams[0])); - - HIPCHECK(hipMemcpyAsync(second_array, second_dev_array, - width * sizeof(unsigned int), - hipMemcpyDeviceToHost, streams[0])); - - std::cout << "Waiting for cooperative work to finish..." 
<< std::endl; - std::cout << std::flush; - - HIPCHECK(hipStreamSynchronize(streams[0])); - - - int ret_val = 0; - - std::cout << "Attemping to verify buffers." << std::endl; - std::cout << std::flush; - ret_val = verify_coop_arrays(loops, input_buffer, first_array, - second_array, width); - if (!ret_val) { - std::cout << "It appears that inter-thread data sharing at "; - std::cout << "grid_group sync points works properly!" << std::endl; - } else { - flag = 1; - } - for (int k = 0; k < 2; ++k) { - HIPCHECK(hipStreamDestroy(streams[k])); - } - HIPCHECK(hipFree(first_dev_array)); - HIPCHECK(hipFree(second_dev_array)); - free(input_buffer); - free(first_array); - free(second_array); - } - if (!flag) { - passed(); - } else { - failed("\n"); - } -} diff --git a/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupType.cpp b/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupType.cpp deleted file mode 100644 index 2d8d9f2314..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupType.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "hip/hip_cooperative_groups.h" -#include -#include - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -using namespace cooperative_groups; - -static __device__ int gm[2]; - -static __global__ -void kernel_cg_grid_group_type(int *sizeTestD, - int *thdRankTestD, - int *isValidTestD, - int *syncTestD) -{ - grid_group gg = this_grid(); - int gIdx = (blockIdx.x * blockDim.x) + threadIdx.x; - - // Test size - sizeTestD[gIdx] = gg.size(); - - // Test thread_rank - thdRankTestD[gIdx] = gg.thread_rank(); - - // Test is_valid - isValidTestD[gIdx] = gg.is_valid(); - - // Test sync - if (blockIdx.x == 0 && threadIdx.x == 0) - gm[0] = 10; - else if (blockIdx.x == 1 && threadIdx.x == 0) - gm[1] = 20; - gg.sync(); - syncTestD[gIdx] = gm[1] * gm[0]; -} - -static void test_cg_grid_group_type(int blockSize) -{ - int nBytes = sizeof(int) * 2 * blockSize; - int *sizeTestD, *sizeTestH; - int *thdRankTestD, *thdRankTestH; - int *isValidTestD, *isValidTestH; - int *syncTestD, *syncTestH; - - // Allocate device memory - ASSERT_EQUAL(hipMalloc(&sizeTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&thdRankTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&isValidTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&syncTestD, nBytes), hipSuccess); - - // Allocate host memory - ASSERT_EQUAL(hipHostMalloc(&sizeTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&thdRankTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&isValidTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&syncTestH, nBytes), hipSuccess); - - // Launch Kernel - void *params[4]; - params[0] = &sizeTestD; 
- params[1] = &thdRankTestD; - params[2] = &isValidTestD; - params[3] = &syncTestD; - hipLaunchCooperativeKernel(kernel_cg_grid_group_type, - 2, - blockSize, - params, - 0, - 0); - - // Copy result from device to host - ASSERT_EQUAL(hipMemcpy(sizeTestH, sizeTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(thdRankTestH, thdRankTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(isValidTestH, isValidTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - - // Validate results for both blocks together - for (int i = 0; i < 2 * blockSize; ++i) { - ASSERT_EQUAL(sizeTestH[i], 2 * blockSize); - ASSERT_EQUAL(thdRankTestH[i], i); - ASSERT_EQUAL(isValidTestH[i], 1); - ASSERT_EQUAL(syncTestH[i], 200); - } - - // Free device memory - ASSERT_EQUAL(hipFree(sizeTestD), hipSuccess); - ASSERT_EQUAL(hipFree(thdRankTestD), hipSuccess); - ASSERT_EQUAL(hipFree(isValidTestD), hipSuccess); - ASSERT_EQUAL(hipFree(syncTestD), hipSuccess); - - //Free host memory - ASSERT_EQUAL(hipHostFree(sizeTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(thdRankTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(isValidTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(syncTestH), hipSuccess); -} - -int main() -{ - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - return 0; - } - - // Test block sizes which are powers of 2 - int i = 0; - while (true) { - int blockSize = pow(2, i); - if (blockSize > maxThreadsPerBlock) - break; - test_cg_grid_group_type(blockSize); - ++i; - } - - // Test some random block sizes - for(int j = 0; j < 10 ; ++j) { - int blockSize = rand() % maxThreadsPerBlock; - test_cg_grid_group_type(blockSize); - } - - passed(); -} diff --git a/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaBaseType.cpp b/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaBaseType.cpp deleted file mode 100644 index 55d59c9ddb..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaBaseType.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "hip/hip_cooperative_groups.h" -#include -#include - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -using namespace cooperative_groups; - -static __device__ int gm[2]; - -static __global__ -void kernel_cg_grid_group_type_via_base_type(int *sizeTestD, - int *thdRankTestD, - int *isValidTestD, - int *syncTestD) -{ - grid_group tg = this_grid(); - int gIdx = (blockIdx.x * blockDim.x) + threadIdx.x; - - // Test size - sizeTestD[gIdx] = tg.size(); - - // Test thread_rank - thdRankTestD[gIdx] = tg.thread_rank(); - - // Test is_valid - isValidTestD[gIdx] = tg.is_valid(); - - // Test sync - if (blockIdx.x == 0 && threadIdx.x == 0) - gm[0] = 10; - else if (blockIdx.x == 1 && threadIdx.x == 0) - gm[1] = 20; - tg.sync(); - syncTestD[gIdx] = gm[1] * gm[0]; -} - -static void test_cg_grid_group_type_via_base_type(int blockSize) -{ - int nBytes = sizeof(int) * 2 * blockSize; - int *sizeTestD, *sizeTestH; - int *thdRankTestD, *thdRankTestH; - int *isValidTestD, *isValidTestH; - int *syncTestD, *syncTestH; - - // Allocate device memory - ASSERT_EQUAL(hipMalloc(&sizeTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&thdRankTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&isValidTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&syncTestD, nBytes), hipSuccess); - - // Allocate host memory - ASSERT_EQUAL(hipHostMalloc(&sizeTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&thdRankTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&isValidTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&syncTestH, nBytes), hipSuccess); - - // Launch Kernel - void *params[4]; - params[0] = &sizeTestD; - params[1] = &thdRankTestD; - params[2] = &isValidTestD; - params[3] = &syncTestD; - hipLaunchCooperativeKernel(kernel_cg_grid_group_type_via_base_type, - 2, - blockSize, - params, - 0, - 0); - - // Copy result from device to host - 
ASSERT_EQUAL(hipMemcpy(sizeTestH, sizeTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(thdRankTestH, thdRankTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(isValidTestH, isValidTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - - // Validate results for both blocks together - for (int i = 0; i < 2 * blockSize; ++i) { - ASSERT_EQUAL(sizeTestH[i], 2 * blockSize); - ASSERT_EQUAL(thdRankTestH[i], i); - ASSERT_EQUAL(isValidTestH[i], 1); - ASSERT_EQUAL(syncTestH[i], 200); - } - - // Free device memory - ASSERT_EQUAL(hipFree(sizeTestD), hipSuccess); - ASSERT_EQUAL(hipFree(thdRankTestD), hipSuccess); - ASSERT_EQUAL(hipFree(isValidTestD), hipSuccess); - ASSERT_EQUAL(hipFree(syncTestD), hipSuccess); - - //Free host memory - ASSERT_EQUAL(hipHostFree(sizeTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(thdRankTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(isValidTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(syncTestH), hipSuccess); -} - -int main() -{ - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - return 0; - } - - // Test block sizes which are powers of 2 - int i = 0; - while (true) { - int blockSize = pow(2, i); - if (blockSize > maxThreadsPerBlock) - break; - test_cg_grid_group_type_via_base_type(blockSize); - ++i; - } - - // Test some random block sizes - for(int j = 0; j < 10 ; ++j) { - int blockSize = rand() % maxThreadsPerBlock; - test_cg_grid_group_type_via_base_type(blockSize); - } - - passed(); -} diff --git a/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaPublicApi.cpp b/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaPublicApi.cpp deleted file mode 100644 index 8ebe9a61d1..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/hipCGGridGroupTypeViaPublicApi.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "hip/hip_cooperative_groups.h" -#include -#include - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -using namespace cooperative_groups; - -static __device__ int gm[2]; - -static __global__ -void kernel_cg_grid_group_type_via_public_api(int *sizeTestD, - int *thdRankTestD, - int *isValidTestD, - int *syncTestD) -{ - grid_group gg = this_grid(); - int gIdx = (blockIdx.x * blockDim.x) + threadIdx.x; - - // Test group_size api - sizeTestD[gIdx] = group_size(gg); - - // Test thread_rank api - thdRankTestD[gIdx] = thread_rank(gg); - - // Test is_valid api - isValidTestD[gIdx] = gg.is_valid(); - - // Test sync api - if (blockIdx.x == 0 && threadIdx.x == 0) - gm[0] = 10; - else if (blockIdx.x == 1 && threadIdx.x == 0) - gm[1] = 20; - sync(gg); - syncTestD[gIdx] = gm[1] * gm[0]; -} - -static void test_cg_grid_group_type_via_public_api(int blockSize) -{ - int nBytes = sizeof(int) * 2 * blockSize; - int *sizeTestD, *sizeTestH; - int *thdRankTestD, *thdRankTestH; - int *isValidTestD, *isValidTestH; - int *syncTestD, *syncTestH; - - // Allocate device memory - ASSERT_EQUAL(hipMalloc(&sizeTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&thdRankTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&isValidTestD, nBytes), hipSuccess); - ASSERT_EQUAL(hipMalloc(&syncTestD, nBytes), hipSuccess); - - // Allocate host memory - ASSERT_EQUAL(hipHostMalloc(&sizeTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&thdRankTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&isValidTestH, nBytes), hipSuccess); - ASSERT_EQUAL(hipHostMalloc(&syncTestH, nBytes), hipSuccess); - - // Launch Kernel - void *params[4]; - params[0] = &sizeTestD; - params[1] = &thdRankTestD; - params[2] = &isValidTestD; - params[3] = &syncTestD; - hipLaunchCooperativeKernel(kernel_cg_grid_group_type_via_public_api, - 2, - blockSize, - params, - 0, - 0); - - // Copy 
result from device to host - ASSERT_EQUAL(hipMemcpy(sizeTestH, sizeTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(thdRankTestH, thdRankTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(isValidTestH, isValidTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - ASSERT_EQUAL(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost), - hipSuccess); - - // Validate results for both blocks together - for (int i = 0; i < 2 * blockSize; ++i) { - ASSERT_EQUAL(sizeTestH[i], 2 * blockSize); - ASSERT_EQUAL(thdRankTestH[i], i); - ASSERT_EQUAL(isValidTestH[i], 1); - ASSERT_EQUAL(syncTestH[i], 200); - } - - // Free device memory - ASSERT_EQUAL(hipFree(sizeTestD), hipSuccess); - ASSERT_EQUAL(hipFree(thdRankTestD), hipSuccess); - ASSERT_EQUAL(hipFree(isValidTestD), hipSuccess); - ASSERT_EQUAL(hipFree(syncTestD), hipSuccess); - - //Free host memory - ASSERT_EQUAL(hipHostFree(sizeTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(thdRankTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(isValidTestH), hipSuccess); - ASSERT_EQUAL(hipHostFree(syncTestH), hipSuccess); -} - -int main() -{ - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - return 0; - } - - // Test block sizes which are powers of 2 - int i = 0; - while (true) { - int blockSize = pow(2, i); - if (blockSize > maxThreadsPerBlock) - break; - test_cg_grid_group_type_via_public_api(blockSize); - ++i; - } - - // Test some random block sizes - for(int j = 0; j < 10 ; ++j) { - int blockSize = rand() % maxThreadsPerBlock; - test_cg_grid_group_type_via_public_api(blockSize); - } - - passed(); -} diff --git a/tests/src/runtimeApi/cooperativeGrps/hipLaunchCoopMultiKernel.cpp b/tests/src/runtimeApi/cooperativeGrps/hipLaunchCoopMultiKernel.cpp deleted file mode 100644 index ce882b12d0..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/hipLaunchCoopMultiKernel.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// Simple test for hipLaunchCooperativeKernelMultiDevice API. - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 -rdc=true -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include -#include -#include -#include -#include -#include "hip/hip_cooperative_groups.h" -#include "test_common.h" - -using namespace std::chrono; - -const static uint BufferSizeInDwords = 256 * 1024 * 1024; -const static uint numQueues = 4; -const static uint numIter = 100; -constexpr uint NumKernelArgs = 4; - -#include - -__global__ void test_gws(uint* buf, uint bufSize, long* tmpBuf, long* result) -{ - extern __shared__ long tmp[]; - uint groups = gridDim.x; - uint group_id = blockIdx.x; - uint local_id = threadIdx.x; - uint chunk = gridDim.x * blockDim.x; - - uint i = group_id * blockDim.x + local_id; - long sum = 0; - while (i < bufSize) { - sum += buf[i]; - i += chunk; - } - tmp[local_id] = sum; - __syncthreads(); - i = 0; - if (local_id == 0) { - sum = 0; - while (i < blockDim.x) { - sum += tmp[i]; - i++; - } - tmpBuf[group_id] = sum; - } - - // wait - cooperative_groups::this_grid().sync(); - - if (((blockIdx.x * blockDim.x) + threadIdx.x) == 0) { - for (uint i = 1; i < groups; ++i) { - sum += tmpBuf[i]; - } - //*result = sum; - result[1 + cooperative_groups::this_multi_grid().grid_rank()] = sum; - } - cooperative_groups::this_multi_grid().sync(); - if (cooperative_groups::this_multi_grid().grid_rank() == 0) { - sum = 0; - for (uint i = 1; i <= cooperative_groups::this_multi_grid().num_grids(); ++i) { - sum += result[i]; - } - *result = sum; - } -} - -int main() { - float *A, *B; - long* dC; - - uint32_t* init = new uint32_t[BufferSizeInDwords]; - for (uint32_t i = 0; i < BufferSizeInDwords; ++i) { - init[i] = i; - } - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - size_t copySizeInDwords = BufferSizeInDwords / nGpu; - - 
uint* dA[nGpu]; - long* dB[nGpu]; - hipStream_t stream[nGpu]; - hipDeviceProp_t deviceProp[nGpu]; - - for (int i = 0; i < nGpu; i++) { - HIPCHECK(hipSetDevice(i)); - - // Calculate the device occupancy to know how many blocks can be run concurrently - hipGetDeviceProperties(&deviceProp[i], 0); - if (!deviceProp[i].cooperativeMultiDeviceLaunch) { - printf("Device doesn't support cooperative launch!"); - passed(); - return 0; - } - size_t SIZE = copySizeInDwords * sizeof(uint); - - HIPCHECK(hipMalloc((void**)&dA[i], SIZE)); - HIPCHECK(hipMalloc((void**)&dB[i], 64 * deviceProp[i].multiProcessorCount * sizeof(long))); - if (i == 0) { - HIPCHECK(hipHostMalloc((void**)&dC, (nGpu + 1) * sizeof(long))); - } - HIPCHECK(hipMemcpy(dA[i], &init[i * copySizeInDwords] , SIZE, hipMemcpyHostToDevice)); - HIPCHECK(hipStreamCreate(&stream[i])); - hipDeviceSynchronize(); - } - - dim3 dimBlock; - dim3 dimGrid; - dimGrid.x = 1; - dimGrid.y = 1; - dimGrid.z = 1; - dimBlock.x = 64; - dimBlock.y = 1; - dimBlock.z = 1; - - int numBlocks = 0; - uint workgroups[3] = {64, 128, 256}; - - hipLaunchParams* launchParamsList = new hipLaunchParams[nGpu]; - std::time_t end_time; - double time = 0; - for (uint set = 0; set < 3; ++set) { - void* args[nGpu * NumKernelArgs]; - std::cout << "---------- Test#" << set << ", size: "<< BufferSizeInDwords << - " dwords ---------------\n"; - for (int i = 0; i < nGpu; i++) { - HIPCHECK(hipSetDevice(i)); - dimBlock.x = workgroups[set]; - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, - test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long))); - - std::cout << "GPU(" << i << ") Block size: " << dimBlock.x << - " Num blocks per CU: " << numBlocks << "\n"; - - dimGrid.x = deviceProp[i].multiProcessorCount * std::min(numBlocks, 32); - - args[i * NumKernelArgs] = (void*)&dA[i]; - args[i * NumKernelArgs + 1] = (void*)©SizeInDwords; - args[i * NumKernelArgs + 2] = (void*)&dB[i]; - args[i * NumKernelArgs + 3] = (void*)&dC; - - 
launchParamsList[i].func = reinterpret_cast(test_gws); - launchParamsList[i].gridDim = dimGrid; - launchParamsList[i].blockDim = dimBlock; - launchParamsList[i].sharedMem = dimBlock.x * sizeof(long); - launchParamsList[i].stream = stream[i]; - launchParamsList[i].args = &args[i * NumKernelArgs]; - } - - system_clock::time_point start = system_clock::now(); - hipLaunchCooperativeKernelMultiDevice(launchParamsList, nGpu, 0); - for (int i = 0; i < nGpu; i++) { - hipStreamSynchronize(stream[i]); - } - system_clock::time_point end = system_clock::now(); - std::chrono::duration elapsed_seconds = end - start; - end_time = std::chrono::system_clock::to_time_t(end); - - time += elapsed_seconds.count(); - - size_t processedDwords = copySizeInDwords * nGpu; - if (*dC != (((long)(processedDwords) * (processedDwords - 1)) / 2)) { - std::cout << "Data validation failed ("<< *dC << " != " << - (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2) << - ") for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n"; - std::cout << "Test failed! \n"; - } - } - - delete [] launchParamsList; - - std::cout << "finished computation at " << std::ctime(&end_time) << - "elapsed time: " << time << "s\n"; - - hipSetDevice(0); - hipFree(dC); - for (int i = 0; i < nGpu; i++) { - hipFree(dA[i]); - hipFree(dB[i]); - HIPCHECK(hipStreamDestroy(stream[i])); - } - - delete [] init; - passed(); - return 0; -} diff --git a/tests/src/runtimeApi/cooperativeGrps/hipLaunchCooperativeKernel.cpp b/tests/src/runtimeApi/cooperativeGrps/hipLaunchCooperativeKernel.cpp deleted file mode 100644 index 1966d4027e..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/hipLaunchCooperativeKernel.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for hipLaunchCooperativeKernel API. 
- -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "hip/hip_cooperative_groups.h" -#include -#include -#include "test_common.h" - -using namespace std::chrono; - -namespace cg = cooperative_groups; - -const static uint BufferSizeInDwords = 448 * 1024 * 1024; - -__global__ void test_gws(uint* buf, uint bufSize, long* tmpBuf, long* result) -{ - extern __shared__ long tmp[]; - uint offset = blockIdx.x * blockDim.x + threadIdx.x; - uint stride = gridDim.x * blockDim.x; - cg::grid_group gg = cg::this_grid(); - - long sum = 0; - for (uint i = offset; i < bufSize; i += stride) { - sum += buf[i]; - } - tmp[threadIdx.x] = sum; - - __syncthreads(); - - if (threadIdx.x == 0) { - sum = 0; - for (uint i = 0; i < blockDim.x; i++) { - sum += tmp[i]; - } - tmpBuf[blockIdx.x] = sum; - } - - gg.sync(); - - if (offset == 0) { - for (uint i = 1; i < gridDim.x; ++i) { - sum += tmpBuf[i]; - } - *result = sum; - } -} - -int main() { - float *A, *B, *Ad, *Bd; - uint* dA; - long* dB; - long* dC; - - uint32_t* init = new uint32_t[BufferSizeInDwords]; - for (uint32_t i = 0; i < BufferSizeInDwords; ++i) { - init[i] = i; - } - - hipDeviceProp_t deviceProp; - - hipGetDeviceProperties(&deviceProp, 0); - if (!deviceProp.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - passed(); - return 0; - } - - std::cout << "info: running on bus 0x" << deviceProp.pciBusID << " " << deviceProp.name << "\n"; - - size_t SIZE = BufferSizeInDwords * sizeof(uint); - - HIPCHECK(hipMalloc((void**)&dA, SIZE)); - HIPCHECK(hipHostMalloc((void**)&dC, sizeof(long))); - HIPCHECK(hipMemcpy(dA, init, SIZE, hipMemcpyHostToDevice)); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - dim3 dimBlock = dim3(1); - dim3 dimGrid = dim3(1); - int numBlocks = 0; - uint workgroups[4] = {32, 64, 128, 256}; - - system_clock::time_point start = system_clock::now(); - - for (uint i = 0; i < 4; ++i) { - - dimBlock.x = workgroups[i]; - // Calculate the device occupancy to know how many blocks can be run concurrently - hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, - test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long)); - - dimGrid.x = deviceProp.multiProcessorCount * std::min(numBlocks, 32); - HIPCHECK(hipMalloc((void**)&dB, dimGrid.x * sizeof(long))); - - void *params[4]; - params[0] = (void*)&dA; - params[1] = (void*)&BufferSizeInDwords; - params[2] = (void*)&dB; - params[3] = (void*)&dC; - - std::cout << "Testing with grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n"; - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_gws), dimGrid, dimBlock, params, dimBlock.x * sizeof(long), stream)); - - HIPCHECK(hipMemcpy(init, dC, sizeof(long), hipMemcpyDeviceToHost)); - - if (*dC != (((long)(BufferSizeInDwords) * (BufferSizeInDwords - 1)) / 2)) { - std::cout << "Data validation failed for grid size = " << dimGrid.x << " and block size = " << dimBlock.x << "\n"; - HIPCHECK(hipStreamDestroy(stream)); - hipFree(dC); - hipFree(dB); - hipFree(dA); - delete [] init; - std::cout << "Test failed! 
\n"; - return 0; - } else { - std::cout << "info: data validated!\n"; - } - - } - - system_clock::time_point end = system_clock::now(); - std::chrono::duration elapsed_seconds = end - start; - std::time_t end_time = std::chrono::system_clock::to_time_t(end); - std::cout << "finished computation at " << std::ctime(&end_time) << - "elapsed time: " << elapsed_seconds.count() << "s\n"; - - HIPCHECK(hipStreamDestroy(stream)); - hipFree(dC); - hipFree(dB); - hipFree(dA); - delete [] init; - passed(); - return 0; -} diff --git a/tests/src/runtimeApi/cooperativeGrps/multi_gpu_api_failure_tests.cpp b/tests/src/runtimeApi/cooperativeGrps/multi_gpu_api_failure_tests.cpp deleted file mode 100644 index c834a23cee..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/multi_gpu_api_failure_tests.cpp +++ /dev/null @@ -1,587 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -// Test Description: -/*The general idea of the application is to test how Cooperative Groups kernel -launches work when launching too many warps to multiple target devices. This -tests the following failure modes for hipLaunchCooperativeKernelMultiDevice: - 1) Do not launch more warps to any device than can fit on that device - 2) All device targets for the multi-device launch function must be different - 3) All streams must be explicit (non-NULL) - 4) The kernels sent in must be identical between devices - 5) The grid and block sizes must be identical between devices - 6) The block dimensions must be non-zero - 7) The dynamic shared memory size must be identical between devices. - -This test ensures that the proper error conditions are returned, even if the -target kernel does not actually use any fo the cooperative groups features. - -Note that tests 4, 5, and 7 only hold on Nvidia GPUs. AMD GPUs running ROCm -do not have these constraints. As such, the test checks to see whether they -should fail or succeed and compares this to what actually happens. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - - -#include -#include -#include "test_common.h" - -static inline void hipCheckAndFail(hipError_t errval, - const char *file, int line) { - hipError_t last_err = hipGetLastError(); - if (errval != hipSuccess) { - std::cerr << "hip error: " << hipGetErrorString(errval); - std::cerr << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - failed("\n"); - } - if (last_err != errval) { - std::cerr << "Error: the return value of a function was not the same "; - std::cerr << "as the value returned by hipGetLastError()" << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - std::cerr << " Function returned: " << hipGetErrorString(errval); - std::cerr << " (" << errval << ")" << std::endl; - std::cerr << "hipGetLastError() returned: " << hipGetErrorString(last_err); - std::cerr << " (" << last_err << ")" << std::endl; - failed("\n"); - } -} -#define hipCheckErr(errval) \ - do { hipCheckAndFail((errval), __FILE__, __LINE__); } while (0) - -static int cooperative_groups_support(int device_id) { - hipError_t err; - - int cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&cooperative_attribute, - hipDeviceAttributeCooperativeLaunch, device_id)); - if (!cooperative_attribute) { - std::cerr << "Cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - int multi_gpu_cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&multi_gpu_cooperative_attribute, - hipDeviceAttributeCooperativeMultiDeviceLaunch, device_id)); - - if (!multi_gpu_cooperative_attribute) { - std::cerr << "Multi-GPU cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, 
device_id)); - if (device_properties.cooperativeLaunch == 0) { - std::cerr << "Cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - if (device_properties.cooperativeMultiDeviceLaunch == 0) { - std::cerr << "Multi-GPU cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - return 1; -} - -static int support_for_separate_kernels(int device_id) { - hipError_t err; - - int separate_kernel_supported; - HIPCHECK(hipDeviceGetAttribute(&separate_kernel_supported, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, - device_id)); - if (!separate_kernel_supported) { - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeMultiDeviceUnmatchedFunc == 0) { - return 0; - } - return 1; -} - -static int support_for_separate_grid_sizes(int device_id) { - hipError_t err; - int separate_sizes_supported; - HIPCHECK(hipDeviceGetAttribute(&separate_sizes_supported, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, - device_id)); - if (!separate_sizes_supported) { - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeMultiDeviceUnmatchedGridDim == 0) { - return 0; - } - return 1; -} - -static int support_for_separate_block_dims(int device_id) { - hipError_t err; - int separate_sizes_supported; - HIPCHECK(hipDeviceGetAttribute(&separate_sizes_supported, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, - device_id)); - if (!separate_sizes_supported) { - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeMultiDeviceUnmatchedBlockDim == 0) { - return 0; - } - return 1; -} - -static int support_for_separate_shared_sizes(int device_id) { - 
hipError_t err; - int separate_sizes_supported; - HIPCHECK(hipDeviceGetAttribute(&separate_sizes_supported, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, - device_id)); - if (!separate_sizes_supported) { - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeMultiDeviceUnmatchedSharedMem == 0) { - return 0; - } - return 1; -} - -__global__ void test_kernel(long long *array) { - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - array[rank] += clock64(); -} - -__global__ void second_test_kernel(long long *array) { - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - array[rank] += clock64(); -} - -__global__ void test_kernel_gfx11(long long *array) { -#ifdef __HIP_PLATFORM_AMD__ - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - array[rank] += wall_clock64(); -#endif -} - -__global__ void second_test_kernel_gfx11(long long *array) { -#ifdef __HIP_PLATFORM_AMD__ - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - array[rank] += wall_clock64(); -#endif -} - -int main(int argc, char** argv) { - hipError_t err; - /*************************************************************************/ - /* Parse the command line parameters *************************************/ - // Arguments to pull out of the command line. 
- int device_num, FailFlag = 0; - HIPCHECK(hipGetDeviceCount(&device_num)); - if (device_num < 2) { - std::cout << "This test requires atleast two gpus but the system has "; - std::cout << " only "<< device_num <(&good_dev_array[i]), - good_size)); - HIPCHECK(hipMemsetAsync(good_dev_array[i], 0, good_size, streams[i])); - HIPCHECK(hipMalloc(reinterpret_cast(&bad_dev_array[i]), - bad_size)); - HIPCHECK(hipMemsetAsync(bad_dev_array[i], 0, bad_size, streams[i])); - } - HIPCHECK(hipDeviceSynchronize()); - - /*************************************************************************/ - /* Launch the kernels ****************************************************/ - std::cout << "Launching a multi-GPU cooperative kernel with too many "; - std::cout << "warps..." << std::endl; - - void *dev_params[2][1]; - hipLaunchParams md_params[2]; - for (int i = 0; i < 2; i++) { - dev_params[i][0] = reinterpret_cast(&bad_dev_array[i]); - - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - md_params[i].func = reinterpret_cast(test_kernel_used); - md_params[i].gridDim = 2 * desired_blocks; - md_params[i].blockDim = warp_size; - md_params[i].sharedMem = 0; - md_params[i].stream = streams[i]; - md_params[i].args = dev_params[i]; - } - - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if (err != hipErrorCooperativeLaunchTooLarge) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with too many warps." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorCooperativeLaunchTooLarge ("; - std::cerr << hipErrorCooperativeLaunchTooLarge << ")." 
<< std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - } else { - std::cout << "\tProperly saw this return "; - std::cout << "hipErrorCooperativeLaunchTooLarge" << std::endl; - } - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a multi-GPU cooperative kernel to the same "; - std::cout << "device twice..." << std::endl; - for (int i = 0; i < 2; i++) { - dev_params[i][0] = reinterpret_cast(&good_dev_array[i]); - md_params[i].gridDim = desired_blocks; - md_params[i].stream = streams[0]; - } - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if (err != hipErrorInvalidDevice) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "to the same device twice." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorInvalidDevice ("; - std::cerr << hipErrorInvalidDevice << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - } else { - std::cout << "\tProperly saw this return "; - std::cout << "hipErrorInvalidDevice" << std::endl; - } - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a multi-GPU cooperative kernel to the NULL "; - std::cout << "stream" << std::endl; - for (int i = 0; i < 2; i++) { - md_params[i].stream = NULL; - } - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if (err != hipErrorInvalidResourceHandle) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "to the NULL stream." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorInvalidResourceHandle ("; - std::cerr << hipErrorInvalidResourceHandle << ")." 
<< std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - } else { - std::cout << "\tProperly saw this return "; - std::cout << "hipErrorInvalidResourceHandle" << std::endl; - } - HIPCHECK(hipDeviceSynchronize()); -#ifndef __HIP_PLATFORM_NVIDIA__ - std::cout << "Launching a multi-GPU cooperative kernel with two "; - std::cout << "different kernels." << std::endl; - bool supports_sep_kernels = true; - for (int i = 0; i < 2; i++) { - md_params[i].stream = streams[i]; - if (!support_for_separate_kernels((dev + i))) { - supports_sep_kernels = false; - } - } - auto second_test_kernel_used = IsGfx11() ? second_test_kernel_gfx11 : second_test_kernel; - md_params[1].func = reinterpret_cast(second_test_kernel_used); - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if ((supports_sep_kernels && err != hipSuccess) || - (!supports_sep_kernels && err != hipErrorInvalidValue)) { - if (supports_sep_kernels) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different kernels." << std::endl; - std::cerr << "This SHOULD have succeeded with hipSuccess ("; - std::cerr << hipSuccess << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - } else { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different kernels." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorInvalidValue ("; - std::cerr << hipErrorInvalidValue << ")." 
<< std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - } - FailFlag = 1; - } else { - std::cout << "\tProperly saw this return "; - if (supports_sep_kernels) { - std::cout << "hipSuccess" << std::endl; - } else { - std::cout << "hipErrorInvalidValue" << std::endl; - } - } - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a multi-GPU cooperative kernel with two "; - std::cout << "different grid sizes." << std::endl; - bool supports_sep_sizes = true; - for (int i = 0; i < 2; i++) { - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - md_params[i].func = reinterpret_cast(test_kernel_used); - md_params[i].gridDim = i+1; - if (!support_for_separate_grid_sizes((dev + i))) { - supports_sep_sizes = false; - } - } - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if ((supports_sep_sizes && err != hipSuccess) || - (!supports_sep_sizes && err == hipErrorInvalidValue)) { - if (supports_sep_sizes) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different grid sizes." << std::endl; - std::cerr << "This SHOULD have succeeded with hipSuccess ("; - std::cerr << hipSuccess << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - } else { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different grid sizes." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorInvalidValue ("; - std::cerr << hipErrorInvalidValue << ")." 
<< std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - } - } else { - std::cout << "\tProperly saw this return "; - if (supports_sep_kernels) { - std::cout << "hipSuccess" << std::endl; - } else { - std::cout << "hipErrorInvalidValue" << std::endl; - } - } - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a multi-GPU cooperative kernel with two "; - std::cout << "different block dimensions." << std::endl; - supports_sep_sizes = true; - for (int i = 0; i < 2; i++) { - md_params[i].gridDim = desired_blocks; - md_params[i].blockDim = i+1; - if (!support_for_separate_block_dims((dev + i))) { - supports_sep_sizes = false; - } - } - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if ((supports_sep_sizes && err != hipSuccess) || - (!supports_sep_sizes && err == hipErrorInvalidValue)) { - if (supports_sep_sizes) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different block dimensions." << std::endl; - std::cerr << "This SHOULD have succeeded with hipSuccess ("; - std::cerr << hipSuccess << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - } else { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different block dimensions." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorInvalidValue ("; - std::cerr << hipErrorInvalidValue << ")." 
<< std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - } - } else { - std::cout << "\tProperly saw this return "; - if (supports_sep_kernels) { - std::cout << "hipSuccess" << std::endl; - } else { - std::cout << "hipErrorInvalidValue" << std::endl; - } - } - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a multi-GPU cooperative kernel with block "; - std::cout << "dimensions of zero." << std::endl; - for (int i = 0; i < 2; i++) { - md_params[i].blockDim = 0; - } - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if (err != hipErrorInvalidConfiguration) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with block dimensions of zero." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorInvalidConfiguration ("; - std::cerr << hipErrorInvalidConfiguration << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - } else { - std::cout << "\tProperly saw this return "; - std::cout << "hipErrorInvalidConfiguration" << std::endl; - } - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a multi-GPU cooperative kernel with two "; - std::cout << "different shared memory sizes." << std::endl; - supports_sep_sizes = true; - for (int i = 0; i < 2; i++) { - md_params[i].blockDim = warp_size; - md_params[i].sharedMem = i; - if (!support_for_separate_shared_sizes((dev + i))) { - supports_sep_sizes = false; - } - } - err = hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0); - if ((supports_sep_sizes && err != hipSuccess) || - (!supports_sep_sizes && err == hipErrorInvalidValue)) { - if (supports_sep_sizes) { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different shared memory sizes." 
<< std::endl; - std::cerr << "This SHOULD have succeeded with hipSuccess ("; - std::cerr << hipSuccess << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - } else { - std::cerr << "ERROR! Tried to launch a multi-GPU cooperative kernel "; - std::cerr << "with two different shared memory sizes." << std::endl; - std::cerr << "This SHOULD have failed with the error "; - std::cerr << "hipErrorInvalidValue ("; - std::cerr << hipErrorInvalidValue << ")." << std::endl; - std::cerr << "Instead, the launch returned " << hipGetErrorName(err); - std::cerr << " (" << err << ")" << std::endl; - FailFlag = 1; - } - } else { - std::cout << "\tProperly saw this return "; - if (supports_sep_kernels) { - std::cout << "hipSuccess" << std::endl; - } else { - std::cout << "hipErrorInvalidValue" << std::endl; - } - } - HIPCHECK(hipDeviceSynchronize()); - - std::cout << "Launching a multi-GPU cooperative kernel with maximum "; - std::cout << "number of warps..." << std::endl; - for (int i = 0; i < 2; i++) { - md_params[i].sharedMem = 0; - } - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0)); - std::cout << "\tProperly launched." << std::endl; - - HIPCHECK(hipDeviceSynchronize()); -#endif - for (int m = 0; m < 2; ++m) { - HIPCHECK(hipFree(good_dev_array[m])); - HIPCHECK(hipFree(bad_dev_array[m])); - HIPCHECK(hipStreamDestroy(streams[m])); - } - if (FailFlag == 1) { - break; - } - } - if (FailFlag == 1) { - failed("\n"); - } else { - passed(); - } -} diff --git a/tests/src/runtimeApi/cooperativeGrps/multi_gpu_streams.cpp b/tests/src/runtimeApi/cooperativeGrps/multi_gpu_streams.cpp deleted file mode 100644 index a04caa6188..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/multi_gpu_streams.cpp +++ /dev/null @@ -1,660 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test Description: -/*The general idea of the application is to test how multi-GPU Cooperative -Groups kernel launches to a stream interact with other things that may be -simultaneously running in the same streams. - -The HIP specification says that a multi-GPU cooperative launch will wait -until all of the streams it's using finish their work. Only then will the -cooperative kernel be launched to all of the devices. Then no other work -can take part in the any of the streams until all of the multi-GPU -cooperative work is done. - -However, there are flags that allow you to disable each of these -serialization points: hipCooperativeLaunchMultiDeviceNoPreSync and -hipCooperativeLaunchMultiDeviceNoPostSync. - -As such, this benchmark tests the following five situations launching -to two GPUs (and thus two streams): - - 1. 
Normal multi-GPU cooperative kernel: - This should result in the following pattern: - Stream 0: Cooperative - Stream 1: Cooperative - 2. Regular kernel launches and multi-GPU cooperative kernel launches - with the default flags, resulting in the following pattern: - Stream 0: Regular --> Cooperative - Stream 1: --> Cooperative --> Regular - - 3. Regular kernel launches and multi-GPU cooperative kernel launches - that turn off "pre-sync". This should allow a cooperative kernel - to launch even if work is already in a stream pointing to - another GPU. - This should result in the following pattern: - Stream 0: Regular --> Cooperative - Stream 1: Cooperative --> Regular - - 4. Regular kernel launches and multi-GPU cooperative kernel launches - that turn off "post-sync". This should allow a new kernel to enter - a GPU even if another GPU still has a cooperative kernel on it. - This should result in the following pattern: - Stream 0: Regular --> Cooperative - Stream 1: --> Cooperative--> Regular - - 5. Regular kernel launches and multi-GPU cooperative kernel launches - that turn off both pre- and post-sync. This should allow any of - the kernels to launch to their GPU regardless of the status of - other kernels in other multi-GPU stream groups. - This should result in the following pattern: - Stream 0: Regular --> Cooperative - Stream 1: Cooperative --> Regular - -We time how long it takes to run each of these benchmarks and print it as -the output of the benchmark. The kernels themselves are just useless time- -wasting code so that the kernel takes a meaningful amount of time on the -GPU before it exits. We only launch a single wavefront for each kernel, so -any serialization should not be because of GPU occupancy concerns. - -If tests 2, 3, and 4 take roughly 3x as long as #1, that implies that -cooperative kernels are serialized as expected. - -If test #5 takes roughly twice as long as #1, that implies that the -overlap-allowing flags work as expected. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 -rdc=true -gencode arch=compute_70,code=sm_70 - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -static inline void hipCheckAndFail(hipError_t errval, - const char *file, int line) { - hipError_t last_err = hipGetLastError(); - if (errval != hipSuccess) { - std::cerr << "hip error: " << hipGetErrorString(errval); - std::cerr << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - failed("\n"); - } - if (last_err != errval) { - std::cerr << "Error: the return value of a function was not the same "; - std::cerr << "as the value returned by hipGetLastError()" << std::endl; - std::cerr << " Location: " << file << ":" << line << std::endl; - std::cerr << " Function returned: " << hipGetErrorString(errval); - std::cerr << " (" << errval << ")" << std::endl; - std::cerr << "hipGetLastError() returned: " << hipGetErrorString(last_err); - std::cerr << " (" << last_err << ")" << std::endl; - failed("\n"); - } -} -#define hipCheckErr(errval) \ - do { hipCheckAndFail((errval), __FILE__, __LINE__); } while (0) - -static int cooperative_groups_support(int device_id) { - hipError_t err; - int cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&cooperative_attribute, - hipDeviceAttributeCooperativeLaunch, device_id)); - if (!cooperative_attribute) { - std::cerr << "Cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - int multi_gpu_cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&multi_gpu_cooperative_attribute, - hipDeviceAttributeCooperativeMultiDeviceLaunch, device_id)); - if (!multi_gpu_cooperative_attribute) { - std::cerr << "Multi-GPU cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - hipDeviceProp_t 
device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeLaunch == 0) { - std::cerr << "Cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - if (device_properties.cooperativeMultiDeviceLaunch == 0) { - std::cerr << "Multi-GPU cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - return 1; -} - -__global__ void test_coop_kernel(unsigned int loops, long long *array, - int fast_gpu) { - cooperative_groups::multi_grid_group mgrid = - cooperative_groups::this_multi_grid(); - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - - if (mgrid.grid_rank() == fast_gpu) { - return; - } - - for (int i = 0; i < loops; i++) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - array[rank] += clock64(); - } -} - -__global__ void test_kernel(uint32_t loops, unsigned long long *array) { - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - - for (int i = 0; i < loops; i++) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. 
- last_clock = cur_clock; - } while(time_diff < 1000000); - array[rank] += clock64(); - } -} - -__global__ void test_coop_kernel_gfx11(unsigned int loops, long long *array, - int fast_gpu) { -#ifdef __HIP_PLATFORM_AMD__ - cooperative_groups::multi_grid_group mgrid = - cooperative_groups::this_multi_grid(); - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - - if (mgrid.grid_rank() == fast_gpu) { - return; - } - - for (int i = 0; i < loops; i++) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - array[rank] += wall_clock64(); - } -#endif -} - -__global__ void test_kernel_gfx11(uint32_t loops, unsigned long long *array) { -#ifdef __HIP_PLATFORM_AMD__ - unsigned int rank = blockIdx.x * blockDim.x + threadIdx.x; - - for (int i = 0; i < loops; i++) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. 
- last_clock = cur_clock; - } while(time_diff < 1000000); - array[rank] += wall_clock64(); - } -#endif -} - -int main(int argc, char** argv) { - hipError_t err; - int device_num, FailFlag = 0; - uint32_t loops = 2000; - uint32_t fast_loops = 1; - int32_t fast_gpu = -1; - HIPCHECK(hipGetDeviceCount(&device_num)); - if (device_num < 2) { - std::cout << "This test requires atleast two gpus but the system has "; - std::cout << " only "<< device_num < max_blocks_per_sm * num_sm) { - std::cerr << "The requested number of blocks will not fit on the GPU"; - std::cerr << std::endl; - std::cerr << "You requested " << desired_blocks << " but we can only "; - std::cerr << "fit " << (max_blocks_per_sm * num_sm) << std::endl; - failed("\n"); - } - - /*************************************************************************/ - /* Create the streams we will use in this test. **************************/ - hipStream_t streams[2]; - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipStreamCreate(&streams[i])); - } - - /*************************************************************************/ - /* Set up data to pass into the kernelx **********************************/ - - // Alocate the host input buffer, and two device-focused buffers that we - // will use for our test. 
- unsigned long long *dev_array[2]; - for (int i = 0; i < 2; i++) { - int good_size = desired_blocks * warp_size * sizeof(long long); - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipMalloc(reinterpret_cast(&dev_array[i]), good_size)); - HIPCHECK(hipMemsetAsync(dev_array[i], 0, good_size, streams[i])); - } - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipDeviceSynchronize()); - } - - /*************************************************************************/ - /* Launch the kernels ****************************************************/ - void *dev_params[2][3]; - hipLaunchParams md_params[2]; - std::chrono::time_point start_time[6]; - std::chrono::time_point end_time[6]; - - std::cout << "Test 0: Launching a multi-GPU cooperative kernel...\n"; - std::cout << "This should result in the following pattern:" << std::endl; - std::cout << "GPU " << dev << ": Long Coop Kernel" << std::endl; - std::cout << "GPU " << (dev + 1) << ": Long Coop Kernel" << std::endl; - - auto test_coop_kernel_used = IsGfx11() ? 
test_coop_kernel_gfx11 : test_coop_kernel; - for (int i = 0; i < 2; i++) { - dev_params[i][0] = reinterpret_cast(&loops); - dev_params[i][1] = reinterpret_cast(&dev_array[i]); - dev_params[i][2] = reinterpret_cast(&fast_gpu); - md_params[i].func = reinterpret_cast(test_coop_kernel_used); - md_params[i].gridDim = desired_blocks; - md_params[i].blockDim = warp_size; - md_params[i].sharedMem = 0; - md_params[i].stream = streams[i]; - md_params[i].args = dev_params[i]; - } - - start_time[0] = std::chrono::system_clock::now(); - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0)); - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipDeviceSynchronize()); - } - end_time[0] = std::chrono::system_clock::now(); - - std::cout << std::endl; - std::cout << "Test 1: Launching a multi-GPU cooperative kernel with the "; - std::cout << "following pattern:" << std::endl; - std::cout << "GPU " << dev << ": Standard Kernel --> Long Coop Kernel\n"; - std::cout << "GPU " << (dev + 1) << ": --> Coop "; - std::cout << "--> Standard Kernel\n"; - fast_gpu = 1; - start_time[1] = std::chrono::system_clock::now(); - HIPCHECK(hipSetDevice(dev)); - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[0], loops, dev_array[0]); - HIPCHECK(hipGetLastError()); - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0)); - HIPCHECK(hipSetDevice(dev + 1)); - test_kernel_used = IsGfx11() ? 
test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[1], loops, dev_array[1]); - HIPCHECK(hipGetLastError()); - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipDeviceSynchronize()); - } - end_time[1] = std::chrono::system_clock::now(); - fast_gpu = -1; - - std::cout << std::endl; - std::cout << "Test 2: Launching a multi-GPU cooperative kernel with the "; - std::cout << "following pattern:" << std::endl; - std::cout << "GPU " << dev << ": Standard Kernel --> Coop" << std::endl; - std::cout << "GPU " << (dev + 1) << ": --> Long Coop"; - std::cout << " Kernel --> "; - std::cout << "Standard Kernel\n"; - fast_gpu = 0; - start_time[2] = std::chrono::system_clock::now(); - HIPCHECK(hipSetDevice(dev)); - test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[0], loops, dev_array[0]); - HIPCHECK(hipGetLastError()); - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0)); - HIPCHECK(hipSetDevice(dev + 1)); - test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[1], loops, dev_array[1]); - HIPCHECK(hipGetLastError()); - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipDeviceSynchronize()); - } - end_time[2] = std::chrono::system_clock::now(); - fast_gpu = -1; - - std::cout << std::endl; - std::cout << "Test 3: Launching a multi-GPU cooperative kernel with the "; - std::cout << "ability to overlap regular and cooperative kernels "; - std::cout << "only at the beginning." 
<< std::endl; - std::cout << "This should result in the following pattern:" << std::endl; - std::cout << "GPU " << dev << ": Standard Kernel --> Coop" << std::endl; - std::cout << "GPU " << (dev + 1) << ": Long Coop Kernel --> Standard"; - std::cout<< " Kernel\n"; - fast_gpu = 0; - start_time[3] = std::chrono::system_clock::now(); - HIPCHECK(hipSetDevice(dev)); - test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[0], loops, dev_array[0]); - HIPCHECK(hipGetLastError()); - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, - hipCooperativeLaunchMultiDeviceNoPreSync)); - HIPCHECK(hipSetDevice(dev + 1)); - test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[1], loops, dev_array[1]); - HIPCHECK(hipGetLastError()); - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipDeviceSynchronize()); - } - end_time[3] = std::chrono::system_clock::now(); - fast_gpu = -1; - - std::cout << std::endl; - std::cout << "Test 4: Launching a multi-GPU cooperative kernel with the "; - std::cout << "ability to overlap regular and cooperative kernels "; - std::cout << "only at the end." << std::endl; - std::cout << "This should result in the following pattern:" << std::endl; - std::cout << "GPU " << dev << ": Standard Kernel --> Long Coop Kernel\n"; - std::cout << "GPU " << (dev + 1) << ": --> Coop --> "; - std::cout << "Standard Kernel\n"; - fast_gpu = 1; - start_time[4] = std::chrono::system_clock::now(); - HIPCHECK(hipSetDevice(dev)); - test_kernel_used = IsGfx11() ? 
test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[0], loops, dev_array[0]); - HIPCHECK(hipGetLastError()); - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, - hipCooperativeLaunchMultiDeviceNoPostSync)); - HIPCHECK(hipSetDevice(dev + 1)); - test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[1], loops, dev_array[1]); - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipDeviceSynchronize()); - } - end_time[4] = std::chrono::system_clock::now(); - fast_gpu = -1; - - std::cout << std::endl; - std::cout << "Test 5: Launching a multi-GPU cooperative kernel with the "; - std::cout << "ability to overlap regular and cooperative kernels"; - std::cout << std::endl; - std::cout << "This should result in the following pattern:" << std::endl; - std::cout << "GPU " << dev << ": Standard Kernel --> Long Coop Kernel\n"; - std::cout << "GPU " << (dev + 1) << ": Long Coop Kernel --> Standard"; - std::cout << " Kernel\n"; - start_time[5] = std::chrono::system_clock::now(); - HIPCHECK(hipSetDevice(dev)); - test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[0], loops, dev_array[0]); - HIPCHECK(hipGetLastError()); - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, - hipCooperativeLaunchMultiDeviceNoPreSync | - hipCooperativeLaunchMultiDeviceNoPostSync)); - HIPCHECK(hipSetDevice(dev + 1)); - test_kernel_used = IsGfx11() ? 
test_kernel_gfx11 : test_kernel; - hipLaunchKernelGGL(test_kernel_used, dim3(desired_blocks), dim3(warp_size), 0, - streams[1], loops, dev_array[1]); - HIPCHECK(hipGetLastError()); - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice(dev + i)); - HIPCHECK(hipDeviceSynchronize()); - } - end_time[5] = std::chrono::system_clock::now(); - - std::chrono::duration single_kernel_time = - (end_time[0] - start_time[0]); - std::chrono::duration serialized_gpu0_time = - (end_time[1] - start_time[1]); - std::chrono::duration serialized_gpu1_time = - (end_time[2] - start_time[2]); - std::chrono::duration pre_overlapped_time = - (end_time[3] - start_time[3]); - std::chrono::duration post_overlapped_time = - (end_time[4] - start_time[4]); - std::chrono::duration overlapped_time = - (end_time[5] - start_time[5]); - - std::cout << "Test 0: A single kernel on both GPUs took:" << std::endl; - std::cout << " " << single_kernel_time.count(); - std::cout << " seconds" << std::endl; - std::cout << std::endl; - std::cout << "Test 1: Serialized set of three kernels with GPU0"; - std::cout << " being long took:"; - std::cout << " " << serialized_gpu0_time.count(); - std::cout << " seconds" << std::endl; - std::cerr << "Expect between " << (2.7 * single_kernel_time.count()); - std::cerr << " and "; - std::cerr << (3.3 * single_kernel_time.count()) << " seconds.\n"; - std::cout << std::endl; - std::cout << "Test 2: Serialized set of three kernels with GPU1"; - std::cout << " being long took:" << std::endl; - std::cout << " " << serialized_gpu1_time.count(); - std::cout << " seconds" << std::endl; - std::cerr << "Expect between " << (2.7 * single_kernel_time.count()); - std::cerr << " and "; - std::cerr << (3.3 * single_kernel_time.count()) << " seconds.\n"; - std::cout << std::endl; - std::cout << "Test 3: Multiple kernels with pre-overlap allowed took:\n"; - std::cout << " " << pre_overlapped_time.count(); - std::cout << " seconds" << std::endl; - std::cerr << "Expect between " << (1.7 * 
single_kernel_time.count()); - std::cerr << " and "; - std::cerr << (2.3 * single_kernel_time.count()) << " seconds.\n"; - std::cout << std::endl; - std::cout << "Test 4: Multiple kernels with post-overlap allowed took:\n"; - std::cout << " " << post_overlapped_time.count(); - std::cout << " seconds" << std::endl; - std::cerr << "Expect between " << (1.7 * single_kernel_time.count()); - std::cerr << " and "; - std::cerr << (2.3 * single_kernel_time.count()) << " seconds."; - std::cout << std::endl; - std::cout << "Test 5: Multiple kernels with overlap allowed took:\n"; - std::cout << " " << overlapped_time.count(); - std::cout << " seconds" << std::endl; - std::cerr << "Expect between " << (1.8 * single_kernel_time.count()); - std::cerr << " and "; - std::cerr << (2.2 * single_kernel_time.count()) << " seconds.\n"; - - // Test that fully not-overlapped kernels take roughly 3x as long as one - // cooperative kernel. - if (serialized_gpu0_time > 3.3 * single_kernel_time || - serialized_gpu0_time < 2.7 * single_kernel_time) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Test 1, the first case where all kernels should be "; - std::cerr << "serialized, had a runtime that was very different "; - std::cerr << "than what was expected." << std::endl; - std::cerr << "Was " << serialized_gpu0_time.count() << " seconds.\n"; - std::cerr << "Expected between "; - std::cerr << (2.7 * single_kernel_time.count()) << " and "; - std::cerr << (3.3 * single_kernel_time.count()) << " seconds.\n"; - std::cerr << "Were they truly serialized?" << std::endl; - FailFlag = 1; - } - - // Test that fully not-overlapped kernels take roughly 3x as long as one - // cooperative kernel. - if (serialized_gpu1_time > 3.3 * single_kernel_time || - serialized_gpu1_time < 2.7 * single_kernel_time) { - std::cerr << "ERROR!" 
<< std::endl; - std::cerr << "Test 2, the second case where all kernels should be "; - std::cerr << "serialized, had a runtime that was very different "; - std::cerr << "than what was expected." << std::endl; - std::cerr << "Was " << serialized_gpu1_time.count(); - std::cerr << " seconds." << std::endl; - std::cerr << "Expected between "; - std::cerr << (2.7 * single_kernel_time.count()) << " and "; - std::cerr << (3.3 * single_kernel_time.count()) << " seconds.\n"; - std::cerr << "Were they truly serialized?" << std::endl; - FailFlag = 1; - } - - // Test that kernels that can overlap only before the cooperative kernel - // launches kernels take roughly the same time (in this case) - if (pre_overlapped_time > 2.3 * single_kernel_time || - pre_overlapped_time < 1.7 * single_kernel_time) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Test 3, the case where the last kernel is serialized, had "; - std::cerr << "a runtime that was very different than what was "; - std::cerr << "expected." << std::endl; - std::cerr << "Was " << pre_overlapped_time.count() << " seconds.\n"; - std::cerr << "Expected between "; - std::cerr << (1.7 * single_kernel_time.count()) << " and "; - std::cerr << (2.3 * single_kernel_time.count()) << " seconds.\n"; - FailFlag = 1; - } - - // Test that kernels that can overlap only after the cooperative kernel - // launches kernels take roughly the same time (in this case) - if (post_overlapped_time > 2.3 * single_kernel_time || - post_overlapped_time < 1.7 * single_kernel_time) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Teste 4, the case where the first kernel is "; - std::cerr << "serialized, had a runtime that was very different "; - std::cerr << "than what was expected." 
<< std::endl; - std::cerr << "Was " << post_overlapped_time.count() << " seconds.\n"; - std::cerr << "Expected between "; - std::cerr << (1.7 * single_kernel_time.count()) << " and "; - std::cerr << (2.3 * single_kernel_time.count()) << " seconds.\n"; - FailFlag = 1; - } - - // Test that, with the right flags on the kernel launch, that we prevent - // incomplete launches from serializing the cooperative launch streams. - if (overlapped_time > 2.2 * single_kernel_time || - overlapped_time < 1.8 * single_kernel_time) { - std::cerr << "ERROR!" << std::endl; - std::cerr << "Test 5, the case where normal and cooperative kernel "; - std::cerr << "launches should overlap, does not appear to have done so."; - std::cerr << std::endl; - std::cerr << "Was " << overlapped_time.count() << " seconds.\n"; - std::cerr << "Expected between "; - std::cerr << (1.8 * single_kernel_time.count()) << " and "; - std::cerr << (2.2 * single_kernel_time.count()) << " seconds.\n"; - std::cerr << "Is the normal kernel being serialized with the "; - std::cerr << "cooperative kernels on different streams?" << std::endl; - FailFlag = 1; - } - for (int k = 0; k < 2; ++k) { - HIPCHECK(hipFree(dev_array[k])); - HIPCHECK(hipStreamDestroy(streams[k])); - } - if (FailFlag == 1) { - break; - } - } - if (FailFlag == 1) { - failed("\n"); - } else { - passed(); - } -} diff --git a/tests/src/runtimeApi/cooperativeGrps/multi_grid_group_all_gpus.cpp b/tests/src/runtimeApi/cooperativeGrps/multi_grid_group_all_gpus.cpp deleted file mode 100644 index e96b967838..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/multi_grid_group_all_gpus.cpp +++ /dev/null @@ -1,470 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// Test Description: -/*The general idea of the application is to launch N warps to all GPUs detected -in the HIP system. N is a command-line parameter, but the user should set N -small enough that all warps can be on each of the GPUs at the same time. - -All of the warps do a "work loop". Within the work loop, every warp -atomically increments a global variable that is shared between both fo the -target GPUs. The value returned from this atomic increment entriely depends -on the order the warps from the GPUs arrive at the atomic instruction. Each -warp then stores the result into a global array based on its warp ID. - -We also add a sleep/wait loop into the code so that the last warp runs much -slower than everyone else. As such, it should store much larger values than -all the other warps. 
- -If there are no barrier within the loop, then warp 0 will likely ge to the -global variable the first time while all the other warps have each -incremented it many times. If the barrier properly works, then each warp -will increment the variable once per time through the loop, and all threads -will sleep on the barrier waiting for the last warp to finally catch up. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -rdc=true -gencode arch=compute_70,code=sm_70 - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -static int cooperative_groups_support(int device_id) { - hipError_t err; - int cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&cooperative_attribute, - hipDeviceAttributeCooperativeLaunch, device_id)); - if (!cooperative_attribute) { - std::cerr << "Cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - int multi_gpu_cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&multi_gpu_cooperative_attribute, - hipDeviceAttributeCooperativeMultiDeviceLaunch, device_id)); - if (!multi_gpu_cooperative_attribute) { - std::cerr << "Multi-GPU cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeLaunch == 0) { - std::cerr << "Cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - if (device_properties.cooperativeMultiDeviceLaunch == 0) { - std::cerr << "Multi-GPU cooperative group support not available in "; - std::cerr << "device properties." 
<< std::endl; - return 0; - } - return 1; -} - -static int verify_barrier_buffer(unsigned int loops, unsigned int warps, - unsigned int *host_buffer, - unsigned int num_devs) { - unsigned int max_in_this_loop = 0; - for (unsigned int i = 0; i < loops; i++) { - max_in_this_loop += (warps * num_devs); - for (unsigned int j = 0; j < warps; j++) { - if (host_buffer[i*warps+j] > max_in_this_loop) { - std::cerr << "Barrier failure!" << std::endl; - std::cerr << " Buffer entry " << i*warps+j; - std::cerr << " contains the value " << host_buffer[i*warps+j]; - std::cerr << " but it should not be more than "; - std::cerr << max_in_this_loop << std::endl; - return -1; - } - } - } - std::cout << "\tBarriers work properly!" << std::endl; - return 0; -} - -static int verify_multi_gpu_buffer(unsigned int loops, unsigned int array_val) { - unsigned int desired_val = 0; - for (int i = 0; i < loops; i++) { - if (i % 2 == 0) { - desired_val += 2; - } - else { - desired_val *= 2; - } - } - std::cout << "Desired value is " << desired_val << std::endl; - if (array_val != desired_val) { - std::cerr << "ERROR! Multi-grid barrier does not appear to work."; - std::cerr << std::endl; - std::cerr << "Expected the multi-GPUs to work together to produce "; - std::cerr << "the value " << desired_val << std::endl; - std::cerr << "However, the entry returned from the multi-GPU "; - std::cerr << "kernel was " << array_val << std::endl; - return -1; - } - std::cout << "\tMulti-GPU barriers appear to work here." 
<< std::endl; - return 0; -} - -__global__ void -test_kernel(unsigned int *atomic_val, unsigned int *global_array, - unsigned int *array, uint32_t loops) { - cooperative_groups::grid_group grid = cooperative_groups::this_grid(); - cooperative_groups::multi_grid_group mgrid = - cooperative_groups::this_multi_grid(); - unsigned rank = grid.thread_rank(); - unsigned global_rank = mgrid.thread_rank(); - - int offset = blockIdx.x; - for (int i = 0; i < loops; i++) { - // Make the last thread run way behind everyone else. - // If the grid barrier below fails, then the other threads may hit the - // atomicInc instruction many times before the last thread ever gets - // to it. - // As such, without the barrier, the last array entry will eventually - // contain a very large value, defined by however many times the other - // wavefronts make it through this loop. - // If the barrier works, then it will likely contain some number - // near "total number of blocks". It will be the last wavefront to - // reach the atomicInc, but everyone will have only hit the atomic once. - if (rank == (grid.size() - 1)) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - } - if (threadIdx.x == 0) { - array[offset] = atomicInc(atomic_val, UINT_MAX); - } - grid.sync(); - - // Make the last thread in the entire multi-grid run way behind - // everyone else. - // If the mgrid barrier below fails, then the two global_array entries - // will end up being out of sync, because the intermingling of adds - // and multiplies will not be aligned between to the two GPUs. 
- if (global_rank == (mgrid.size() - 1)) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - } - // During even iterations, add into your own array entry - // During odd iterations, add into your partner's array entry - unsigned grid_rank = mgrid.grid_rank(); - unsigned inter_gpu_offset = (grid_rank + i) % mgrid.num_grids(); - if (rank == (grid.size() - 1)) { - if (i % mgrid.num_grids() == 0) { - global_array[grid_rank] += 2; - } else { - global_array[inter_gpu_offset] *= 2; - } - } - mgrid.sync(); - offset += gridDim.x; - } -} - -__global__ void -test_kernel_gfx11(unsigned int *atomic_val, unsigned int *global_array, - unsigned int *array, uint32_t loops) { -#ifdef __HIP_PLATFORM_AMD__ - cooperative_groups::grid_group grid = cooperative_groups::this_grid(); - cooperative_groups::multi_grid_group mgrid = - cooperative_groups::this_multi_grid(); - unsigned rank = grid.thread_rank(); - unsigned global_rank = mgrid.thread_rank(); - - int offset = blockIdx.x; - for (int i = 0; i < loops; i++) { - // Make the last thread run way behind everyone else. - // If the grid barrier below fails, then the other threads may hit the - // atomicInc instruction many times before the last thread ever gets - // to it. - // As such, without the barrier, the last array entry will eventually - // contain a very large value, defined by however many times the other - // wavefronts make it through this loop. - // If the barrier works, then it will likely contain some number - // near "total number of blocks". It will be the last wavefront to - // reach the atomicInc, but everyone will have only hit the atomic once. 
- if (rank == (grid.size() - 1)) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - } - if (threadIdx.x == 0) { - array[offset] = atomicInc(atomic_val, UINT_MAX); - } - grid.sync(); - - // Make the last thread in the entire multi-grid run way behind - // everyone else. - // If the mgrid barrier below fails, then the two global_array entries - // will end up being out of sync, because the intermingling of adds - // and multiplies will not be aligned between to the two GPUs. - if (global_rank == (mgrid.size() - 1)) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. 
- last_clock = cur_clock; - } while(time_diff < 1000000); - } - // During even iterations, add into your own array entry - // During odd iterations, add into your partner's array entry - unsigned grid_rank = mgrid.grid_rank(); - unsigned inter_gpu_offset = (grid_rank + i) % mgrid.num_grids(); - if (rank == (grid.size() - 1)) { - if (i % mgrid.num_grids() == 0) { - global_array[grid_rank] += 2; - } else { - global_array[inter_gpu_offset] *= 2; - } - } - mgrid.sync(); - offset += gridDim.x; - } -#endif -} - -int main(int argc, char** argv) { - hipError_t err; - int num_devices = 0; - uint32_t loops = 2; - uint32_t warps = 10; - uint32_t block_size = 1; - - std::cout << "Loops: " << loops << std::endl; - std::cout << "Warps: " << warps << std::endl; - std::cout << "Block size: " << block_size << std::endl; - - HIPCHECK(hipGetDeviceCount(&num_devices)); - if (num_devices < 2) { - std::cout << "Not enough GPUs to run test." << std::endl; - std::cout << "We require at least 2 GPUs, but only found "; - std::cout << num_devices << std::endl; - std::cout << "Skipping the test with PASSED result\n"; - passed(); - } - - uint32_t device_num[num_devices]; - - /*************************************************************************/ - /* Test whether target device supports cooperative groups ****************/ - for (int i = 0; i < num_devices; i++) { - device_num[i] = i; - if (!cooperative_groups_support(device_num[i])) { - std::cout << "Skipping the test with Pass result.\n"; - passed(); - } - } - - /*************************************************************************/ - /* Test whether the requested size will fit on the GPU *******************/ - int warp_sizes[num_devices]; - int num_sms[num_devices]; - hipDeviceProp_t device_properties[num_devices]; - int warp_size = INT_MAX; - int num_sm = INT_MAX; - for (int i = 0; i < num_devices; i++) { - HIPCHECK(hipGetDeviceProperties(&device_properties[i], device_num[i])); - warp_sizes[i] = device_properties[i].warpSize; - if 
(warp_sizes[i] < warp_size) { - warp_size = warp_sizes[i]; - } - num_sms[i] = device_properties[i].multiProcessorCount; - if (num_sms[i] < num_sm) { - num_sm = num_sms[i]; - } - std::cout << "Device " << (i + 1); - std::cout << " name: " << device_properties[i].name << std::endl; - } - std::cout << std::endl; - - int num_threads_in_block = block_size * warp_size; - - // Calculate the device occupancy to know how many blocks can be run. - int max_blocks_per_sm_arr[num_devices]; - int max_blocks_per_sm = INT_MAX; - for (int i = 0; i < num_devices; i++) { - HIPCHECK(hipSetDevice(device_num[i])); - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor( - &max_blocks_per_sm_arr[i], test_kernel_used, num_threads_in_block, 0)); - if (max_blocks_per_sm_arr[i] < max_blocks_per_sm) { - max_blocks_per_sm = max_blocks_per_sm_arr[i]; - } - } - - int requested_blocks = warps / block_size; - if (requested_blocks > max_blocks_per_sm * num_sm) { - std::cerr << "Requesting to run " << requested_blocks << " blocks, "; - std::cerr << "but we can only guarantee to simultaneously run "; - std::cerr << (max_blocks_per_sm * num_sm) << std::endl; - failed("\n"); - } - - /*************************************************************************/ - /* Set up data to pass into the kernel ***********************************/ - // Each block will output a single value per loop. 
- uint32_t total_buffer_len = requested_blocks*loops; - - // Alocate the buffer that will hold the kernel's output, and which will - // also be used to globally synchronize during GWS initialization - unsigned int *host_buffer[num_devices]; - unsigned int *kernel_buffer[num_devices]; - unsigned int *kernel_atomic[num_devices]; - hipStream_t streams[num_devices]; - for (int i = 0; i < num_devices; i++) { - host_buffer[i] = (unsigned int*)calloc(total_buffer_len, - sizeof(unsigned int)); - HIPCHECK(hipSetDevice(device_num[i])); - HIPCHECK(hipMalloc(reinterpret_cast(&kernel_buffer[i]), - total_buffer_len * sizeof(unsigned int))); - HIPCHECK(hipMemcpy(kernel_buffer[i], host_buffer[i], - total_buffer_len * sizeof(unsigned int), - hipMemcpyHostToDevice)); - HIPCHECK(hipMalloc(reinterpret_cast(&kernel_atomic[i]), - sizeof(unsigned int))); - HIPCHECK(hipMemset(kernel_atomic[i], 0, sizeof(unsigned int))); - HIPCHECK(hipStreamCreate(&streams[i])); - } - - // Single kernel atomic shared between both devices; put it on the host - unsigned int* global_array; - HIPCHECK(hipHostMalloc(reinterpret_cast(&global_array), - num_devices * sizeof(unsigned int), 0)); - HIPCHECK(hipMemset(global_array, 0, num_devices * sizeof(unsigned int))); - - /*************************************************************************/ - /* Launch the kernels ****************************************************/ - std::cout << "Launching a kernel with " << warps << " warps "; - std::cout << "in " << requested_blocks << " thread blocks."; - std::cout << std::endl; - - void *dev_params[num_devices][4]; - hipLaunchParams md_params[num_devices]; - auto test_kernel_used = IsGfx11() ? 
test_kernel_gfx11 : test_kernel; - for (int i = 0; i < num_devices; i++) { - dev_params[i][0] = reinterpret_cast(&kernel_atomic[i]); - dev_params[i][1] = reinterpret_cast(&global_array); - dev_params[i][2] = reinterpret_cast(&kernel_buffer[i]); - dev_params[i][3] = reinterpret_cast(&loops); - md_params[i].func = reinterpret_cast(test_kernel_used); - md_params[i].gridDim = requested_blocks; - md_params[i].blockDim = num_threads_in_block; - md_params[i].sharedMem = 0; - md_params[i].stream = streams[i]; - md_params[i].args = dev_params[i]; - } - - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, num_devices, 0)); - HIPCHECK(hipDeviceSynchronize()); - - /*************************************************************************/ - /* Read back the buffers and print out its data **************************/ - for (int dev = 0; dev < num_devices; dev++) { - HIPCHECK(hipMemcpy(host_buffer[dev], kernel_buffer[dev], - total_buffer_len * sizeof(unsigned int), - hipMemcpyDeviceToHost)); - } - - for (unsigned int i = 0; i < loops; i++) { - for (int dev = 0; dev < num_devices; dev++) { - std::cout << "+++++++++++++++++ Device " << dev; - std::cout << "+++++++++++++++++" << std::endl; - for (unsigned int j = 0; j < requested_blocks; j++) { - std::cout << "Buffer entry " << (i*warps+j); - std::cout << " (written by warp " << j << ")"; - std::cout << " is " << host_buffer[dev][i*requested_blocks+j]; - std::cout << std::endl; - } - } - std::cout << "==========================\n"; - } - for (unsigned int dev = 0; dev < num_devices; dev++) { - std::cout << "Testing output from device " << dev << std::endl; - int local_ret_val = verify_barrier_buffer(loops, requested_blocks, - host_buffer[dev], num_devices); - if (local_ret_val) { - failed("\n"); - } - } - - std::cout << std::endl << "The multi-GPU shared updates contain:\n"; - for (int i = 0; i < num_devices; i++) { - std::cout << "Entry " << i << ": "; - std::cout << global_array[i] << std::endl; - } - int flag = 0; - for 
(int dev = 0; dev < num_devices; dev++) { - std::cout << "Testing multi-GPU output for entry " << dev << std::endl; - int local_ret_val = verify_multi_gpu_buffer(loops, global_array[dev]); - if (local_ret_val) { - flag = 1; - } - } - for (int k = 0; k < num_devices; ++k) { - HIPCHECK(hipFree(kernel_buffer[k])); - HIPCHECK(hipFree(kernel_atomic[k])); - HIPCHECK(hipStreamDestroy(streams[k])); - free(host_buffer[k]); - } - if (flag == 1) { - failed("\n"); - } else { - passed(); - } -} diff --git a/tests/src/runtimeApi/cooperativeGrps/simple_coalesced_groups.cpp b/tests/src/runtimeApi/cooperativeGrps/simple_coalesced_groups.cpp deleted file mode 100644 index 9f2775b5dd..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/simple_coalesced_groups.cpp +++ /dev/null @@ -1,583 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// Test Description: -/* This test implements sum reduction kernel, first with each threads own rank - as input and comparing the sum with expected sum output derieved from n(n-1)/2 - formula. The second part, partitions this parent group into child subgroups - a.k.a tiles using using tiled_partition() collective operation. This can be called - with a static tile size, passed in templated non-type variable-tiled_partition, - or in runtime as tiled_partition(thread_group parent, tileSz). This test covers both these - cases. - This test tests functionality of cg group partitioning, (static and dynamic) and its respective - API's size(), thread_rank(), and sync(). -*/ - -#include "test_common.h" -#include -#include -#include - -using namespace cooperative_groups; - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -#define NUM_ELEMS 10000000 -#define NUM_THREADS_PER_BLOCK 512 -#define WAVE_SIZE 32 - -/* Test coalesced group's functionality. - * - */ - -__device__ int atomicAggInc(int *ptr) { - coalesced_group g = coalesced_threads(); - int prev; - // elect the first active thread to perform atomic add - if (g.thread_rank() == 0) { - prev = atomicAdd(ptr, g.size()); - } - // broadcast previous value within the warp - // and add each active thread’s rank to it - prev = g.thread_rank() + g.shfl(prev, 0); - return prev; -} - -__global__ void kernel_shfl (int * dPtr, int *dResults, int srcLane, int cg_sizes) { - int id = threadIdx.x + blockIdx.x * blockDim.x; - if (id % cg_sizes == 0) { - coalesced_group const& g = coalesced_threads(); - int rank = g.thread_rank(); - int val = dPtr[rank]; - dResults[rank] = g.shfl(val, srcLane); - return; - } -} - -__global__ void kernel_shfl_any_to_any (int *randVal, int *dsrcArr, int *dResults, int cg_sizes) { - - int id = threadIdx.x + blockIdx.x * blockDim.x; - - if (id % cg_sizes == 0) { - coalesced_group const& g = coalesced_threads(); - int rank = g.thread_rank(); - int val = randVal[rank]; - dResults[rank] = g.shfl(val, 
dsrcArr[rank]); - return; - } - -} - -__global__ void filter_arr(int *dst, int *nres, const int *src, int n) { - int id = threadIdx.x + blockIdx.x * blockDim.x; - - for (int i = id; i < n; i += gridDim.x * blockDim.x) { - if (src[i] > 0) dst[atomicAggInc(nres)] = src[i]; - } -} - -/* Parallel reduce kernel. - * - * Step complexity: O(log n) - * Work complexity: O(n) - * - * Note: This kernel works only with power of 2 input arrays. - */ -__device__ int reduction_kernel(coalesced_group g, int* x, int val) { - int lane = g.thread_rank(); - int sz = g.size(); - - for (int i = g.size() / 2; i > 0; i /= 2) { - // use lds to store the temporary result - x[lane] = val; - // Ensure all the stores are completed. - g.sync(); - - if (lane < i) { - val += x[lane + i]; - } - // It must work on one tiled thread group at a time, - // and it must make sure all memory operations are - // completed before moving to the next stride. - // sync() here just does that. - g.sync(); - } - - // Choose the 0'th indexed thread that holds the reduction value to return - if (g.thread_rank() == 0) { - return val; - } - // Rest of the threads return no useful values - else { - return -1; - } -} - -__global__ void kernel_cg_coalesced_group_partition(unsigned int tileSz, int* result, - bool isGlobalMem, int* globalMem, int cg_sizes) { - - int id = threadIdx.x + blockIdx.x * blockDim.x; - if (id % cg_sizes == 0) { - coalesced_group threadBlockCGTy = coalesced_threads(); - int threadBlockGroupSize = threadBlockCGTy.size(); - - int* workspace = NULL; - - if (isGlobalMem) { - workspace = globalMem; - } else { - // Declare a shared memory - extern __shared__ int sharedMem[]; - workspace = sharedMem; - } - - int input, outputSum, expectedOutput; - - // input to reduction, for each thread, is its' rank in the group - input = threadBlockCGTy.thread_rank(); - - expectedOutput = (threadBlockGroupSize - 1) * threadBlockGroupSize / 2; - - outputSum = reduction_kernel(threadBlockCGTy, workspace, input); - - if 
(threadBlockCGTy.thread_rank() == 0) { - printf(" Sum of all ranks 0..%d in coalesced_group is %d\n\n", - (int)threadBlockCGTy.size() - 1, outputSum); - printf(" Creating %d groups, of tile size %d threads:\n\n", - (int)threadBlockCGTy.size() / tileSz, tileSz); - } - - threadBlockCGTy.sync(); - - coalesced_group tiledPartition = tiled_partition(threadBlockCGTy, tileSz); - - // This offset allows each group to have its own unique area in the workspace array - int workspaceOffset = threadBlockCGTy.thread_rank() - tiledPartition.thread_rank(); - - outputSum = reduction_kernel(tiledPartition, workspace + workspaceOffset, input); - - if (tiledPartition.thread_rank() == 0) { - printf( - " Sum of all ranks 0..%d in this tiledPartition group is %d. Corresponding parent thread " - "rank: %d\n", - tiledPartition.size() - 1, outputSum, input); - - result[input / (tileSz)] = outputSum; - } - return; - } -} - -__global__ void kernel_coalesced_active_groups() { - thread_block threadBlockCGTy = this_thread_block(); - int threadBlockGroupSize = threadBlockCGTy.size(); - - // input to reduction, for each thread, is its' rank in the group - int input = threadBlockCGTy.thread_rank(); - - if (threadBlockCGTy.thread_rank() == 0) { - printf(" Creating odd and even set of active thread groups based on branch divergence\n\n"); - } - - threadBlockCGTy.sync(); - - // Group all active odd threads - if (threadBlockCGTy.thread_rank() % 2) { - coalesced_group activeOdd = coalesced_threads(); - - if (activeOdd.thread_rank() == 0) { - printf(" ODD: Size of odd set of active threads is %d." - " Corresponding parent thread_rank is %d.\n\n", - activeOdd.size(), threadBlockCGTy.thread_rank()); - } - } - else { // Group all active even threads - coalesced_group activeEven = coalesced_threads(); - - if (activeEven.thread_rank() == 0) { - printf(" EVEN: Size of even set of active threads is %d." 
- " Corresponding parent thread_rank is %d.", - activeEven.size(), threadBlockCGTy.thread_rank()); - } - } - return; -} - -void printResults(int* ptr, int size) { - for (int i = 0; i < size; i++) { - std::cout << ptr[i] << " "; - } - std::cout << '\n'; -} - -void compareResults(int* cpu, int* gpu, int size) { - for (unsigned int i = 0; i < size / sizeof(int); i++) { - if (cpu[i] != gpu[i]) { - printf(" results do not match."); - } - } -} - -static void test_active_threads_grouping() { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = WAVE_SIZE; - - // Launch Kernel - hipLaunchKernelGGL(kernel_coalesced_active_groups, blockSize, threadsPerBlock, 0, 0); - - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - printf("\n...PASSED.\n\n"); -} - -// Search if the sum exists in the expected results array -void verifyResults(int* hPtr, int* dPtr, int size) { - int i = 0, j = 0; - for (i = 0; i < size; i++) { - for (j = 0; j < size; j++) { - if (hPtr[i] == dPtr[j]) { - break; - } - } - if (j == size) { - printf(" Result verification failed!"); - } - } -} - - -static void test_group_partition(unsigned int tileSz, bool useGlobalMem) { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = WAVE_SIZE; - - std::vector cg_sizes = {1, 2, 3}; - for (auto i : cg_sizes) { - - int numTiles = ((blockSize * threadsPerBlock) / i) / tileSz; - - // numTiles = 0 when partitioning is possible. The below statement is to avoid - // out-of-bounds error and still evaluate failure case. - numTiles = (numTiles == 0) ? 
1 : numTiles; - - // Build an array of expected reduction sum output on the host - // based on the sum of their respective thread ranks to use for verification - int* expectedSum = new int[numTiles]; - int temp = 0, sum = 0; - for (int i = 1; i <= numTiles; i++) { - sum = temp; - temp = (((tileSz * i) - 1) * (tileSz * i)) / 2; - expectedSum[i-1] = temp - sum; - } - - int* dResult = NULL; - hipMalloc(&dResult, sizeof(int) * numTiles); - - int* globalMem = NULL; - if (useGlobalMem) { - hipMalloc((void**)&globalMem, threadsPerBlock * sizeof(int)); - } - - int* hResult = NULL; - hipHostMalloc(&hResult, numTiles * sizeof(int), hipHostMallocDefault); - memset(hResult, 0, numTiles * sizeof(int)); - - // Launch Kernel - if (useGlobalMem) { - hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock, 0, 0, tileSz, - dResult, useGlobalMem, globalMem, i); - - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - } else { - hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, tileSz, dResult, useGlobalMem, globalMem, i); - - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - } - - hipMemcpy(hResult, dResult, numTiles * sizeof(int), hipMemcpyDeviceToHost); - verifyResults(expectedSum, hResult, numTiles); - // Free all allocated memory on host and device - hipFree(dResult); - hipFree(hResult); - if (useGlobalMem) { - hipFree(globalMem); - } - delete[] expectedSum; - - printf("\n...PASSED.\n\n"); - } -} -static void test_shfl_any_to_any() { - - std::vector cg_sizes = {1, 2, 3}; - for (auto i : cg_sizes) { - - hipError_t err; - int blockSize = 1; - int threadsPerBlock = WAVE_SIZE; - - int totalThreads = blockSize * threadsPerBlock; - int group_size = (totalThreads + i - 1) / i; - int 
group_size_in_bytes = group_size * sizeof(int); - - int* hPtr = NULL; - int* dPtr = NULL; - int* dsrcArr = NULL; - int* dResults = NULL; - int* srcArr = (int*)malloc(group_size_in_bytes); - int* srcArrCpu = (int*)malloc(group_size_in_bytes); - - std::cout << "Testing coalesced_groups shfl any-to-any\n" < cg_sizes = {1, 2, 3}; - for (auto i : cg_sizes) { - - hipError_t err; - int blockSize = 1; - int threadsPerBlock = WAVE_SIZE; - - int totalThreads = blockSize * threadsPerBlock; - int group_size = (totalThreads + i - 1) / i; - int group_size_in_bytes = group_size * sizeof(int); - - int* hPtr = NULL; - int* dPtr = NULL; - int* dResults = NULL; - int srcLane = rand() % 1000; - int srcLaneCpu = 0; - std::cout << "Testing coalesced_groups shfl with srcLane " << srcLane << '\n' - << " and group size " << i <(malloc(sizeof(int) * NUM_ELEMS)); - - // Generate input data. - for (int i = 0; i < NUM_ELEMS; i++) { - data_to_filter[i] = rand() % numOfBuckets; - } - - - hipMalloc(&d_data_to_filter, sizeof(int) * NUM_ELEMS); - hipMalloc(&d_filtered_data, sizeof(int) * NUM_ELEMS); - hipMalloc(&d_nres, sizeof(int)); - - hipMemcpy(d_data_to_filter, data_to_filter, - sizeof(int) * NUM_ELEMS, hipMemcpyHostToDevice); - hipMemset(d_nres, 0, sizeof(int)); - - dim3 dimBlock(NUM_THREADS_PER_BLOCK, 1, 1); - dim3 dimGrid((NUM_ELEMS / NUM_THREADS_PER_BLOCK) + 1, 1, 1); - - filter_arr<<>>(d_filtered_data, d_nres, d_data_to_filter, - NUM_ELEMS); - - - hipMemcpy(&nres, d_nres, sizeof(int), hipMemcpyDeviceToHost); - - filtered_data = reinterpret_cast(malloc(sizeof(int) * nres)); - - hipMemcpy(filtered_data, d_filtered_data, sizeof(int) * nres, - hipMemcpyDeviceToHost); - - int *host_filtered_data = - reinterpret_cast(malloc(sizeof(int) * NUM_ELEMS)); - - // Generate host output with host filtering code. 
- int host_flt_count = 0; - for (int i = 0; i < NUM_ELEMS; i++) { - if (data_to_filter[i] > 0) { - host_filtered_data[host_flt_count++] = data_to_filter[i]; - } - } - - printf("\nWarp Aggregated Atomics %s \n", - (host_flt_count == nres) ? "PASSED" : "FAILED"); - - // Now, testing shfl collective - std::cout << "Now testing shfl collective as a broadcast" << '\n' << std::endl; - - for (int i = 0; i < 100; i++) { - test_shfl_broadcast(); - } - - - // Now, testing shfl collective - std::cout << "Now testing shfl operations any-to-any member lanes" << '\n' << std::endl; - - for (int i = 0; i < 100; i++) { - test_shfl_any_to_any(); - } - - // Now, pass a already coalesced_group that was partitioned - /* Test coalesced group partitioning */ - std::cout << "Now testing coalesced_groups partitioning" << '\n' << std::endl; - - int testNo = 1; - for (int memTy = 0; memTy < 2; memTy++) { - std::vector tileSizes = {2, 4, 8, 16, 32}; - for (auto i : tileSizes) { - std::cout << "TEST " << testNo << ":" << '\n' << std::endl; - test_group_partition(i, memTy); - testNo++; - } - } - - std::cout << "Now grouping active threads based on branch divergence" << '\n' << std::endl; - test_active_threads_grouping(); - - passed(); - return 0; -} diff --git a/tests/src/runtimeApi/cooperativeGrps/simple_grid_group_barrier.cpp b/tests/src/runtimeApi/cooperativeGrps/simple_grid_group_barrier.cpp deleted file mode 100644 index 78760ae06e..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/simple_grid_group_barrier.cpp +++ /dev/null @@ -1,286 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// Test Description: -/*The general idea of the application is to launch N warps. N is a command-line -parameter, but the user should set N small enough that all warps can be on -the GPU at the same time. - -All of the warps do a "work loop". Within the work loop, every warp -atomically increments a global variable. The value returned from this atomic -increment entriely depends on the order the threads arrive at the atomic -instruction. Each warp then stores the result into a global array based on its -warp ID. - -We also add a sleep/wait loop into the code so that the last warp runs much -slower than everyone else. As such, it should store much larger values than -all the other warps. - -If there are no barrier within the loop, then the last warp will likely get to -the global variable the first time after all the other warps have each -incremented it many times. 
If the barrier properly works, then each warp -will increment the variable once per time through the loop, and all threads -will sleep on the barrier waiting for the last warp to finally catch up. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -static int cooperative_groups_support(int device_id) { - hipError_t err; - int cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&cooperative_attribute, - hipDeviceAttributeCooperativeLaunch, device_id)); - if (!cooperative_attribute) { - std::cerr << "Cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeLaunch == 0) { - std::cerr << "Cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - return 1; -} - -static int verify_barrier_buffer(unsigned int loops, unsigned int warps, - unsigned int *host_buffer) { - unsigned int max_in_this_loop = 0; - for (unsigned int i = 0; i < loops; i++) { - max_in_this_loop += warps; - for (unsigned int j = 0; j < warps; j++) { - if (host_buffer[i*warps+j] > max_in_this_loop) { - std::cerr << "Barrier failure!" << std::endl; - std::cerr << " Buffer entry " << i*warps+j; - std::cerr << " contains the value " << host_buffer[i*warps+j]; - std::cerr << " but it should not be more than "; - std::cerr << max_in_this_loop << std::endl; - return -1; - } - } - } - std::cout << "Barriers work properly!" 
<< std::endl; - return 0; -} - -__global__ void -test_kernel(unsigned int *atomic_val, unsigned int *array, - unsigned int loops) { - cooperative_groups::grid_group grid = cooperative_groups::this_grid(); - unsigned rank = grid.thread_rank(); - - int offset = blockIdx.x; - for (int i = 0; i < loops; i++) { - // Make the last thread run way behind everyone else. - // If the barrier below fails, then the other threads may hit the - // atomicInc instruction many times before the last thread ever gets - // to it. - // As such, without the barrier, the last array entry will eventually - // contain a very large value, defined by however many times the other - // wavefronts make it through this loop. - // If the barrier works, then it will likely contain some number - // near "total number of blocks". It will be the last wavefront to - // reach the atomicInc, but everyone will have only hit the atomic once. - if (rank == (grid.size() - 1)) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - } - - if (threadIdx.x == 0) { - array[offset] = atomicInc(&atomic_val[0], UINT_MAX); - } - grid.sync(); - offset += gridDim.x; - } -} - -__global__ void -test_kernel_gfx11(unsigned int *atomic_val, unsigned int *array, - unsigned int loops) { -#ifdef __HIP_PLATFORM_AMD__ - cooperative_groups::grid_group grid = cooperative_groups::this_grid(); - unsigned rank = grid.thread_rank(); - - int offset = blockIdx.x; - for (int i = 0; i < loops; i++) { - // Make the last thread run way behind everyone else. - // If the barrier below fails, then the other threads may hit the - // atomicInc instruction many times before the last thread ever gets - // to it. 
- // As such, without the barrier, the last array entry will eventually - // contain a very large value, defined by however many times the other - // wavefronts make it through this loop. - // If the barrier works, then it will likely contain some number - // near "total number of blocks". It will be the last wavefront to - // reach the atomicInc, but everyone will have only hit the atomic once. - if (rank == (grid.size() - 1)) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - } - - if (threadIdx.x == 0) { - array[offset] = atomicInc(&atomic_val[0], UINT_MAX); - } - grid.sync(); - offset += gridDim.x; - } -#endif -} - -int main(int argc, char** argv) { - hipError_t err; - int device_num; - uint32_t loops = 2; - uint32_t warps = 10; - uint32_t block_size = 1; - HIPCHECK(hipGetDeviceCount(&device_num)); - for (int dev = 0; dev < device_num; ++dev) { - std::cout << "Device number: " << dev << std::endl; - std::cout << "Loops: " << loops << std::endl; - std::cout << "Warps: " << warps << std::endl; - std::cout << "Block size: " << block_size << std::endl; - - /*************************************************************************/ - /* Test whether target device supports cooperative groups ****************/ - HIPCHECK(hipSetDevice(dev)); - if (!cooperative_groups_support(dev)) { - std::cout << "Skipping the test with Pass result.\n"; - passed(); - } - - /*************************************************************************/ - /* Test whether the requested size will fit on the GPU *******************/ - int warp_size; - int num_sms; - int max_blocks_per_sm; - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, dev)); - 
warp_size = device_properties.warpSize; - num_sms = device_properties.multiProcessorCount; - - std::cout << "Device name: " << device_properties.name << std::endl; - std::cout << std::endl; - - int num_threads_in_block = block_size * warp_size; - - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - // Calculate the device occupancy to know how many blocks can be run. - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks_per_sm, - test_kernel_used, num_threads_in_block, 0)); - - int requested_blocks = warps / block_size; - if (requested_blocks > max_blocks_per_sm * num_sms) { - std::cerr << "Requesting to run " << requested_blocks << " blocks, "; - std::cerr << "but we can only guarantee to simultaneously run "; - std::cerr << (max_blocks_per_sm * num_sms) << std::endl; - failed("\n"); - } - - /*************************************************************************/ - /* Set up data to pass into the kernel ***********************************/ - // Each block will output a single value per loop. 
- uint32_t total_buffer_len = requested_blocks*loops; - - // Alocate the buffer that will hold the kernel's output, and which will - // also be used to globally synchronize during GWS initialization - unsigned int *host_buffer = (unsigned int*)calloc(total_buffer_len, - sizeof(unsigned int)); - - unsigned int *kernel_buffer; - HIPCHECK(hipMalloc(reinterpret_cast(&kernel_buffer), - total_buffer_len * sizeof(unsigned int))); - HIPCHECK(hipMemcpy(kernel_buffer, host_buffer, - total_buffer_len * sizeof(unsigned int), - hipMemcpyHostToDevice)); - - unsigned int *kernel_atomic; - HIPCHECK(hipMalloc(reinterpret_cast(&kernel_atomic), - sizeof(unsigned int))); - HIPCHECK(hipMemset(kernel_atomic, 0, sizeof(unsigned int))); - - /*************************************************************************/ - /* Launch the kernel *****************************************************/ - std::cout << "Launching a kernel with " << warps << " warps "; - std::cout << "in " << requested_blocks << " thread blocks."; - std::cout << std::endl; - - void *params[3]; - params[0] = reinterpret_cast(&kernel_atomic); - params[1] = reinterpret_cast(&kernel_buffer); - params[2] = reinterpret_cast(&loops); - test_kernel_used = IsGfx11() ? 
test_kernel_gfx11 : test_kernel; - HIPCHECK(hipLaunchCooperativeKernel(reinterpret_cast(test_kernel_used), - requested_blocks, - num_threads_in_block, params, 0, NULL)); - - /*************************************************************************/ - /* Read back the buffer and print out its data****************************/ - HIPCHECK(hipMemcpy(host_buffer, kernel_buffer, - total_buffer_len * sizeof(unsigned int), - hipMemcpyDeviceToHost)); - - for (unsigned int i = 0; i < loops; i++) { - for (unsigned int j = 0; j < requested_blocks; j++) { - std::cout << "Buffer entry " << (i*warps+j); - std::cout << " (written by warp " << j << ")"; - std::cout << " is " << host_buffer[i * requested_blocks + j]; - std::cout << std::endl; - } - std::cout << "==========================\n"; - } - int ret_val = verify_barrier_buffer(loops, requested_blocks, host_buffer); - HIPCHECK(hipFree(kernel_buffer)); - HIPCHECK(hipFree(kernel_atomic)); - if (ret_val == -1) { - failed("\n"); - } else { - passed(); - } - } -} diff --git a/tests/src/runtimeApi/cooperativeGrps/simple_multi_grid_group_barrier.cpp b/tests/src/runtimeApi/cooperativeGrps/simple_multi_grid_group_barrier.cpp deleted file mode 100644 index 05c4bf76a8..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/simple_multi_grid_group_barrier.cpp +++ /dev/null @@ -1,470 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// Test Description: -/*The general idea of the application is to launch N warps to each of two GPUs. -N is a command-line parameter, but the user should set N small enough that all -warps can be on each of the GPUs at the same time. - -All of the warps do a "work loop". Within the work loop, every warp -atomically increments a global variable that is shared between both fo the -target GPUs. The value returned from this atomic increment entriely depends -on the order the warps from the GPUs arrive at the atomic instruction. Each -warp then stores the result into a global array based on its warp ID. - -We also add a sleep/wait loop into the code so that the last warp runs much -slower than everyone else. As such, it should store much larger values than -all the other warps. - -If there are no barrier within the loop, then warp 0 will likely ge to the -global variable the first time while all the other warps have each -incremented it many times. If the barrier properly works, then each warp -will increment the variable once per time through the loop, and all threads -will sleep on the barrier waiting for the last warp to finally catch up. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -rdc=true -gencode arch=compute_70,code=sm_70 - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -static int cooperative_groups_support(int device_id) { - hipError_t err; - int cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&cooperative_attribute, - hipDeviceAttributeCooperativeLaunch, device_id)); - if (!cooperative_attribute) { - std::cerr << "Cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - int multi_gpu_cooperative_attribute; - HIPCHECK(hipDeviceGetAttribute(&multi_gpu_cooperative_attribute, - hipDeviceAttributeCooperativeMultiDeviceLaunch, device_id)); - if (!multi_gpu_cooperative_attribute) { - std::cerr << "Multi-GPU cooperative launch support not available in "; - std::cerr << "the device attribute for device " << device_id; - std::cerr << std::endl; - return 0; - } - - hipDeviceProp_t device_properties; - HIPCHECK(hipGetDeviceProperties(&device_properties, device_id)); - if (device_properties.cooperativeLaunch == 0) { - std::cerr << "Cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - if (device_properties.cooperativeMultiDeviceLaunch == 0) { - std::cerr << "Multi-GPU cooperative group support not available in "; - std::cerr << "device properties." << std::endl; - return 0; - } - return 1; -} - -static int verify_barrier_buffer(unsigned int loops, unsigned int warps, - unsigned int *host_buffer, - unsigned int num_devs) { - unsigned int max_in_this_loop = 0; - for (unsigned int i = 0; i < loops; i++) { - max_in_this_loop += (warps * num_devs); - for (unsigned int j = 0; j < warps; j++) { - if (host_buffer[i*warps+j] > max_in_this_loop) { - std::cerr << "Barrier failure!" 
<< std::endl; - std::cerr << " Buffer entry " << i*warps+j; - std::cerr << " contains the value " << host_buffer[i*warps+j]; - std::cerr << " but it should not be more than "; - std::cerr << max_in_this_loop << std::endl; - return -1; - } - } - } - std::cout << "\tBarriers work properly!" << std::endl; - return 0; -} - -static int verify_multi_gpu_buffer(unsigned int loops, unsigned int array_val) { - unsigned int desired_val = 0; - for (int i = 0; i < loops; i++) { - if (i % 2 == 0) { - desired_val += 2; - } - else { - desired_val *= 2; - } - } - std::cout << "Desired value is " << desired_val << std::endl; - if (array_val != desired_val) { - std::cerr << "ERROR! Multi-grid barrier does not appear to work."; - std::cerr << std::endl; - std::cerr << "Expected the multi-GPUs to work together to produce "; - std::cerr << "the value " << desired_val << std::endl; - std::cerr << "However, the entry returned from the multi-GPU "; - std::cerr << "kernel was " << array_val << std::endl; - return -1; - } - std::cout << "\tMulti-GPU barriers appear to work here." << std::endl; - return 0; -} - -__global__ void -test_kernel(unsigned int *atomic_val, unsigned int *global_array, - unsigned int *array, uint32_t loops) { - cooperative_groups::grid_group grid = cooperative_groups::this_grid(); - cooperative_groups::multi_grid_group mgrid = - cooperative_groups::this_multi_grid(); - unsigned rank = grid.thread_rank(); - unsigned global_rank = mgrid.thread_rank(); - - int offset = blockIdx.x; - for (int i = 0; i < loops; i++) { - // Make the last thread run way behind everyone else. - // If the grid barrier below fails, then the other threads may hit the - // atomicInc instruction many times before the last thread ever gets - // to it. - // As such, without the barrier, the last array entry will eventually - // contain a very large value, defined by however many times the other - // wavefronts make it through this loop. 
- // If the barrier works, then it will likely contain some number - // near "total number of blocks". It will be the last wavefront to - // reach the atomicInc, but everyone will have only hit the atomic once. - if (rank == (grid.size() - 1)) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - } - if (threadIdx.x == 0) { - array[offset] = atomicInc(atomic_val, UINT_MAX); - } - grid.sync(); - - // Make the last thread in the entire multi-grid run way behind - // everyone else. - // If the mgrid barrier below fails, then the two global_array entries - // will end up being out of sync, because the intermingling of adds - // and multiplies will not be aligned between to the two GPUs. - if (global_rank == (mgrid.size() - 1)) { - long long time_diff = 0; - long long last_clock = clock64(); - do { - long long cur_clock = clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. 
- last_clock = cur_clock; - } while(time_diff < 1000000); - } - // During even iterations, add into your own array entry - // During odd iterations, add into your partner's array entry - unsigned grid_rank = mgrid.grid_rank(); - unsigned inter_gpu_offset = (grid_rank + i) % mgrid.num_grids(); - if (rank == (grid.size() - 1)) { - if (i % mgrid.num_grids() == 0) { - global_array[grid_rank] += 2; - } else { - global_array[inter_gpu_offset] *= 2; - } - } - mgrid.sync(); - offset += gridDim.x; - } -} - -__global__ void -test_kernel_gfx11(unsigned int *atomic_val, unsigned int *global_array, - unsigned int *array, uint32_t loops) { -#ifdef __HIP_PLATFORM_AMD__ - cooperative_groups::grid_group grid = cooperative_groups::this_grid(); - cooperative_groups::multi_grid_group mgrid = - cooperative_groups::this_multi_grid(); - unsigned rank = grid.thread_rank(); - unsigned global_rank = mgrid.thread_rank(); - - int offset = blockIdx.x; - for (int i = 0; i < loops; i++) { - // Make the last thread run way behind everyone else. - // If the grid barrier below fails, then the other threads may hit the - // atomicInc instruction many times before the last thread ever gets - // to it. - // As such, without the barrier, the last array entry will eventually - // contain a very large value, defined by however many times the other - // wavefronts make it through this loop. - // If the barrier works, then it will likely contain some number - // near "total number of blocks". It will be the last wavefront to - // reach the atomicInc, but everyone will have only hit the atomic once. - if (rank == (grid.size() - 1)) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. 
- last_clock = cur_clock; - } while(time_diff < 1000000); - } - if (threadIdx.x == 0) { - array[offset] = atomicInc(atomic_val, UINT_MAX); - } - grid.sync(); - - // Make the last thread in the entire multi-grid run way behind - // everyone else. - // If the mgrid barrier below fails, then the two global_array entries - // will end up being out of sync, because the intermingling of adds - // and multiplies will not be aligned between to the two GPUs. - if (global_rank == (mgrid.size() - 1)) { - long long time_diff = 0; - long long last_clock = wall_clock64(); - do { - long long cur_clock = wall_clock64(); - if (cur_clock > last_clock) { - time_diff += (cur_clock - last_clock); - } - // If it rolls over, we don't know how much to add to catch up. - // So just ignore those slipped cycles. - last_clock = cur_clock; - } while(time_diff < 1000000); - } - // During even iterations, add into your own array entry - // During odd iterations, add into your partner's array entry - unsigned grid_rank = mgrid.grid_rank(); - unsigned inter_gpu_offset = (grid_rank + i) % mgrid.num_grids(); - if (rank == (grid.size() - 1)) { - if (i % mgrid.num_grids() == 0) { - global_array[grid_rank] += 2; - } else { - global_array[inter_gpu_offset] *= 2; - } - } - mgrid.sync(); - offset += gridDim.x; - } -#endif -} - -int main(int argc, char** argv) { - hipError_t err; - int device_num = 0, flag = 0; - uint32_t loops = 2; - uint32_t warps = 10; - uint32_t block_size = 1; - HIPCHECK(hipGetDeviceCount(&device_num)); - if (device_num < 2) { - std::cout << "This test needs atleast two gpus but found only"; - std::cout << device_num << std::endl; - std::cout << "Hence skipping the test with pass result\n"; - passed(); - } - - for (int d = 0; d < (device_num - 1); ++d) { - std::cout << "First device number: " << d << std::endl; - std::cout << "Second device number: " << (d + 1) << std::endl; - std::cout << "Loops: " << loops << std::endl; - std::cout << "Warps: " << warps << std::endl; - std::cout << 
"Block size: " << block_size << std::endl; - - /*************************************************************************/ - /* Test whether target device supports cooperative groups ****************/ - for (int i = 0; i < 2; i++) { - if (!cooperative_groups_support((d + i))) { - std::cout << "Skipping the test with Pass result.\n"; - passed(); - } - } - - /*************************************************************************/ - /* Test whether the requested size will fit on the GPU *******************/ - int warp_sizes[2]; - int num_sms[2]; - hipDeviceProp_t device_properties[2]; - int warp_size = INT_MAX; - int num_sm = INT_MAX; - for (int i = 0; i < 2; i++) { - HIPCHECK(hipGetDeviceProperties(&device_properties[i], (d + i))); - warp_sizes[i] = device_properties[i].warpSize; - if (warp_sizes[i] < warp_size) { - warp_size = warp_sizes[i]; - } - num_sms[i] = device_properties[i].multiProcessorCount; - if (num_sms[i] < num_sm) { - num_sm = num_sms[i]; - } - std::cout << "Device " << (d + i); - std::cout << " name: " << device_properties[i].name << std::endl; - } - std::cout << std::endl; - - int num_threads_in_block = block_size * warp_size; - - // Calculate the device occupancy to know how many blocks can be run. - int max_blocks_per_sm_arr[2]; - int max_blocks_per_sm = INT_MAX; - for (int i = 0; i < 2; i++) { - HIPCHECK(hipSetDevice((d + i))); - auto test_kernel_used = IsGfx11() ? 
test_kernel_gfx11 : test_kernel; - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor( - &max_blocks_per_sm_arr[i], test_kernel_used, num_threads_in_block, - 0)); - if (max_blocks_per_sm_arr[i] < max_blocks_per_sm) { - max_blocks_per_sm = max_blocks_per_sm_arr[i]; - } - } - - int requested_blocks = warps / block_size; - if (requested_blocks > max_blocks_per_sm * num_sm) { - std::cerr << "Requesting to run " << requested_blocks << " blocks, "; - std::cerr << "but we can only guarantee to simultaneously run "; - std::cerr << (max_blocks_per_sm * num_sm) << std::endl; - failed("\n"); - } - - /*************************************************************************/ - /* Set up data to pass into the kernel ***********************************/ - // Each block will output a single value per loop. - uint32_t total_buffer_len = requested_blocks*loops; - - // Alocate the buffer that will hold the kernel's output, and which will - // also be used to globally synchronize during GWS initialization - unsigned int *host_buffer[2]; - unsigned int *kernel_buffer[2]; - unsigned int *kernel_atomic[2]; - hipStream_t streams[2]; - for (int i = 0; i < 2; i++) { - host_buffer[i] = (unsigned int*)calloc(total_buffer_len, - sizeof(unsigned int)); - HIPCHECK(hipSetDevice((d + i))); - HIPCHECK(hipMalloc(reinterpret_cast(&kernel_buffer[i]), - total_buffer_len * sizeof(unsigned int))); - HIPCHECK(hipMemcpy(kernel_buffer[i], host_buffer[i], - total_buffer_len * sizeof(unsigned int), hipMemcpyHostToDevice)); - HIPCHECK(hipMalloc(reinterpret_cast(&kernel_atomic[i]), - sizeof(unsigned int))); - HIPCHECK(hipMemset(kernel_atomic[i], 0, sizeof(unsigned int))); - HIPCHECK(hipStreamCreate(&streams[i])); - } - - // Single kernel atomic shared between both devices; put it on the host - unsigned int* global_array; - HIPCHECK(hipHostMalloc(reinterpret_cast(&global_array), - 2 * sizeof(unsigned int), 0)); - HIPCHECK(hipMemset(global_array, 0, 2 * sizeof(unsigned int))); - - 
/*************************************************************************/ - /* Launch the kernels ****************************************************/ - std::cout << "Launching a kernel with " << warps << " warps "; - std::cout << "in " << requested_blocks << " thread blocks."; - std::cout << std::endl; - - void *dev_params[2][4]; - hipLaunchParams md_params[2]; - for (int i = 0; i < 2; i++) { - auto test_kernel_used = IsGfx11() ? test_kernel_gfx11 : test_kernel; - dev_params[i][0] = reinterpret_cast(&kernel_atomic[i]); - dev_params[i][1] = reinterpret_cast(&global_array); - dev_params[i][2] = reinterpret_cast(&kernel_buffer[i]); - dev_params[i][3] = reinterpret_cast(&loops); - md_params[i].func = reinterpret_cast(test_kernel_used); - md_params[i].gridDim = requested_blocks; - md_params[i].blockDim = num_threads_in_block; - md_params[i].sharedMem = 0; - md_params[i].stream = streams[i]; - md_params[i].args = dev_params[i]; - } - - HIPCHECK(hipLaunchCooperativeKernelMultiDevice(md_params, 2, 0)); - HIPCHECK(hipDeviceSynchronize()); - - /*************************************************************************/ - /* Read back the buffers and print out its data **************************/ - for (int dev = 0; dev < 2; dev++) { - HIPCHECK(hipMemcpy(host_buffer[dev], kernel_buffer[dev], - total_buffer_len * sizeof(unsigned int), - hipMemcpyDeviceToHost)); - } - - for (unsigned int i = 0; i < loops; i++) { - for (int dev = 0; dev < 2; dev++) { - std::cout << "+++++++++++++++++ Device " << (d + dev); - std::cout << "+++++++++++++++++" << std::endl; - for (unsigned int j = 0; j < requested_blocks; j++) { - std::cout << "Buffer entry " << (i * warps + j); - std::cout << " (written by warp " << j << ")"; - std::cout << " is " << host_buffer[dev][i * requested_blocks + j]; - std::cout << std::endl; - } - } - std::cout << "==========================\n"; - } - for (unsigned int dev = 0; dev < 2; dev++) { - std::cout << "Testing output from device " << (d + dev) << std::endl; 
- int local_ret_val = verify_barrier_buffer(loops, requested_blocks, - host_buffer[dev], 2); - if (local_ret_val == -1) { - flag = 1; - } - } - - std::cout << std::endl << "The multi-GPU shared updates contain:"; - std::cout << std::endl; - for (int i = 0; i < 2; i++) { - std::cout << "Entry " << i << ": "; - std::cout << global_array[i] << std::endl; - } - for (int dev = 0; dev < 2; dev++) { - std::cout << "Testing multi-GPU output for entry " << (d + dev); - std::cout << std::endl; - int local_ret_val = verify_multi_gpu_buffer(loops, global_array[dev]); - if (local_ret_val) { - flag = 1; - } - } - for (int k = 0; k < 2; ++k) { - HIPCHECK(hipFree(kernel_buffer[k])); - HIPCHECK(hipFree(kernel_atomic[k])); - HIPCHECK(hipStreamDestroy(streams[k])); - free(host_buffer[k]); - } - } - if (flag == 1) { - failed("\n"); - } else { - passed(); - } -} diff --git a/tests/src/runtimeApi/cooperativeGrps/simple_tiled_partition.cpp b/tests/src/runtimeApi/cooperativeGrps/simple_tiled_partition.cpp deleted file mode 100644 index 93e017f54d..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/simple_tiled_partition.cpp +++ /dev/null @@ -1,400 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// Test Description: -/* This test implements sum reduction kernel, first with each threads own rank - as input and comparing the sum with expected sum output derieved from n(n-1)/2 - formula. The second part, partitions this parent group into child subgroups - a.k.a tiles using using tiled_partition() collective operation. This can be called - with a static tile size, passed in templated non-type variable-tiled_partition, - or in runtime as tiled_partition(thread_group parent, tileSz). This test covers both these - cases. - This test tests functionality of cg group partitioning, (static and dynamic) and its respective - API's size(), thread_rank(), and sync(). -*/ - -#include "test_common.h" -#include -#include -#include - -using namespace cooperative_groups; - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -/* Parallel reduce kernel. - * - * Step complexity: O(log n) - * Work complexity: O(n) - * - * Note: This kernel works only with power of 2 input arrays. - */ -__device__ int reduction_kernel(thread_group g, int* x, int val) { - int lane = g.thread_rank(); - int sz = g.size(); - - for (int i = g.size() / 2; i > 0; i /= 2) { - // use lds to store the temporary result - x[lane] = val; - // Ensure all the stores are completed. - g.sync(); - - if (lane < i) { - val += x[lane + i]; - } - // It must work on one tiled thread group at a time, - // and it must make sure all memory operations are - // completed before moving to the next stride. - // sync() here just does that. 
- g.sync(); - } - - // Choose the 0'th indexed thread that holds the reduction value to return - if (g.thread_rank() == 0) { - return val; - } - // Rest of the threads return no useful values - else { - return -1; - } -} - -template -__global__ void kernel_cg_group_partition_static(int* result, bool isGlobalMem, int* globalMem) { - thread_block threadBlockCGTy = this_thread_block(); - int threadBlockGroupSize = threadBlockCGTy.size(); - - int* workspace = NULL; - - if (isGlobalMem) { - workspace = globalMem; - } else { - // Declare a shared memory - extern __shared__ int sharedMem[]; - workspace = sharedMem; - } - - int input, outputSum, expectedOutput; - - // we pass its own thread rank as inputs - input = threadBlockCGTy.thread_rank(); - - expectedOutput = (threadBlockGroupSize - 1) * threadBlockGroupSize / 2; - - outputSum = reduction_kernel(threadBlockCGTy, workspace, input); - - // Choose a leader thread to print the results - if (threadBlockCGTy.thread_rank() == 0) { - printf(" Sum of all ranks 0..%d in threadBlockCooperativeGroup is %d (expected %d)\n\n", - (int)threadBlockCGTy.size() - 1, outputSum, expectedOutput); - printf(" Creating %d groups, of tile size %d threads:\n\n", - (int)threadBlockCGTy.size() / tileSz, tileSz); - } - - threadBlockCGTy.sync(); - - thread_block_tile tiledPartition = tiled_partition(threadBlockCGTy); - - // This offset allows each group to have its own unique area in the workspace array - int workspaceOffset = threadBlockCGTy.thread_rank() - tiledPartition.thread_rank(); - - outputSum = reduction_kernel(tiledPartition, workspace + workspaceOffset, input); - - if (tiledPartition.thread_rank() == 0) { - printf( - " Sum of all ranks 0..%d in this tiledPartition group is %d. 
Corresponding parent thread " - "rank: %d\n", - tiledPartition.size() - 1, outputSum, input); - result[input / (tileSz)] = outputSum; - } - return; -} - - -__global__ void kernel_cg_group_partition_dynamic(unsigned int tileSz, int* result, - bool isGlobalMem, int* globalMem) { - thread_block threadBlockCGTy = this_thread_block(); - int threadBlockGroupSize = threadBlockCGTy.size(); - - int* workspace = NULL; - - if (isGlobalMem) { - workspace = globalMem; - } else { - // Declare a shared memory - extern __shared__ int sharedMem[]; - workspace = sharedMem; - } - - int input, outputSum, expectedOutput; - - // input to reduction, for each thread, is its' rank in the group - input = threadBlockCGTy.thread_rank(); - - expectedOutput = (threadBlockGroupSize - 1) * threadBlockGroupSize / 2; - - outputSum = reduction_kernel(threadBlockCGTy, workspace, input); - - if (threadBlockCGTy.thread_rank() == 0) { - printf(" Sum of all ranks 0..%d in threadBlockCooperativeGroup is %d\n\n", - (int)threadBlockCGTy.size() - 1, outputSum); - printf(" Creating %d groups, of tile size %d threads:\n\n", - (int)threadBlockCGTy.size() / tileSz, tileSz); - } - - threadBlockCGTy.sync(); - - thread_group tiledPartition = tiled_partition(threadBlockCGTy, tileSz); - - // This offset allows each group to have its own unique area in the workspace array - int workspaceOffset = threadBlockCGTy.thread_rank() - tiledPartition.thread_rank(); - - outputSum = reduction_kernel(tiledPartition, workspace + workspaceOffset, input); - - if (tiledPartition.thread_rank() == 0) { - printf( - " Sum of all ranks 0..%d in this tiledPartition group is %d. 
Corresponding parent thread " - "rank: %d\n", - tiledPartition.size() - 1, outputSum, input); - - result[input / (tileSz)] = outputSum; - } - return; -} - -// Search if the sum exists in the expected results array -void verifyResults(int* hPtr, int* dPtr, int size) { - int i = 0, j = 0; - for (i = 0; i < size; i++) { - for (j = 0; j < size; j++) { - if (hPtr[i] == dPtr[j]) { - break; - } - } - if (j == size) { - failed(" Result verification failed!"); - } - } -} - - -template static void test_group_partition(bool useGlobalMem) { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = 64; - - int numTiles = (blockSize * threadsPerBlock) / tileSz; - - // Build an array of expected reduction sum output on the host - // based on the sum of their respective thread ranks for verification. - // eg: parent group has 64threads. - // child thread ranks: 0-15, 16-31, 32-47, 48-63 - // expected sum: 120, 376, 632, 888 - int* expectedSum = new int[numTiles]; - int temp = 0, sum = 0; - - for (int i = 1; i <= numTiles; i++) { - sum = temp; - temp = (((tileSz * i) - 1) * (tileSz * i)) / 2; - expectedSum[i-1] = temp - sum; - } - - int* dResult = NULL; - hipMalloc((void**)&dResult, numTiles * sizeof(int)); - - int* globalMem = NULL; - if (useGlobalMem) { - hipMalloc((void**)&globalMem, threadsPerBlock * sizeof(int)); - } - - int* hResult = NULL; - hipHostMalloc(&hResult, numTiles * sizeof(int), hipHostMallocDefault); - memset(hResult, 0, numTiles * sizeof(int)); - - if (useGlobalMem) { - // Launch Kernel - hipLaunchKernelGGL(kernel_cg_group_partition_static, blockSize, threadsPerBlock, 0, 0, - dResult, useGlobalMem, globalMem); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - } else { - // Launch Kernel - hipLaunchKernelGGL(kernel_cg_group_partition_static, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dResult, useGlobalMem, globalMem); - err = 
hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - } - - hipMemcpy(hResult, dResult, numTiles * sizeof(int), hipMemcpyDeviceToHost); - - verifyResults(expectedSum, hResult, numTiles); - - // Free all allocated memory on host and device - hipFree(dResult); - hipFree(hResult); - if (useGlobalMem) { - hipFree(globalMem); - } - delete[] expectedSum; - - printf("\n...PASSED.\n\n"); -} - -static void test_group_partition(unsigned int tileSz, bool useGlobalMem) { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = 64; - - int numTiles = (blockSize * threadsPerBlock) / tileSz; - // Build an array of expected reduction sum output on the host - // based on the sum of their respective thread ranks to use for verification - int* expectedSum = new int[numTiles]; - int temp = 0, sum = 0; - for (int i = 1; i <= numTiles; i++) { - sum = temp; - temp = (((tileSz * i) - 1) * (tileSz * i)) / 2; - expectedSum[i-1] = temp - sum; - } - - int* dResult = NULL; - hipMalloc(&dResult, sizeof(int) * numTiles); - - int* globalMem = NULL; - if (useGlobalMem) { - hipMalloc((void**)&globalMem, threadsPerBlock * sizeof(int)); - } - - int* hResult = NULL; - hipHostMalloc(&hResult, numTiles * sizeof(int), hipHostMallocDefault); - memset(hResult, 0, numTiles * sizeof(int)); - - // Launch Kernel - if (useGlobalMem) { - hipLaunchKernelGGL(kernel_cg_group_partition_dynamic, blockSize, threadsPerBlock, 0, 0, tileSz, - dResult, useGlobalMem, globalMem); - - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - } else { - hipLaunchKernelGGL(kernel_cg_group_partition_dynamic, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, tileSz, dResult, useGlobalMem, globalMem); - - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", 
hipGetErrorString(err)); - } - } - - hipMemcpy(hResult, dResult, numTiles * sizeof(int), hipMemcpyDeviceToHost); - - verifyResults(expectedSum, hResult, numTiles); - - // Free all allocated memory on host and device - hipFree(dResult); - hipFree(hResult); - if (useGlobalMem) { - hipFree(globalMem); - } - delete[] expectedSum; - - printf("\n...PASSED.\n\n"); -} - -int main() { - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - } - - bool useGlobalMem = true; - std::cout << "Testing static tiled_partition for different tile sizes" << std::endl; - std::cout << "\nUsing global memory for computation\n"; - /* Test static tile_partition */ - std::cout << "TEST 1:" << '\n' << std::endl; - test_group_partition<2>(useGlobalMem); - std::cout << "TEST 2:" << '\n' << std::endl; - test_group_partition<4>(useGlobalMem); - std::cout << "TEST 3:" << '\n' << std::endl; - test_group_partition<8>(useGlobalMem); - std::cout << "TEST 4:" << '\n' << std::endl; - test_group_partition<16>(useGlobalMem); - std::cout << "TEST 5:" << '\n' << std::endl; - test_group_partition<32>(useGlobalMem); - - useGlobalMem = false; - std::cout << "Testing static tiled_partition for different tile sizes" << std::endl; - std::cout << "\nUsing shared memory for computation\n"; - /* Test static tile_partition */ - std::cout << "TEST 1:" << '\n' << std::endl; - test_group_partition<2>(useGlobalMem); - std::cout << "TEST 2:" << '\n' << std::endl; - test_group_partition<4>(useGlobalMem); - std::cout << "TEST 3:" << '\n' << std::endl; - 
test_group_partition<8>(useGlobalMem); - std::cout << "TEST 4:" << '\n' << std::endl; - test_group_partition<16>(useGlobalMem); - std::cout << "TEST 5:" << '\n' << std::endl; - test_group_partition<32>(useGlobalMem); - - - std::cout << "Now testing dynamic tiled_partition for different tile sizes" << '\n' << std::endl; - - /* Test dynamic group partition*/ - useGlobalMem = true; - int testNo = 1; - std::vector tileSizes = {2, 4, 8, 16, 32}; - std::cout << "\nUsing global memory for computation\n"; - for (auto i : tileSizes) { - std::cout << "TEST " << testNo << ":" << '\n' << std::endl; - test_group_partition(i, useGlobalMem); - testNo++; - } - - useGlobalMem = false; - testNo = 1; - std::cout << "\nUsing shared memory for computation\n"; - for (auto i : tileSizes) { - std::cout << "TEST " << testNo << ":" << '\n' << std::endl; - test_group_partition(i, useGlobalMem); - testNo++; - } - - passed(); - return 0; -} diff --git a/tests/src/runtimeApi/cooperativeGrps/thread_block_tile_shfl_ops.cpp b/tests/src/runtimeApi/cooperativeGrps/thread_block_tile_shfl_ops.cpp deleted file mode 100644 index db8342deb4..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/thread_block_tile_shfl_ops.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test Description: -/* This test implements sum reduction kernel, first with each threads own rank - as input and comparing the sum with expected sum output derieved from n(n-1)/2 - formula. - This sample tests functionality of intrinsics provided by thread_block_tile type, - shfl_down and shfl_xor. -*/ - -#include "test_common.h" -#include -#include -#include - -using namespace cooperative_groups; - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -template -__device__ int reduction_kernel_shfl_down(thread_block_tile const& g, volatile int val) { - int sz = g.size(); - - for (int i = sz / 2; i > 0; i >>= 1) { - val += g.shfl_down(val, i); - } - - // Choose the 0'th indexed thread that holds the reduction value to return - if (g.thread_rank() == 0) { - return val; - } - // Rest of the threads return no useful values - else { - return -1; - } -} - -template -__device__ int reduction_kernel_shfl_xor(thread_block_tile const& g, int val) { - int sz = g.size(); - - for (int i = sz / 2; i > 0; i >>= 1) { - val += g.shfl_xor(val, i); - } - - // Choose the 0'th indexed thread that holds the reduction value to return - if (g.thread_rank() == 0) { - return val; - } - // Rest of the threads return no useful values - else { - return -1; - } -} - -template -__global__ void kernel_cg_group_partition_static(int* result, bool runShflDown) { - thread_block threadBlockCGTy = this_thread_block(); - int threadBlockGroupSize = threadBlockCGTy.size(); - int input, outputSum, expectedSum; - - // Choose a leader 
thread to print the results - if (threadBlockCGTy.thread_rank() == 0) { - printf(" Creating %d groups, of tile size %d threads:\n\n", - (int)threadBlockCGTy.size() / tileSz, tileSz); - } - - threadBlockCGTy.sync(); - - thread_block_tile tiledPartition = tiled_partition(threadBlockCGTy); - int threadRank = tiledPartition.thread_rank(); - - input = tiledPartition.thread_rank(); - - // (n-1)(n)/2 - expectedSum = ((tileSz - 1) * tileSz / 2); - - if (runShflDown) { - outputSum = reduction_kernel_shfl_down(tiledPartition, input); - - if (tiledPartition.thread_rank() == 0) { - printf( - " Sum of all ranks 0..%d in this tiledPartition group using shfl_down is %d (expected " - "%d)\n", - tiledPartition.size() - 1, outputSum, expectedSum); - result[threadBlockCGTy.thread_rank() / (tileSz)] = outputSum; - } - } else { - outputSum = reduction_kernel_shfl_xor(tiledPartition, input); - - if (tiledPartition.thread_rank() == 0) { - printf( - " Sum of all ranks 0..%d in this tiledPartition group using shfl_xor is %d (expected " - "%d)\n", - tiledPartition.size() - 1, outputSum, expectedSum); - result[threadBlockCGTy.thread_rank() / (tileSz)] = outputSum; - } - } - - return; -} - -void verifyResults(int* ptr, int expectedResult, int numTiles) { - for (int i = 0; i < numTiles; i++) { - if (ptr[i] != expectedResult) { - failed(" Results do not match! 
"); - } - } -} - -template static void test_group_partition(bool runShflDown) { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = 64; - - int numTiles = (blockSize * threadsPerBlock) / tileSz; - int expectedSum = ((tileSz - 1) * tileSz / 2); - int* expectedResult = new int[numTiles]; - - for (int i = 0; i < numTiles; i++) { - expectedResult[i] = expectedSum; - } - - int* dResult = NULL; - int* hResult = NULL; - - hipHostMalloc(&hResult, numTiles * sizeof(int), hipHostMallocDefault); - memset(hResult, 0, numTiles * sizeof(int)); - - hipMalloc(&dResult, numTiles * sizeof(int)); - - if (runShflDown) { - // Launch Kernel - hipLaunchKernelGGL(kernel_cg_group_partition_static, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dResult, runShflDown); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - } else { - // Launch Kernel - hipLaunchKernelGGL(kernel_cg_group_partition_static, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dResult, runShflDown); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - } - - hipMemcpy(hResult, dResult, sizeof(int) * numTiles, hipMemcpyDeviceToHost); - - verifyResults(hResult, expectedSum, numTiles); - - // Free all allocated memory on host and device - hipFree(dResult); - hipFree(hResult); - delete[] expectedResult; - - printf("\n...PASSED.\n\n"); -} - - -int main() { - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - return 0; - } - - bool runShflDown = true; - std::cout << "Testing static tiled_partition for different tile sizes using shfl_down" - << std::endl; - /* Test static tile_partition */ - std::cout << "TEST 1:" << '\n' << std::endl; - test_group_partition<2>(runShflDown); - std::cout << "TEST 2:" << '\n' << std::endl; - test_group_partition<4>(runShflDown); - std::cout << "TEST 3:" << '\n' << std::endl; - test_group_partition<8>(runShflDown); - std::cout << "TEST 4:" << '\n' << std::endl; - test_group_partition<16>(runShflDown); - std::cout << "TEST 5:" << '\n' << std::endl; - test_group_partition<32>(runShflDown); - - runShflDown = false; - std::cout << "Testing static tiled_partition for different tile sizes using shfl_xor" - << std::endl; - /* Test static tile_partition */ - std::cout << "TEST 1:" << '\n' << std::endl; - test_group_partition<2>(runShflDown); - std::cout << "TEST 2:" << '\n' << std::endl; - test_group_partition<4>(runShflDown); - std::cout << "TEST 3:" << '\n' << std::endl; - test_group_partition<8>(runShflDown); - std::cout << "TEST 4:" << '\n' << std::endl; - test_group_partition<16>(runShflDown); - std::cout << "TEST 5:" << '\n' << std::endl; - test_group_partition<32>(runShflDown); - - passed(); -} diff --git a/tests/src/runtimeApi/cooperativeGrps/thread_block_tiled_shfl_up.cpp b/tests/src/runtimeApi/cooperativeGrps/thread_block_tiled_shfl_up.cpp deleted file mode 100644 index 436136aeb5..0000000000 --- a/tests/src/runtimeApi/cooperativeGrps/thread_block_tiled_shfl_up.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test Description: -/* This test implements prefix sum(scan) kernel, first with each threads own rank - as input and comparing the sum with expected serial summation output on CPU. - - This sample tests functionality of intrinsics provided by thread_block_tile type, - shfl_up. 
-*/ -#include "test_common.h" -#include -#include -#include - -using namespace cooperative_groups; - -#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs) - -template -__device__ int prefix_sum_kernel(thread_block_tile const& g, volatile int val) { - int sz = g.size(); -#pragma unroll - for (int i = 1; i < sz; i <<= 1) { - int temp = g.shfl_up(val, i); - - if (g.thread_rank() >= i) { - val += temp; - } - } - return val; -} - -template __global__ void kernel_cg_group_partition_static(int* dPtr) { - thread_block threadBlockCGTy = this_thread_block(); - int threadBlockGroupSize = threadBlockCGTy.size(); - - int input, outputSum; - - // we pass its own thread rank as inputs - input = threadBlockCGTy.thread_rank(); - - // Choose a leader thread to print the results - if (threadBlockCGTy.thread_rank() == 0) { - printf(" Creating %d groups, of tile size %d threads:\n\n", - (int)threadBlockCGTy.size() / tileSz, tileSz); - } - - threadBlockCGTy.sync(); - - thread_block_tile tiledPartition = tiled_partition(threadBlockCGTy); - - input = tiledPartition.thread_rank(); - - outputSum = prefix_sum_kernel(tiledPartition, input); - - // Update the result array with the corresponsing prefix sum - dPtr[threadBlockCGTy.thread_rank()] = outputSum; - return; -} - -void serialScan(int* ptr, int size) { - // Fill up the array - for (int i = 0; i < size; i++) { - ptr[i] = i; - } - - int acc = 0; - for (int i = 0; i < size; i++) { - acc = acc + ptr[i]; - ptr[i] = acc; - } -} - -void printResults(int* ptr, int size) { - for (int i = 0; i < size; i++) { - std::cout << ptr[i] << " "; - } - std::cout << '\n'; -} - -void verifyResults(int* cpu, int* gpu, int size) { - for (unsigned int i = 0; i < size / sizeof(int); i++) { - if (cpu[i] != gpu[i]) { - failed(" Prefix sum results do not match."); - } - } -} - -template static void test_group_partition() { - hipError_t err; - int blockSize = 1; - int threadsPerBlock = 64; - - int* hPtr = NULL; - int* dPtr = NULL; - int* cpuPrefixSum = NULL; - - int 
arrSize = blockSize * threadsPerBlock * sizeof(int); - - hipHostMalloc(&hPtr, arrSize); - hipMalloc(&dPtr, arrSize); - - // Launch Kernel - hipLaunchKernelGGL(kernel_cg_group_partition_static, blockSize, threadsPerBlock, - threadsPerBlock * sizeof(int), 0, dPtr); - hipMemcpy(hPtr, dPtr, arrSize, hipMemcpyDeviceToHost); - err = hipDeviceSynchronize(); - if (err != hipSuccess) { - fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); - } - - cpuPrefixSum = new int[tileSz]; - serialScan(cpuPrefixSum, tileSz); - std::cout << "\nPrefix sum results on CPU\n"; - printResults(cpuPrefixSum, tileSz); - - std::cout << "\nPrefix sum results on GPU\n"; - printResults(hPtr, tileSz); - std::cout << "\n"; - verifyResults(hPtr, cpuPrefixSum, tileSz); - std::cout << "Results verified!\n"; - - delete[] cpuPrefixSum; - hipFree(hPtr); - hipFree(dPtr); -} - -int main() { - // Use default device for validating the test - int deviceId; - ASSERT_EQUAL(hipGetDevice(&deviceId), hipSuccess); - hipDeviceProp_t deviceProperties; - ASSERT_EQUAL(hipGetDeviceProperties(&deviceProperties, deviceId), hipSuccess); - int maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; - - if (!deviceProperties.cooperativeLaunch) { - std::cout << "info: Device doesn't support cooperative launch! 
skipping the test!\n"; - if (hip_skip_tests_enabled()) { - return hip_skip_retcode(); - } else { - passed(); - } - return 0; - } - std::cout << "Testing static tiled_partition for different tile sizes" << std::endl; - /* Test static tile_partition */ - std::cout << "TEST 1:" << '\n' << std::endl; - test_group_partition<2>(); - std::cout << "TEST 2:" << '\n' << std::endl; - test_group_partition<4>(); - std::cout << "TEST 3:" << '\n' << std::endl; - test_group_partition<8>(); - std::cout << "TEST 4:" << '\n' << std::endl; - test_group_partition<16>(); - std::cout << "TEST 5:" << '\n' << std::endl; - test_group_partition<32>(); - passed(); -} - -/* Kogge-Stone algorithm */ \ No newline at end of file diff --git a/tests/src/runtimeApi/device/hipChooseDevice.cpp b/tests/src/runtimeApi/device/hipChooseDevice.cpp deleted file mode 100644 index a49af74e4b..0000000000 --- a/tests/src/runtimeApi/device/hipChooseDevice.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -int main(void) { - hipDeviceProp_t prop; - int dev; - - hipGetDevice(&dev); - printf("ID of current HIP device: %d\n", dev); - - memset(&prop, 0, sizeof(hipDeviceProp_t)); - prop.major = 1; - prop.minor = 3; - hipChooseDevice(&dev, &prop); - printf("ID of hip device closest to revision 1.3: %d\n", dev); - - hipSetDevice(dev); - - passed(); -} diff --git a/tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp b/tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp deleted file mode 100644 index ddb60ea059..0000000000 --- a/tests/src/runtimeApi/device/hipDeviceComputeCapability.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - int numDevices = 0; - int major, minor; - hipDevice_t device; - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipDeviceGet(&device, i)); - HIPCHECK(hipDeviceComputeCapability(&major, &minor, device)); - HIPASSERT(major >= 0); - HIPASSERT(minor >= 0); - } - passed(); -} diff --git a/tests/src/runtimeApi/device/hipDeviceGetByPCIBusId.cpp b/tests/src/runtimeApi/device/hipDeviceGetByPCIBusId.cpp deleted file mode 100644 index 195a0511b7..0000000000 --- a/tests/src/runtimeApi/device/hipDeviceGetByPCIBusId.cpp +++ /dev/null @@ -1,182 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x1 - * TEST: %t --tests 0x2 - * TEST: %t --tests 0x3 - * TEST: %t --tests 0x4 - * HIT_END - */ - -#include -#include "test_common.h" -/** - * Validates negative scenarios for hipDeviceGetByPCIBusId - * scenario: Validates device number from pciBusIdstr string - */ -bool testPciBusId(void) { - bool testResult = true; - char pciBusId[13]; - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - for (int i = 0; i < deviceCount; i++) { - int pciBusID = -1; - int pciDeviceID = -1; - int pciDomainID = -1; - int tempPciBusId = -1; - int tempDeviceId = -1; - HIPCHECK(hipDeviceGetPCIBusId(&pciBusId[0], 13, i)); - sscanf(pciBusId, "%04x:%02x:%02x", &pciDomainID, - &pciBusID, &pciDeviceID); - HIPCHECK(hipDeviceGetAttribute(&tempPciBusId, - hipDeviceAttributePciBusId, i)); - if (pciBusID != tempPciBusId) { - testResult = false; - break; - } - HIPCHECK(hipDeviceGetByPCIBusId(&tempDeviceId, pciBusId)); - if (tempDeviceId != i) { - testResult = false; - break; - } - } - return testResult; -} - -/** - * Validates negative scenarios for hipDeviceGetByPCIBusId - * scenario: device = nullptr and pciBusIdstr = nullptr - */ -bool testNullPtr() { - bool TestPassed = true; - int device = -1; - hipError_t ret; - char pciBusIdstr[13]; - ret = hipDeviceGetByPCIBusId(nullptr, pciBusIdstr); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {device = nullptr} Failed \n"); - } - ret = hipDeviceGetByPCIBusId(&device, nullptr); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {pciBusIdstr = nullptr} Failed \n"); - } - return TestPassed; -} - -/** - * Validates 
negative scenarios for hipDeviceGetByPCIBusId - * scenario1: Pass an empty like "" - * scenario1: Pass an shorter string "0000:" - */ -bool testInputString() { - bool TestPassed = true; - int device = -1; - hipError_t ret; - ret = hipDeviceGetByPCIBusId(&device, ""); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {empty input string:\"\"} Failed \n"); - } - ret = hipDeviceGetByPCIBusId(&device, "0000:"); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {shorter input string: \"0000:\"} Failed \n"); - } - return TestPassed; -} - -/** - * Validates negative scenarios for hipDeviceGetByPCIBusId - * scenario: Pass wrong bus id in pciBusIdstr - */ -bool testWrongBusID() { - bool TestPassed = true; - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - int pciBusId[deviceCount], pciDeviceID[deviceCount], - pciDomainID[deviceCount]; - // get bus id of all the devices - for (int i = 0; i < deviceCount; i++) { - hipDeviceProp_t prop; - HIPCHECK(hipGetDeviceProperties(&prop, i)); - pciBusId[i] = prop.pciBusID; - pciDeviceID[i] = prop.pciDeviceID; - pciDomainID[i] = prop.pciDomainID; - printf("device %d: pciDomainID=%x, pciBusID=%x, pciDeviceID=%x \n", - i, prop.pciDomainID, prop.pciBusID, prop.pciDomainID); - } - // get a non existing bus id - int id = 0; - for (; id < 256; id++) { - bool bFound = false; - // check if id is the pci busid of any existing device - for (int j = 0; j < deviceCount; j++) { - if (id == pciBusId[j]) { - bFound = true; - break; - } - } - if (!bFound) - break; - } - // now pass the non existing bus id as string - char pciBusIdstr[12]; - int device = -1; - hipError_t ret; - snprintf(pciBusIdstr, sizeof(pciBusIdstr), "%04x:%02x:%02x", pciDomainID[0], - id, pciDeviceID[0]); - ret = hipDeviceGetByPCIBusId(&device, pciBusIdstr); - if (ret == hipSuccess) { - TestPassed = false; - printf("Test: hipDeviceGetByPCIBusId(&device,%s) Failed \n", pciBusIdstr); - } - return TestPassed; 
-} - -int main(int argc, char* argv[]) { - bool TestPassed = true; - HipTest::parseStandardArguments(argc, argv, true); - - if (p_tests == 0x1) { - TestPassed = testPciBusId(); - } else if (p_tests == 0x2) { - TestPassed = testNullPtr(); - } else if (p_tests == 0x3) { - TestPassed = testInputString(); - } else if (p_tests == 0x4) { - TestPassed = testWrongBusID(); - } else { - printf("Invalid Test Case \n"); - exit(1); - } - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/runtimeApi/device/hipDeviceGetName.cpp b/tests/src/runtimeApi/device/hipDeviceGetName.cpp deleted file mode 100644 index b08d093d91..0000000000 --- a/tests/src/runtimeApi/device/hipDeviceGetName.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -#define len 256 - -int main() { - int numDevices = 0; - char name[len]; - hipDevice_t device; - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipDeviceGet(&device, i)); - HIPCHECK(hipDeviceGetName(name, len, device)); - HIPASSERT(name != ""); - } - passed(); -} diff --git a/tests/src/runtimeApi/device/hipDeviceGetPCIBusId.cpp b/tests/src/runtimeApi/device/hipDeviceGetPCIBusId.cpp deleted file mode 100644 index 8349375647..0000000000 --- a/tests/src/runtimeApi/device/hipDeviceGetPCIBusId.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -/* - * Test to compare - * 1.pciBusID from hipDeviceGetPCIBusId and hipDeviceGetAttribute ** - * 2.{pciDomainID, pciBusID, pciDeviceID} values hipDeviceGetPCIBusId vs lspci ** - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t hipDeviceGetPCIBusId-vs-hipDeviceGetAttribute --tests 0x1 - * TEST_NAMED: %t hipDeviceGetPCIBusId-vs-lspci --tests 0x2 - * TEST_NAMED: %t hipDeviceGetPCIBusId-negative --tests 0x3 - * HIT_END - */ - -#include "test_common.h" -#define MAX_DEVICE_LENGTH 20 - -static bool getPciBusId(int deviceCount, - char hipDeviceList[][MAX_DEVICE_LENGTH]) { - for (int i = 0; i < deviceCount; i++) { - HIPCHECK(hipDeviceGetPCIBusId(hipDeviceList[i], MAX_DEVICE_LENGTH, i)); - } - return true; -} - -bool comparePciBusIDWithHipDeviceGetAttribute() { - bool testResult = true; - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - printf("No.of gpus in the system: %d\n", deviceCount); - char hipDeviceList[deviceCount][MAX_DEVICE_LENGTH]; - - getPciBusId(deviceCount, hipDeviceList); - - for (int i = 0; i < deviceCount; i++) { - int pciBusID = -1; - int pciDeviceID = -1; - int pciDomainID = -1; - int tempPciBusId = -1; - sscanf(hipDeviceList[i], "%04x:%02x:%02x", &pciDomainID, &pciBusID, - &pciDeviceID); - HIPCHECK(hipDeviceGetAttribute(&tempPciBusId, - hipDeviceAttributePciBusId, i)); - if (pciBusID != tempPciBusId) { - testResult = false; - printf("pciBusID from hipDeviceGetPCIBusId mismatched to that from " - "hipDeviceGetAttribute for gpu %d\n", i); - } - } - - printf("pciBusID output of both hipDeviceGetPCIBusId and" - " hipDeviceGetAttribute matched for all gpus\n"); - return testResult; -} - -#ifdef __linux__ -bool compareHipDeviceGetPCIBusIdWithLspci() { - FILE *fpipe; - bool testResult = false; - - { - // Check if lspci is installed, if not, don't proceed - char const *cmd = "lspci --version"; - char *lspciCheck; - char temp[20]; - fpipe = popen(cmd, 
"r"); - - if (fpipe == nullptr) { - printf("Unable to create command file\n"); - return testResult; - } - - lspciCheck = fgets(temp, 20, fpipe); - pclose(fpipe); - - if (!lspciCheck) { - printf("lspci not found. Skipping the test\n"); - return true; - } - } - - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - printf("No.of gpus in the system: %d\n", deviceCount); - char hipDeviceList[deviceCount][MAX_DEVICE_LENGTH]; - char pciDeviceList[deviceCount][MAX_DEVICE_LENGTH]; - - getPciBusId(deviceCount, hipDeviceList); - - // Get lspci device list and compare with hip device list -#ifdef __HIP_PLATFORM_NVCC__ - char const *command = "lspci -D | grep controller | grep NVIDIA | " - "cut -d ' ' -f 1"; -#else - char const *command = "lspci -D | grep -e controller -e accelerator | grep AMD/ATI | " - "cut -d ' ' -f 1"; -#endif - fpipe = popen(command, "r"); - - if (fpipe == nullptr) { - printf("Unable to create command file\n"); - return testResult; - } - - int index = 0; - int deviceMatchCount = 0; - - while (fgets(pciDeviceList[index], sizeof(pciDeviceList[index]), fpipe)) { - bool bMatchFound = false; - for (int deviceNo = 0; deviceNo < deviceCount; deviceNo++) { - if (!strncasecmp(pciDeviceList[index], hipDeviceList[deviceNo], 10)) { - deviceMatchCount++; - bMatchFound = true; - } - } - if (bMatchFound == false) { - printf("PCI device: %s is not reported by HIP\n", - pciDeviceList[index]); - } - index++; - } - - pclose(fpipe); - - if (deviceMatchCount == deviceCount) { - printf("hip and lspci output for {pciDomainID, pciBusID, pciDeviceID} " - "matched for all gpus\n"); - testResult = true; - } else { - printf("Mismatch in number GPUs reported by HIP with lscpi\n"); - } - return testResult; -} -#endif - -/** - * Validates negative scenarios for hipDeviceGetPCIBusId - * scenario1: pciBusId = nullptr - * scenario2: device = -1 (Invalid Device) - * scenario3: device = Non Existing Device - * scenario4: len = 0 - * scenario5: 
len < 0 - */ -bool testInvalidParameters() { - bool TestPassed = true; - hipError_t ret; - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - printf("No.of gpus in the system: %d\n", deviceCount); - char pciBusId[MAX_DEVICE_LENGTH]; - // pciBusId = nullptr - int device; - HIPCHECK(hipGetDevice(&device)); - ret = hipDeviceGetPCIBusId(nullptr, MAX_DEVICE_LENGTH, device); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {pciBusId = nullptr} Failed \n"); - } - // len = 0 - ret = hipDeviceGetPCIBusId(pciBusId, 0, device); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {len = 0} Failed \n"); - } - // len < 0 - ret = hipDeviceGetPCIBusId(pciBusId, -1, device); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {len < 0} Failed \n"); - } - // device = -1 - ret = hipDeviceGetPCIBusId(pciBusId, MAX_DEVICE_LENGTH, -1); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {device = -1} Failed \n"); - } - // device = Non Existing Device - ret = hipDeviceGetPCIBusId(pciBusId, MAX_DEVICE_LENGTH, deviceCount); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {device = Non Existing Device} Failed \n"); - } - return TestPassed; -} - -int main(int argc, char* argv[]) { - bool testResult = true; - HipTest::parseStandardArguments(argc, argv, true); - - if (p_tests == 0x1) { - testResult &= comparePciBusIDWithHipDeviceGetAttribute(); - } - - if (p_tests == 0x2) { -#ifdef __linux__ - testResult &= compareHipDeviceGetPCIBusIdWithLspci(); -#else - printf("Detected non-linux OS. 
Skipping the test\n"); -#endif - } - - if (p_tests == 0x3) { - testResult &= testInvalidParameters(); - } - - if (testResult) { - passed(); - } else { - failed("one or more tests failed\n"); - } -} diff --git a/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp b/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp deleted file mode 100644 index f6eb78d3bc..0000000000 --- a/tests/src/runtimeApi/device/hipDeviceSynchronize.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * Test for checking the functionality of - * hipError_t hipDeviceSynchronize(); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -#define _SIZE sizeof(int) * 1024 * 1024 -#define NUM_STREAMS 2 - -__global__ void Iter(int* Ad, int num) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - // Kernel loop designed to execute very slowly... ... ... so we can test timing-related - // behavior below - if (tx == 0) { - for (int i = 0; i < num; i++) { - Ad[tx] += 1; - } - } -} - -int main() { - int* A[NUM_STREAMS]; - int* Ad[NUM_STREAMS]; - hipStream_t stream[NUM_STREAMS]; - for (int i = 0; i < NUM_STREAMS; i++) { - HIPCHECK(hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault)); - A[i][0] = 1; - HIPCHECK(hipMalloc((void**)&Ad[i], _SIZE)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - for (int i = 0; i < NUM_STREAMS; i++) { - HIPCHECK(hipMemcpyAsync(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice, stream[i])); - } - for (int i = 0; i < NUM_STREAMS; i++) { - hipLaunchKernelGGL(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, stream[i], Ad[i], 1 << 30); - } - for (int i = 0; i < NUM_STREAMS; i++) { - HIPCHECK(hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost, stream[i])); - } - - - // This first check but relies on the kernel running for so long that the D2H async memcopy has - // not started yet. This will be true in an optimal asynchronous implementation. 
Conservative - // implementations which synchronize the hipMemcpyAsync will fail, ie if - // HIP_LAUNCH_BLOCKING=true - HIPASSERT(1 << 30 != A[NUM_STREAMS - 1][0] - 1); - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(1 << 30 == A[NUM_STREAMS - 1][0] - 1); - passed(); -} diff --git a/tests/src/runtimeApi/device/hipDeviceTotalMem.cpp b/tests/src/runtimeApi/device/hipDeviceTotalMem.cpp deleted file mode 100644 index bc41194753..0000000000 --- a/tests/src/runtimeApi/device/hipDeviceTotalMem.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - int numDevices = 0; - size_t totMem; - hipDevice_t device; - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipDeviceGet(&device, i)); - HIPCHECK(hipDeviceTotalMem(&totMem, device)); - HIPASSERT(totMem != 0); - } - passed(); -} diff --git a/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp b/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp deleted file mode 100644 index 29fb89ee1e..0000000000 --- a/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp +++ /dev/null @@ -1,303 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -// Test the device info API extensions for HIP: - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x1 - * TEST: %t --tests 0x2 - * HIT_END - */ - -#include -#include -#include "test_common.h" - -hipError_t test_hipDeviceGetAttribute(int deviceId, - hipDeviceAttribute_t attr, - int expectedValue = -1) { - int value = 0; - std::cout << "Test hipDeviceGetAttribute attribute " << attr; - if (expectedValue != -1) { - std::cout << " expected value " << expectedValue; - } - hipError_t e = hipDeviceGetAttribute(&value, attr, deviceId); - std::cout << " actual value " << value << std::endl; - if ((expectedValue != -1) && value != expectedValue) { - std::cout << "fail" << std::endl; - return hipErrorInvalidValue; - } - return hipSuccess; -} - -hipError_t test_hipDeviceGetHdpAddress(int deviceId, - hipDeviceAttribute_t attr, - uint32_t* expectedValue) { - uint32_t* value = 0; - std::cout << "Test hipDeviceGetHdpAddress attribute " << attr; - if (expectedValue != reinterpret_cast(0xdeadbeef)) { - std::cout << " expected value " << expectedValue; - } - hipError_t e = hipDeviceGetAttribute(reinterpret_cast(&value), - attr, deviceId); - std::cout << " actual value " << value << std::endl; - if ((expectedValue != reinterpret_cast(0xdeadbeef)) && - value != expectedValue) { - std::cout << "fail" << std::endl; - return hipErrorInvalidValue; - } - return hipSuccess; -} - -bool testAttributeValues() { - int deviceId; - HIPCHECK(hipGetDevice(&deviceId)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, deviceId)); - printf("info: running on device #%d %s\n", deviceId, props.name); - - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxThreadsPerBlock, - props.maxThreadsPerBlock)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxBlockDimX, - props.maxThreadsDim[0])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxBlockDimY, - props.maxThreadsDim[1])); 
- HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxBlockDimZ, - props.maxThreadsDim[2])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxGridDimX, - props.maxGridSize[0])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxGridDimY, - props.maxGridSize[1])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxGridDimZ, - props.maxGridSize[2])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxSharedMemoryPerBlock, - props.sharedMemPerBlock)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeTotalConstantMemory, - props.totalConstMem)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeWarpSize, - props.warpSize)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxRegistersPerBlock, - props.regsPerBlock)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeClockRate, - props.clockRate)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMemoryClockRate, - props.memoryClockRate)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMemoryBusWidth, - props.memoryBusWidth)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMultiprocessorCount, - props.multiProcessorCount)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeIsMultiGpuBoard, - props.isMultiGpuBoard)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeComputeMode, - props.computeMode)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeL2CacheSize, - props.l2CacheSize)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxThreadsPerMultiProcessor, - props.maxThreadsPerMultiProcessor)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeComputeCapabilityMajor, - props.major)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeComputeCapabilityMinor, - props.minor)); - 
HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeConcurrentKernels, - props.concurrentKernels)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributePciBusId, - props.pciBusID)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributePciDeviceId, - props.pciDeviceID)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, - props.maxSharedMemoryPerMultiProcessor)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeIntegrated, - props.integrated)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxTexture1DWidth, - props.maxTexture1D)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxTexture2DWidth, - props.maxTexture2D[0])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxTexture2DHeight, - props.maxTexture2D[1])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxTexture3DWidth, - props.maxTexture3D[0])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxTexture3DHeight, - props.maxTexture3D[1])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxTexture3DDepth, - props.maxTexture3D[2])); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeCooperativeLaunch, - props.cooperativeLaunch)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeCooperativeMultiDeviceLaunch, - props.cooperativeMultiDeviceLaunch)); - -#ifndef __HIP_PLATFORM_NVCC__ - HIPCHECK(test_hipDeviceGetHdpAddress(deviceId, - hipDeviceAttributeHdpMemFlushCntl, - props.hdpMemFlushCntl)); - HIPCHECK(test_hipDeviceGetHdpAddress(deviceId, - hipDeviceAttributeHdpRegFlushCntl, - props.hdpRegFlushCntl)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeDirectManagedMemAccessFromHost, - props.directManagedMemAccessFromHost)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeConcurrentManagedAccess, - 
props.concurrentManagedAccess)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributePageableMemoryAccess, - props.pageableMemoryAccess)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributePageableMemoryAccessUsesHostPageTables, - props.pageableMemoryAccessUsesHostPageTables)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, - props.cooperativeMultiDeviceUnmatchedFunc)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, - props.cooperativeMultiDeviceUnmatchedGridDim)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, - props.cooperativeMultiDeviceUnmatchedBlockDim)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, - props.cooperativeMultiDeviceUnmatchedSharedMem)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeAsicRevision, - props.asicRevision)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeManagedMemory, - props.managedMemory)); -#endif - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeMaxPitch, - props.memPitch)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeTextureAlignment, - props.textureAlignment)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeKernelExecTimeout, - props.kernelExecTimeoutEnabled)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeCanMapHostMemory, - props.canMapHostMemory)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeEccEnabled, - props.ECCEnabled)); - HIPCHECK(test_hipDeviceGetAttribute(deviceId, - hipDeviceAttributeTexturePitchAlignment, - props.texturePitchAlignment)); - return true; -} -/** - * Validates negative scenarios for hipDeviceGetAttribute - * scenario1: pi = nullptr - * scenario2: device = -1 (Invalid Device) - * 
scenario3: device = Non Existing Device - * scenario4: attr = Invalid Attribute - */ -bool testInvalidParameters() { - bool TestPassed = true; - hipError_t ret; - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - printf("No.of gpus in the system: %d\n", deviceCount); - // pi = nullptr - int device; - HIPCHECK(hipGetDevice(&device)); - ret = hipDeviceGetAttribute(nullptr, hipDeviceAttributePciBusId, device); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {pi = nullptr} Failed \n"); - } - // device = -1 - int pi = -1; - ret = hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, -1); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {device = -1} Failed \n"); - } - // device = Non Existing Device - pi = -1; - ret = hipDeviceGetAttribute(&pi, hipDeviceAttributePciBusId, deviceCount); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {device = Non Existing Device} Failed \n"); - } - // attr = Invalid Attribute - pi = -1; - ret = hipDeviceGetAttribute(&pi, static_cast(-1), - device); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {attr = Invalid Attribute} Failed \n"); - } - return TestPassed; -} - -int main(int argc, char* argv[]) { - bool TestPassed = true; - HipTest::parseStandardArguments(argc, argv, true); - - if (p_tests == 0x1) { - TestPassed = testAttributeValues(); - } else if (p_tests == 0x2) { - TestPassed = testInvalidParameters(); - } else { - printf("Invalid Test Case \n"); - exit(1); - } - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/runtimeApi/device/hipGetDeviceProperties.cpp b/tests/src/runtimeApi/device/hipGetDeviceProperties.cpp deleted file mode 100644 index 980f15517b..0000000000 --- a/tests/src/runtimeApi/device/hipGetDeviceProperties.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x01 - * TEST: %t --tests 0x02 - * HIT_END - */ -#include -#include -#include -#include "test_common.h" - -#define NUM_OF_ARCHPROP 17 -#define HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS_IDX 0 -#define HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH_IDX 1 -#define HIP_ARCH_HAS_SHARED_INT32_ATOMICS_IDX 2 -#define HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH_IDX 3 -#define HIP_ARCH_HAS_FLOAT_ATOMIC_ADD_IDX 4 -#define HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS_IDX 5 -#define HIP_ARCH_HAS_SHARED_INT64_ATOMICS_IDX 6 -#define HIP_ARCH_HAS_DOUBLES_IDX 7 -#define HIP_ARCH_HAS_WARP_VOTE_IDX 8 -#define HIP_ARCH_HAS_WARP_BALLOT_IDX 9 -#define HIP_ARCH_HAS_WARP_SHUFFLE_IDX 10 -#define HIP_ARCH_HAS_WARP_FUNNEL_SHIFT_IDX 11 -#define HIP_ARCH_HAS_THREAD_FENCE_SYSTEM_IDX 12 -#define HIP_ARCH_HAS_SYNC_THREAD_EXT_IDX 13 -#define HIP_ARCH_HAS_SURFACE_FUNCS_IDX 14 -#define HIP_ARCH_HAS_3DGRID_IDX 15 -#define HIP_ARCH_HAS_DYNAMIC_PARALLEL_IDX 16 - -__device__ void getArchValuesFromDevice(int *archProp_d) { - archProp_d[0] = __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__; - archProp_d[1] = __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__; - archProp_d[2] = __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__; - archProp_d[3] = __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__; - archProp_d[4] = __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__; - archProp_d[5] = __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__; - archProp_d[6] = __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__; - archProp_d[7] = __HIP_ARCH_HAS_DOUBLES__; - archProp_d[8] = __HIP_ARCH_HAS_WARP_VOTE__; - archProp_d[9] = __HIP_ARCH_HAS_WARP_BALLOT__; - archProp_d[10] = __HIP_ARCH_HAS_WARP_SHUFFLE__; - archProp_d[11] = __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__; - archProp_d[12] = __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__; - archProp_d[13] = __HIP_ARCH_HAS_SYNC_THREAD_EXT__; - archProp_d[14] = __HIP_ARCH_HAS_SURFACE_FUNCS__; - archProp_d[15] = __HIP_ARCH_HAS_3DGRID__; - archProp_d[16] = __HIP_ARCH_HAS_DYNAMIC_PARALLEL__; -} - -__global__ void 
mykernel(int *archProp_d) { - getArchValuesFromDevice(archProp_d); -} - -/** - * Internal Functions - */ -bool validateDeviceMacro(int *archProp_h, hipDeviceProp_t *prop) { - bool TestPassed = true; - if (prop->arch.hasGlobalInt32Atomics != - archProp_h[HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS_IDX]) { - printf("mismatch: hasGlobalInt32Atomics \n"); - TestPassed &= false; - } - if (prop->arch.hasGlobalFloatAtomicExch != - archProp_h[HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH_IDX]) { - printf("mismatch: hasGlobalFloatAtomicExch \n"); - TestPassed &= false; - } - if (prop->arch.hasSharedInt32Atomics != - archProp_h[HIP_ARCH_HAS_SHARED_INT32_ATOMICS_IDX]) { - TestPassed &= false; - } - if (prop->arch.hasSharedFloatAtomicExch != - archProp_h[HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH_IDX]) { - printf("mismatch: hasSharedFloatAtomicExch \n"); - TestPassed &= false; - } - if (prop->arch.hasFloatAtomicAdd != - archProp_h[HIP_ARCH_HAS_FLOAT_ATOMIC_ADD_IDX]) { - printf("mismatch: hasFloatAtomicAdd \n"); - TestPassed &= false; - } - if (prop->arch.hasGlobalInt64Atomics != - archProp_h[HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS_IDX]) { - printf("mismatch: hasGlobalInt64Atomics \n"); - TestPassed &= false; - } - /* TODO: Uncomment this code once the mismatch issue is resolved - if (prop->arch.hasSharedInt64Atomics != - archProp_h[HIP_ARCH_HAS_SHARED_INT64_ATOMICS_IDX]) { - TestPassed &= false; - }*/ - if (prop->arch.hasDoubles != - archProp_h[HIP_ARCH_HAS_DOUBLES_IDX]) { - printf("mismatch: hasDoubles \n"); - TestPassed &= false; - } - if (prop->arch.hasWarpVote != - archProp_h[HIP_ARCH_HAS_WARP_VOTE_IDX]) { - printf("mismatch: hasWarpVote \n"); - TestPassed &= false; - } - if (prop->arch.hasWarpBallot != - archProp_h[HIP_ARCH_HAS_WARP_BALLOT_IDX]) { - printf("mismatch: hasWarpBallot \n"); - TestPassed &= false; - } - if (prop->arch.hasWarpShuffle != - archProp_h[HIP_ARCH_HAS_WARP_SHUFFLE_IDX]) { - printf("mismatch: hasWarpShuffle \n"); - TestPassed &= false; - } - if (prop->arch.hasFunnelShift != - 
archProp_h[HIP_ARCH_HAS_WARP_FUNNEL_SHIFT_IDX]) { - printf("mismatch: hasFunnelShift \n"); - TestPassed &= false; - } - if (prop->arch.hasThreadFenceSystem != - archProp_h[HIP_ARCH_HAS_THREAD_FENCE_SYSTEM_IDX]) { - printf("mismatch: hasThreadFenceSystem \n"); - TestPassed &= false; - } - if (prop->arch.hasSyncThreadsExt != - archProp_h[HIP_ARCH_HAS_SYNC_THREAD_EXT_IDX]) { - printf("mismatch: hasSyncThreadsExt \n"); - TestPassed &= false; - } - if (prop->arch.hasSurfaceFuncs != - archProp_h[HIP_ARCH_HAS_SURFACE_FUNCS_IDX]) { - printf("mismatch: hasSurfaceFuncs \n"); - TestPassed &= false; - } - if (prop->arch.has3dGrid != - archProp_h[HIP_ARCH_HAS_3DGRID_IDX]) { - printf("mismatch: has3dGrid \n"); - TestPassed &= false; - } - if (prop->arch.hasDynamicParallelism != - archProp_h[HIP_ARCH_HAS_DYNAMIC_PARALLEL_IDX]) { - printf("mismatch: hasDynamicParallelism \n"); - TestPassed &= false; - } - return TestPassed; -} -/** - * Validates value of __HIP_ARCH_* with deviceProp.arch.has* as follows - * __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ == hasGlobalInt32Atomics - * __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ == hasGlobalFloatAtomicExch - * __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ == hasSharedInt32Atomics - * __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ == hasSharedFloatAtomicExch - * __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ == hasFloatAtomicAdd - * __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ == hasGlobalInt64Atomics - * __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ == hasSharedInt64Atomics - * __HIP_ARCH_HAS_DOUBLES__ == hasDoubles - * __HIP_ARCH_HAS_WARP_VOTE__ == hasWarpVote - * __HIP_ARCH_HAS_WARP_BALLOT__ == hasWarpBallot - * __HIP_ARCH_HAS_WARP_SHUFFLE__ == hasWarpShuffle - * __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ == hasFunnelShift - * __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ == hasThreadFenceSystem - * __HIP_ARCH_HAS_SYNC_THREAD_EXT__ == hasSyncThreadsExt - * __HIP_ARCH_HAS_SURFACE_FUNCS__ == hasSurfaceFuncs - * __HIP_ARCH_HAS_3DGRID__ == has3dGrid - * __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ == 
hasDynamicParallelism - */ -bool testArchitectureProperties() { - bool TestPassed = true; - int *archProp_h, *archProp_d; - archProp_h = new int[NUM_OF_ARCHPROP]; - hipDeviceProp_t prop; - int deviceCount = 0, device; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - for (device = 0; device < deviceCount; device++) { - // Inititalize archProp_h to 0 - for (int i = 0; i < NUM_OF_ARCHPROP; i++) { - archProp_h[i] = 0; - } - HIPCHECK(hipGetDeviceProperties(&prop, device)); - HIPCHECK(hipSetDevice(device)); - HIPCHECK(hipMalloc(reinterpret_cast(&archProp_d), - NUM_OF_ARCHPROP*sizeof(int))); - HIPCHECK(hipMemcpy(archProp_d, archProp_h, - NUM_OF_ARCHPROP*sizeof(int), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(mykernel, dim3(1), dim3(1), - 0, 0, archProp_d); - HIPCHECK(hipMemcpy(archProp_h, archProp_d, - NUM_OF_ARCHPROP*sizeof(int), hipMemcpyDeviceToHost)); - // Validate the host architecture property with device - // architecture property. - TestPassed &= validateDeviceMacro(archProp_h, &prop); - HIPCHECK(hipFree(archProp_d)); - } - delete[] archProp_h; - return TestPassed; -} -/** - * Validates negative scenarios for hipGetDeviceProperties - * scenario1: props = nullptr - * scenario2: device = -1 (Invalid Device) - * scenario3: device = Non Existing Device - */ -bool testInvalidParameters() { - bool TestPassed = true; - hipError_t ret; - // props = nullptr -#ifndef __HIP_PLATFORM_NVCC__ - int device; - HIPCHECK(hipGetDevice(&device)); - // this test case results in segmentation fault on NVCC - ret = hipGetDeviceProperties(nullptr, device); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {props = nullptr} Failed \n"); - } -#endif - hipDeviceProp_t prop; - ret = hipGetDeviceProperties(&prop, -1); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {device = -1} Failed \n"); - } - // device = Non Existing Device - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - HIPASSERT(deviceCount != 0); - 
ret = hipGetDeviceProperties(&prop, deviceCount); - if (ret == hipSuccess) { - TestPassed &= false; - printf("Test {device = Non Existing Device} Failed \n"); - } - return TestPassed; -} - -int main(int argc, char** argv) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - if (p_tests == 0x01) { - TestPassed = testInvalidParameters(); - } else if (p_tests == 0x02) { - TestPassed = testArchitectureProperties(); - } else { - printf("Invalid Test Case \n"); - } - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/runtimeApi/device/hipRuntimeGetVersion.cpp b/tests/src/runtimeApi/device/hipRuntimeGetVersion.cpp deleted file mode 100644 index a0029174b9..0000000000 --- a/tests/src/runtimeApi/device/hipRuntimeGetVersion.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipRuntimeGetVersion(int* runtimeVersion); - * On HIP/HCC path this function returns HIP runtime patch version(a 5 digit code) however on - * HIP/NVCC path this function return CUDA runtime version. - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - int runtimeVersion; - HIPCHECK(hipRuntimeGetVersion(&runtimeVersion)); - passed(); -} diff --git a/tests/src/runtimeApi/device/hipSetCachceConfig.cpp b/tests/src/runtimeApi/device/hipSetCachceConfig.cpp deleted file mode 100644 index bf8621d021..0000000000 --- a/tests/src/runtimeApi/device/hipSetCachceConfig.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -int main() { - hipFuncCache_t cacheConfig; - void* func; - hipFuncSetCacheConfig(func, cacheConfig); - passed(); -} diff --git a/tests/src/runtimeApi/device/hipSetDeviceFlags.cpp b/tests/src/runtimeApi/device/hipSetDeviceFlags.cpp deleted file mode 100644 index 26ef49861a..0000000000 --- a/tests/src/runtimeApi/device/hipSetDeviceFlags.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - unsigned flag = 0; - HIPCHECK(hipDeviceReset()); - - int deviceCount = 0; - HIPCHECK(hipGetDeviceCount(&deviceCount)); - - for (int j = 0; j < deviceCount; j++) { - HIPCHECK(hipSetDevice(j)); - - for (int i = 0; i < 4; i++) { - flag = 1 << i; - printf("Flag=%x\n", flag); - HIPCHECK(hipSetDeviceFlags(flag)); - // HIPCHECK_API(hipSetDeviceFlags(flag), hipErrorInvalidValue); - } - - flag = 0; - } - - passed(); -} diff --git a/tests/src/runtimeApi/device/hipSetGetDevice.cpp b/tests/src/runtimeApi/device/hipSetGetDevice.cpp deleted file mode 100644 index 63dd1b01c9..0000000000 --- a/tests/src/runtimeApi/device/hipSetGetDevice.cpp +++ /dev/null @@ -1,679 +0,0 @@ -/* - * Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -/* - * Test designed to run on Linux based platforms - * Verifies functionality of - * -- hipSetDevice and hipGetDevice with different ROCR_VISIBLE_DEVICES and - * HIP_VISIBLE_DEVICES values set - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t hipSetGetDevice-invalidDevice --tests 1 - * TEST_NAMED: %t hipSetGetDevice-allValidDevice --tests 2 - * TEST_NAMED: %t hipSetGetDevice-validDev1 --computeDevCnt 1 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-validDev2 --computeDevCnt 2 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-validDev3 --computeDevCnt 3 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-validDev4 --computeDevCnt 4 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-validDev5 --computeDevCnt 5 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-validDev6 --computeDevCnt 6 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-validDev7 --computeDevCnt 7 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-validDev8 --computeDevCnt 8 --tests 4 - * TEST_NAMED: %t hipSetGetDevice-SetbothEnvVar --tests 5 - * HIT_END - */ - -#ifdef __linux__ -#include -#include -#endif -#include "test_common.h" - -int sequence_num = 0; -void getDeviceCount(int *numDevices) { - int fd[2], val = 0; -#ifdef __unix__ - pipe(fd); - - pid_t childPid; - childPid = fork(); - - if (childPid > 0) { // parent - close(fd[1]); - read(fd[0], &val, sizeof(val)); - close(fd[0]); - *numDevices = val; - - } else if (childPid == 0) { // child - int devCnt = 0; - close(fd[0]); - -#ifdef __HIP_PLATFORM_NVCC__ - unsetenv("CUDA_VISIBLE_DEVICES"); -#else - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); -#endif - - hipGetDeviceCount(&devCnt); - - write(fd[1], &devCnt, sizeof(devCnt)); - close(fd[1]); - exit(0); - - } else { - failed("fork() failed. 
Exiting the test\n"); - } -#else - printf("skipping testcase for non-unix systems\n"); -#endif -} - -#define MAX_SIZE 1024 - -// Pass either -1 in deviceNumber or invalid device number -bool testInvalidDevice(int numDevices, bool useRocrEnv, int deviceNumber) { - bool testResult = true; - int device; - int tempCount = 0; - int setDeviceErrorCheck = 0; - int getDeviceErrorCheck = 0; - int getDeviceCountErrorCheck = 0; -#ifdef __unix__ - int fd[2]; - pipe(fd); - - pid_t cPid; - cPid = fork(); - - char visibleDeviceString[MAX_SIZE] = {}; - snprintf(visibleDeviceString, MAX_SIZE, "%d", deviceNumber); - - if (cPid == 0) { // child - hipError_t err; -#ifdef __HIP_PLATFORM_NVCC__ - setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1); -#else - if (true == useRocrEnv) { - setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1); - } else { - setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1); - } -#endif - err = hipGetDeviceCount(&tempCount); - if (err != hipSuccess) { - getDeviceCountErrorCheck = 1; - } - for (int i = 0; i < numDevices; i++) { - err = hipSetDevice(i); - if (err != hipSuccess) { - setDeviceErrorCheck+= 1; - } - - err = hipGetDevice(&device); - if (err != hipSuccess) { - getDeviceErrorCheck+= 1; - } - } - - if ((getDeviceCountErrorCheck == 1) && (setDeviceErrorCheck == numDevices) - && (getDeviceErrorCheck == numDevices)) { - testResult = true; - - } else { - printf("Test failed for invalid device\n"); - testResult = false; - } - - close(fd[0]); - write(fd[1], &testResult, sizeof(testResult)); - close(fd[1]); - exit(0); - - } else if (cPid > 0) { // parent - close(fd[1]); - read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - - } else { - printf("fork() failed\n"); - testResult = false; - } -#else - printf("skipping testcase for non-unix systems\n"); -#endif - return testResult; -} - -int deviceListLength = 1; -int parseExtraArguments(int argc, char* argv[]) { - int i = 0; - for (i = 1; i < argc; i++) { - const char* arg = 
argv[i]; - if (!strcmp(arg, " ")) { - // skip NULL args. - } else if (!strcmp(arg, "--computeDevCnt")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &deviceListLength)) { - failed("Bad deviceListLength argument"); - } - } else { - failed("Bad argument"); - } - } - return i; -} - -bool testValidDevices(int numDevices, bool useRocrEnv, int *deviceList, - int deviceListLength) { - bool testResult = true; - int tempCount = 0; - int device; - int setDeviceErrorCheck = 0; - int getDeviceErrorCheck = 0; - int getDeviceCountErrorCheck = 0; - int *deviceListPtr = deviceList; - char visibleDeviceString[MAX_SIZE] = {}; -#ifdef __unix__ - - if ((NULL == deviceList) || ((deviceListLength < 1) || - deviceListLength > numDevices)) { - printf("Invalid argument for number of devices. Skipping current test\n"); - return testResult; - } - - for (int i = 0; i < deviceListLength; i++) { - if (NULL == deviceListPtr) { - printf("Invalid gpu index. Skipping current test\n"); - return testResult; - } - snprintf(visibleDeviceString + strlen(visibleDeviceString), MAX_SIZE, "%d,", - *deviceListPtr++); - } - - visibleDeviceString[strlen(visibleDeviceString)-1] = 0; - - int fd[2]; - pipe(fd); - - pid_t cPid; - cPid = fork(); - - if (cPid == 0) { -#ifdef __HIP_PLATFORM_NVCC__ - unsetenv("CUDA_VISIBLE_DEVICES"); - setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1); -#else - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - if (true == useRocrEnv) { - setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1); - } else { - setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1); - } -#endif - - - hipError_t err; - err = hipGetDeviceCount(&tempCount); - - if (tempCount == deviceListLength) { - getDeviceCountErrorCheck = 1; - } else { - printf("hipGetDeviceCount failed. 
return value: %u\n", hipError_t(err)); - } - - for (int i = 0; i < numDevices; i++) { - err = hipSetDevice(i); - if (err != hipSuccess) { - setDeviceErrorCheck+= 1; - } - - err = hipGetDevice(&device); - if (err != hipSuccess) { - getDeviceErrorCheck+= 1; - } - } - - if ((getDeviceCountErrorCheck == 1) && (setDeviceErrorCheck == - (numDevices-deviceListLength)) && (getDeviceErrorCheck == 0)) { - testResult = true; - - } else { - printf("Test failed for device count %d\n", deviceListLength); - testResult = false; - } - - close(fd[0]); - write(fd[1], &testResult, sizeof(testResult)); - close(fd[1]); - exit(0); - - } else if (cPid > 0) { - close(fd[1]); - read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - - } else { - printf("fork() failed\n"); - testResult = false; - } -#else - printf("skipping testcase for non unix system \n"); -#endif - return testResult; -} - -bool testValidDevicesBasic() { - bool testResult = true; - int numDevices = 0; - int device; - int validateCount = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - printf("Available compute devices in the system: %d\n", numDevices); - - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipGetDevice(&device)); - if (device == i) { - validateCount+= 1; - } - } - if (numDevices != validateCount) { - testResult = false; - } - - return testResult; -} - - -void Initialize(int *deviceList, int numDevices, int count, - char min_visibleDeviceString[], char max_visibleDeviceString[]) { - int *deviceListPtr = deviceList; - for (int i =0; i < count; i++) { - if (i == count-1) { - snprintf(min_visibleDeviceString + strlen(min_visibleDeviceString), - MAX_SIZE, "%d", *deviceListPtr++); - } else { - snprintf(min_visibleDeviceString + strlen(min_visibleDeviceString), - MAX_SIZE, "%d,", *deviceListPtr++); - } - } - for (int i =0; i < numDevices; i++) { - if (i == numDevices-1) { - snprintf(max_visibleDeviceString + strlen(max_visibleDeviceString), - MAX_SIZE, "%d", i); - 
} else { - snprintf(max_visibleDeviceString + strlen(max_visibleDeviceString), - MAX_SIZE, "%d,", i); - } - } -} - -bool testMaxRvdMinHvd(int numDevices, int *deviceList, int count) { - bool testResult = true; - int device; -#ifdef __unix__ - int validateCount = 0; - char min_visibleDeviceString[MAX_SIZE] = {0}; - char max_visibleDeviceString[MAX_SIZE] = {0}; - int fd[2]; - pipe(fd); - pid_t cPid; - cPid = fork(); - if (cPid == 0) { // child - Initialize(deviceList, numDevices, - count, min_visibleDeviceString, max_visibleDeviceString); - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - setenv("ROCR_VISIBLE_DEVICES", max_visibleDeviceString, 1); - setenv("HIP_VISIBLE_DEVICES", min_visibleDeviceString, 1); - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipGetDevice(&device)); - if (device == i) { - validateCount+= 1; - } - } - if (count != validateCount) { - testResult = false; - } - } else if (cPid > 0) { - close(fd[1]); - read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - } else { - printf("fork() failed\n"); - testResult = false; - } -#else - printf("skipping testcase for non unix system \n"); -#endif - return testResult; -} - -bool testRvdCvd(int numDevices, int *deviceList, int count) { - bool testResult = true; - int device; -#ifdef __unix__ - int validateCount = 0; - char min_visibleDeviceString[MAX_SIZE] = {0}; - char max_visibleDeviceString[MAX_SIZE] = {0}; - int fd[2]; - pipe(fd); - pid_t cPid; - cPid = fork(); - if (cPid == 0) { // child - Initialize(deviceList, numDevices, count, - min_visibleDeviceString, max_visibleDeviceString); - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - setenv("ROCR_VISIBLE_DEVICES", max_visibleDeviceString, 1); - setenv("CUDA_VISIBLE_DEVICES", min_visibleDeviceString, 1); - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - 
HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipGetDevice(&device)); - if (device == i) { - validateCount+= 1; - } - } - if (count != validateCount) { - testResult = false; - } - } else if (cPid > 0) { - close(fd[1]); - read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - } else { - printf("fork() failed\n"); - testResult = false; - } -#else - printf("skipping testcase for non unix system \n"); -#endif - return testResult; -} - -bool testMinRvdMaxHvd(int numDevices, int *deviceList, int count) { - bool testResult = true; - int device; -#ifdef __unix__ - int validateCount = 0; - char min_visibleDeviceString[MAX_SIZE] = {0}; - char max_visibleDeviceString[MAX_SIZE] = {0}; - int fd[2]; - pipe(fd); - pid_t cPid; - cPid = fork(); - if (cPid == 0) { // child - Initialize(deviceList, numDevices, count, - min_visibleDeviceString, max_visibleDeviceString); - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - setenv("ROCR_VISIBLE_DEVICES", min_visibleDeviceString, 1); - setenv("HIP_VISIBLE_DEVICES", max_visibleDeviceString, 1); - HIPCHECK(hipGetDeviceCount(&numDevices)); - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipGetDevice(&device)); - if (device == i) { - validateCount+= 1; - } - } - if (count != validateCount) { - testResult = false; - } - close(fd[0]); - write(fd[1], &testResult, sizeof(testResult)); - close(fd[1]); - exit(0); - } else if (cPid > 0) { - close(fd[1]); - read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - } else { - printf("fork() failed\n"); - testResult = false; - } -#else - printf("skipping testcase for non unix system \n"); -#endif - return testResult; -} - - -bool testDeviceListSequence(int numDevices, bool useRocrEnv, - int *deviceList, int count) { - bool testResult = true; -#ifdef __unix__ - int validateCount = 0; - int device; - char visibleDeviceString[MAX_SIZE] = {0}; - int tempCount = 0; - int *deviceListPtr = deviceList; - int fd[2]; - if 
(NULL == deviceList) { - printf("Invalid argument for number of devices. Skipping current test\n"); - return testResult; - } - - pipe(fd); - pid_t cPid; - cPid = fork(); - for (int i =0; i < numDevices; i++) { - if (i == numDevices-1) { - snprintf(visibleDeviceString + strlen(visibleDeviceString), - MAX_SIZE, "%d", *deviceListPtr++); - } else { - snprintf(visibleDeviceString + strlen(visibleDeviceString), - MAX_SIZE, "%d,", *deviceListPtr++); - } - } - if (cPid == 0) { // child - hipError_t err; -#ifdef __HIP_PLATFORM_NVCC__ - unsetenv("CUDA_VISIBLE_DEVICES"); - setenv("CUDA_VISIBLE_DEVICES", visibleDeviceString, 1); -#else - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - if (true == useRocrEnv) { - setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1); - } else { - setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1); - } -#endif - err = hipGetDeviceCount(&tempCount); - if (err == hipSuccess) { - for (int i = 0; i < numDevices; i++) { - err = hipSetDevice(i); - if (err == hipSuccess) { - err = hipGetDevice(&device); - if (err == hipSuccess && device == i) { - validateCount += 1; - } - } - } - if (count != tempCount || tempCount != validateCount) { - testResult = false; - } else { - testResult = true; - } - - } else { -#ifdef __HIP_PLATFORM_NVCC__ - testResult = true; -#endif - } - close(fd[0]); - write(fd[1], &testResult, sizeof(testResult)); - close(fd[1]); - exit(0); - } else if (cPid > 0) { // parent - close(fd[1]); - read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - } else { - printf("fork() failed\n"); - testResult = false; - } -#else - printf("skipping testcase for non unix system \n"); -#endif - return testResult; -} - - -int main(int argc, char* argv[]) { - bool testResult = true; - int numDevices = 0; - int device; - int deviceList[MAX_SIZE]; - int extraArgs = 0; - -#ifdef __unix__ - getDeviceCount(&numDevices); - - if (numDevices == 0) { - failed("No gpus found. 
exiting\n"); - } - - printf("Available compute devices in the system: %d\n", numDevices); - - extraArgs = HipTest::parseStandardArguments(argc, argv, false); - parseExtraArguments(extraArgs, argv); - if (p_tests == 1) { - printf("\nRunning test for invalid compute device\n"); -#ifndef __HIP_PLATFORM_NVCC__ - // Test setting -1 to ROCR_VISIBLE_DEVICES - testResult &= testInvalidDevice(numDevices, true, -1); - - // Test setting invalid device to ROCR_VISIBLE_DEVICES - testResult &= testInvalidDevice(numDevices, true, numDevices); -#endif - // Test setting -1 to HIP_VISIBLE_DEVICES - testResult &= testInvalidDevice(numDevices, false, -1); - // Test setting invalide device to HIP_VISIBLE_DEVICES - testResult &= testInvalidDevice(numDevices, false, numDevices); - } else if (p_tests == 2) { - // Test for all available devices - printf("\nRunning test for all available compute devices\n"); - - for (int i = 0; i < numDevices; i++) { - deviceList[i] = i; - } - -#ifndef __HIP_PLATFORM_NVCC__ - testResult &= testValidDevices(numDevices, true, deviceList, numDevices); -#endif - testResult &= testValidDevices(numDevices, false, deviceList, numDevices); - } else if (p_tests == 3) { - printf("Running test for various invalid and valid sequences\n"); - int count; - if (numDevices >= 2) - count = 2; - else - count = numDevices; - // Assigning values to deviceList in reverse order - for (int i=0; i < numDevices; i++) { - if (i%2 == 0) { - deviceList[i] = -1; - } else { - deviceList[i] = i; - } - } -#ifndef __HIP_PLATFORM_NVCC__ - testResult = testDeviceListSequence(numDevices, true, deviceList, count); -#endif - testResult = testDeviceListSequence(numDevices, false, deviceList, count); - count = 1; - for (int i=0; i < numDevices; i++) { - if (i/2 == 0) { - deviceList[i] = 0; - } else { - deviceList[i] = i; - } - } -#ifndef __HIP_PLATFORM_NVCC__ - testResult = testDeviceListSequence(numDevices, true, deviceList, count); -#endif - testResult = testDeviceListSequence(numDevices, false, 
deviceList, count); - if (numDevices == 1) { - deviceList[0] = 0; - } else { - for (int i=0; i < numDevices; i++) { - deviceList[i] = 1; - } - } -#ifndef __HIP_PLATFORM_NVCC__ - testResult &= testDeviceListSequence(numDevices, true, deviceList, count); -#endif - testResult &= testDeviceListSequence(numDevices, false, deviceList, count); - } else if (p_tests == 4) { - // Test for subset of available gpus - for (int i=0; i < deviceListLength; i++) { - deviceList[i] = deviceListLength-1-i; - } - printf("\nRunning test for %d compute devices\n", deviceListLength); -#ifndef __HIP_PLATFORM_NVCC__ - testResult &= testValidDevices(numDevices, true, deviceList, - deviceListLength); -#endif - testResult &= testValidDevices(numDevices, false, deviceList, - deviceListLength); - } else if (p_tests == 5) { -#ifndef __HIP_PLATFORM_NVCC__ - int count = 0; - if (numDevices == 1) { - deviceList[0] = 0; - count = 1; - } else { - for (int i=0; i < numDevices; i++) { - if (i%2 == 0) { - deviceList[count] = i; - count++; - } - } - } - testResult &= testMinRvdMaxHvd(numDevices, deviceList, count); - testResult &= testMaxRvdMinHvd(numDevices, deviceList, count); - testResult &= testRvdCvd(numDevices, deviceList, count); -#endif - } else { - failed("Didnt receive any valid option. Try options 1 to 5\n"); - } -#else - printf("Running basic test on Windows\n"); - testResult &= testValidDevicesBasic(); - -#endif - if (testResult == true) { - passed(); - } else { - failed("One or more tests failed\n"); - } -} diff --git a/tests/src/runtimeApi/error/hipPeekAtLastError.cpp b/tests/src/runtimeApi/error/hipPeekAtLastError.cpp deleted file mode 100644 index 3d5183f617..0000000000 --- a/tests/src/runtimeApi/error/hipPeekAtLastError.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipGetDevice(int *device); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - hipSetDevice(-1); - if (hipPeekAtLastError() != hipSuccess) passed(); -} diff --git a/tests/src/runtimeApi/event/hipEvent.cpp b/tests/src/runtimeApi/event/hipEvent.cpp deleted file mode 100644 index a004148620..0000000000 --- a/tests/src/runtimeApi/event/hipEvent.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -bool hipEvent_Nullcheck() { - bool TestStatus = true; - hipError_t err; - hipEvent_t start_event; - // Passing nullptr to hipEventCreate API - err = hipEventCreate(nullptr); - if (err == hipSuccess) { - printf("hipEventCreate failed when nullptr is passed \n"); - TestStatus = false; - } - // Passing nullptr to hipEventCreateWithFlags API - err = hipEventCreateWithFlags(nullptr, 0); - if (err == hipSuccess) { - printf("hipEventCreatewithFlags failed when nullptr is passed \n"); - TestStatus = false; - } - // Passing illegal/unknown flag to hipEventCreateWithFlags API - err = hipEventCreateWithFlags(&start_event, 10); - if (err == hipSuccess) { - printf("hipEventCreatewithFlags failed when illegal flag is passed \n"); - TestStatus = false; - } - // Passing nullptr to hipEventSynchronize API - err = hipEventSynchronize(nullptr); - if (err == hipSuccess) { - printf("hipEventSynchronize failed when nullptr is passed \n"); - TestStatus = false; - } - // Passing nullptr to hipEventQuery API - err = hipEventQuery(nullptr); - if (err == hipSuccess) { - printf("hipEventQuery failed when nullptr is passed \n"); - TestStatus = false; - } - // Passing nullptr to hipEventDestroy API - err = hipEventDestroy(nullptr); - if (err == hipSuccess) { - printf("hipEventDestroy failed when nullptr is passed \n"); - TestStatus = false; - } - - return TestStatus; -} - -int main() { - bool TestPassed = true; - TestPassed = hipEvent_Nullcheck(); - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } -} - diff --git a/tests/src/runtimeApi/event/hipEventElapsedTime.cpp b/tests/src/runtimeApi/event/hipEventElapsedTime.cpp deleted file mode 100644 index dd7cc659fd..0000000000 --- a/tests/src/runtimeApi/event/hipEventElapsedTime.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. 
All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -void NegativeTests(){ - - // Null pointers - { - hipEvent_t start,end; - float tms = 1.0f; - HIPASSERT(hipEventElapsedTime(nullptr,start,end) == hipErrorInvalidValue); -#ifndef __HIP_PLATFORM_NVIDIA__ - // On NVCC platform API throws seg fault hence skipping - HIPASSERT(hipEventElapsedTime(&tms,nullptr,end) == hipErrorInvalidHandle); - HIPASSERT(hipEventElapsedTime(&tms,start,nullptr) == hipErrorInvalidHandle); -#endif - } - - // Event created using disabled timing - { - float timeElapsed = 1.0f; - hipEvent_t start, stop; - HIPCHECK(hipEventCreateWithFlags(&start,hipEventDisableTiming)); - HIPCHECK(hipEventCreateWithFlags(&stop,hipEventDisableTiming)); - HIPASSERT(hipEventElapsedTime(&timeElapsed, start, stop) == hipErrorInvalidHandle); - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); - } - - // events created different devices - { - int devCount = 0; - HIPCHECK(hipGetDeviceCount(&devCount)); - if (devCount > 1){ - // create event on dev=0 - HIPCHECK(hipSetDevice(0)); - hipEvent_t start; - hipEvent_t start1; - HIPCHECK(hipEventCreate(&start)); - HIPCHECK(hipEventCreate(&start1)); - - HIPCHECK(hipEventRecord(start, nullptr)); - HIPCHECK(hipEventSynchronize(start)); - - // create event on dev=1 - HIPCHECK(hipSetDevice(1)); - hipEvent_t stop; - HIPCHECK(hipEventCreate(&stop)); - - // start1 on device 0 but null stream on device 1 - HIPASSERT(hipEventRecord(start1, nullptr) == hipErrorInvalidHandle); - - HIPCHECK(hipEventRecord(stop, nullptr)); - HIPCHECK(hipEventSynchronize(stop)); - - float tElapsed = 1.0f; - // start on device 0 but stop on device 1 - HIPASSERT(hipEventElapsedTime(&tElapsed,start,stop) == hipErrorInvalidHandle); - - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(start1)); - HIPCHECK(hipEventDestroy(stop)); - } - } -} - -void PositiveTest(){ - hipEvent_t start; - 
HIPCHECK(hipEventCreate(&start)); - - hipEvent_t stop; - HIPCHECK(hipEventCreate(&stop)); - - HIPCHECK(hipEventRecord(start, nullptr)); - HIPCHECK(hipEventSynchronize(start)); - - HIPCHECK(hipEventRecord(stop, nullptr)); - HIPCHECK(hipEventSynchronize(stop)); - - float tElapsed = 1.0f; - HIPCHECK(hipEventElapsedTime(&tElapsed, start, stop)); - - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); -} - -int main(){ - - NegativeTests(); - PositiveTest(); - passed(); -} diff --git a/tests/src/runtimeApi/event/hipEventIpc.cpp b/tests/src/runtimeApi/event/hipEventIpc.cpp deleted file mode 100644 index b2b8f2f4d0..0000000000 --- a/tests/src/runtimeApi/event/hipEventIpc.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test hipEventRecord serialization behavior. 
-// Through manual inspection of the reported timestamps, can determine if recording a NULL event -// forces synchronization : set - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t --iterations 10 - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; - if (blocks > 1024) blocks = 1024; - if (blocks == 0) blocks = 1; - - printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, - ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); - printf("iterations=%d\n", iterations); - - size_t Nbytes = N * sizeof(float); - - float *A_h, *B_h, *C_h; - float *A_d, *B_d, *C_d; - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - hipEvent_t start, stop; - - // NULL stream check: - HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess)); - HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess)); - - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - - for (int i = 0; i < iterations; i++) { - //--- START TIMED REGION - long long hostStart = HipTest::get_time(); - // Record the start event - HIPCHECK(hipEventRecord(start, NULL)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - - - HIPCHECK(hipEventRecord(stop, NULL)); - HIPCHECK(hipEventSynchronize(stop)); - HIPCHECK(hipEventQuery(stop)); - long long hostStop = HipTest::get_time(); - //--- STOP TIMED REGION - - - float eventMs = 1.0f; - // should fail due to hipEventDisableTiming - HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop)); - float hostMs = HipTest::elapsed_time(hostStart, hostStop); - - printf("host_time 
(gettimeofday) =%6.3fms\n", hostMs); - printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); - printf("\n"); - - } - hipIpcEventHandle_t ipc_handle; - HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start)); - - hipEvent_t ipc_event; - // hipIpcOpenEventHandle() should be called in a different process - // tests/src/ipc/hipMultiProcIpcEvent.cpp is the right sample in different process - HIPCHECK_API(hipIpcOpenEventHandle(&ipc_event, ipc_handle), hipErrorInvalidContext); - - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); - - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - printf("check:\n"); - - HipTest::checkVectorADD(A_h, B_h, C_h, N, true); - - passed(); -} diff --git a/tests/src/runtimeApi/event/hipEventMultiThreaded.cpp b/tests/src/runtimeApi/event/hipEventMultiThreaded.cpp deleted file mode 100644 index b6e0e1c3fd..0000000000 --- a/tests/src/runtimeApi/event/hipEventMultiThreaded.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include -#include - -#define THREADS 2 // threads per core -#define MAX_NUM_THREADS 512 -#define ITER 5 // total loop number - -// 5 loops and 2 threads per core are enough for function verification. -// You may adjust them for your test purpose. - -extern "C" __global__ void WaitKernel(int *Ad, int clockrate) { - uint64_t wait_t = 500, - start = clock64()/clockrate, cycles; - do { cycles = clock64()/clockrate-start;} while (cycles < wait_t); - *Ad = 1; -} - -extern "C" __global__ void WaitKernel_gfx11(int *Ad, int clockrate) { -#ifdef __HIP_PLATFORM_AMD__ - uint64_t wait_t = 500, - start = wall_clock64()/clockrate, cycles; - do { cycles = wall_clock64()/clockrate-start;} while (cycles < wait_t); - *Ad = 1; -#endif -} - -void t1(hipEvent_t start, hipStream_t stream1, int clkRate, int *A, int *Ad) { - *A = 0; - - auto WaitKernel_used = IsGfx11() ? 
WaitKernel_gfx11 : WaitKernel; - hipLaunchKernelGGL(HIP_KERNEL_NAME(WaitKernel_used), dim3(1), dim3(1), 0, stream1, Ad, clkRate); - - HIPCHECK(hipEventRecord(start, stream1)); - -} - -int main(int argc, char* argv[]) { - - int NUM_THREADS = min(THREADS * std::thread::hardware_concurrency(), MAX_NUM_THREADS); - int clkRate = 0; - std::vector A, Ad; - bool TestPassed = true; - - for (int i = 0; i < NUM_THREADS; i++) { - int *aPtr, *adPtr; - aPtr = (int *)malloc(sizeof(int)); - A.push_back(aPtr); - Ad.push_back(adPtr); - HIPCHECK(hipHostRegister(A[i], sizeof(int), 0)); - HIPCHECK(hipHostGetDevicePointer((void**)&Ad[i], A[i], 0)); - } - - if (IsGfx11()) { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeWallClockRate, 0)); - } else { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); - } - hipStream_t stream1; - hipStreamCreate(&stream1); - hipEvent_t start; - hipEventCreate(&start); - std::thread t[NUM_THREADS]; - - printf("NUM_THREADS=%d\n", NUM_THREADS); - for (int i = 0; i < ITER; i++) { - printf("loop %d/%d\n", i, ITER); - for (int j = 0; j < NUM_THREADS; j++) { - t[j] = std::thread(t1, start, stream1, clkRate, A[j], Ad[j]); - } - - for (int j = 0 ; j < NUM_THREADS; j++) { - t[j].join(); - } - - HIPCHECK(hipStreamWaitEvent(stream1, start, 0)); - hipError_t err = hipEventQuery(start); - while(err != hipSuccess) { - err = hipEventQuery(start); - } - - for (int j = 0; j < NUM_THREADS; j++) { - if (*A[j] != 1) { - TestPassed = false; - break; - } - } - - if (!TestPassed) { - failed("Test Failed due to possible race condition!"); - } - } - - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipEventDestroy(start)); - - for (auto ptr: A) { - free(ptr); - } - - A.clear(); - Ad.clear(); - - passed(); -} diff --git a/tests/src/runtimeApi/event/hipEventRecord.cpp b/tests/src/runtimeApi/event/hipEventRecord.cpp deleted file mode 100644 index 0c03e4d1e8..0000000000 --- a/tests/src/runtimeApi/event/hipEventRecord.cpp +++ /dev/null @@ -1,103 
+0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test hipEventRecord serialization behavior. 
-// Through manual inspection of the reported timestamps, can determine if recording a NULL event -// forces synchronization : set - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t --iterations 10 - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; - if (blocks > 1024) blocks = 1024; - if (blocks == 0) blocks = 1; - - printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, - ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); - printf("iterations=%d\n", iterations); - - size_t Nbytes = N * sizeof(float); - - float *A_h, *B_h, *C_h; - float *A_d, *B_d, *C_d; - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - hipEvent_t start, stop; - - // NULL stream check: - HIPCHECK(hipEventCreate(&start)); - HIPCHECK(hipEventCreate(&stop)); - - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - - for (int i = 0; i < iterations; i++) { - //--- START TIMED REGION - long long hostStart = HipTest::get_time(); - // Record the start event - HIPCHECK(hipEventRecord(start, NULL)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - - - HIPCHECK(hipEventRecord(stop, NULL)); - HIPCHECK(hipEventSynchronize(stop)); - long long hostStop = HipTest::get_time(); - //--- STOP TIMED REGION - - - float eventMs = 1.0f; - HIPCHECK(hipEventElapsedTime(&eventMs, start, stop)); - float hostMs = HipTest::elapsed_time(hostStart, hostStop); - - printf("host_time (gettimeofday) =%6.3fms\n", hostMs); - printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); - printf("\n"); - - // Make sure timer is timing something... 
- HIPASSERT(eventMs > 0.0f); - } - - - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); - - printf("check:\n"); - - HipTest::checkVectorADD(A_h, B_h, C_h, N, true); - - - passed(); -} diff --git a/tests/src/runtimeApi/event/record_event.cpp b/tests/src/runtimeApi/event/record_event.cpp deleted file mode 100644 index dc0dcc7ece..0000000000 --- a/tests/src/runtimeApi/event/record_event.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - - -#include "test_common.h" - -enum SyncMode { - syncNone, - syncStream, - syncStopEvent, -}; - - -const char* syncModeString(int syncMode) { - switch (syncMode) { - case syncNone: - return "syncNone"; - case syncStream: - return "syncStream"; - case syncStopEvent: - return "syncStopEvent"; - default: - return "unknown"; - }; -}; - - -void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_t stream, - int waitStart, SyncMode syncMode) { - if (!(testMask & p_tests)) { - return; - } - printf("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n", testMask, stream, waitStart, - syncModeString(syncMode)); - - size_t sizeBytes = numElements * sizeof(int); - - int count = 100; - int init0 = 0; - HIPCHECK(hipMemset(C_d, init0, sizeBytes)); - for (int i = 0; i < numElements; i++) { - C_h[i] = -1; // initialize - } - - hipEvent_t neverCreated = 0, neverRecorded, timingDisabled; - HIPCHECK(hipEventCreate(&neverRecorded)); - HIPCHECK(hipEventCreateWithFlags(&timingDisabled, hipEventDisableTiming)); - - hipEvent_t start, stop; - HIPCHECK(hipEventCreate(&start)); - HIPCHECK(hipEventCreate(&stop)); - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - - HIPCHECK(hipEventRecord(timingDisabled, stream)); - // sandwhich a kernel: - HIPCHECK(hipEventRecord(start, stream)); - hipLaunchKernelGGL(HipTest::addCountReverse, dim3(blocks), dim3(threadsPerBlock), 0, stream, - static_cast(C_d), C_h, numElements, count); - HIPCHECK(hipEventRecord(stop, stream)); - - - if (waitStart) { - HIPCHECK(hipEventSynchronize(start)); - } - - - hipError_t expectedStopError = hipSuccess; - - // How to wait for the events to finish: - switch (syncMode) { - case syncNone: - expectedStopError = hipErrorNotReady; - break; - case syncStream: - HIPCHECK(hipStreamSynchronize(stream)); // wait for recording to finish... 
- break; - case syncStopEvent: - HIPCHECK(hipEventSynchronize(stop)); - break; - default: - assert(0); - }; - - float t; - - hipError_t e = hipEventElapsedTime(&t, start, start); - if ((e != hipSuccess) && (e != hipErrorNotReady || syncMode != syncNone)) { - failed("start event not in expected state, was %d=%s\n", e, hipGetErrorName(e)); - } - - if (e == hipSuccess) assert(t == 0.0f); - - - // stop usually ready unless we skipped the synchronization (syncNone) - HIPCHECK_API(hipEventElapsedTime(&t, stop, stop), expectedStopError); - if (e == hipSuccess) assert(t == 0.0f); - - - e = hipEventElapsedTime(&t, start, stop); - HIPCHECK_API(e, expectedStopError); - if (expectedStopError == hipSuccess) assert(t > 0.0f); - printf("time=%6.2f error=%s\n", t, hipGetErrorName(e)); - - e = hipEventElapsedTime(&t, stop, start); - HIPCHECK_API(e, expectedStopError); - if (expectedStopError == hipSuccess) assert(t < 0.0f); - printf("negtime=%6.2f error=%s\n", t, hipGetErrorName(e)); - - - { - // Check some error conditions for incomplete events: - HIPCHECK_API(hipEventElapsedTime(&t, timingDisabled, stop), hipErrorInvalidHandle); - HIPCHECK_API(hipEventElapsedTime(&t, start, timingDisabled), hipErrorInvalidHandle); - - HIPCHECK_API(hipEventElapsedTime(&t, neverCreated, stop), hipErrorInvalidHandle); - HIPCHECK_API(hipEventElapsedTime(&t, start, neverCreated), hipErrorInvalidHandle); - - HIPCHECK_API(hipEventElapsedTime(&t, neverRecorded, stop), hipErrorInvalidHandle); - HIPCHECK_API(hipEventElapsedTime(&t, start, neverRecorded), hipErrorInvalidHandle); - } - - HIPCHECK(hipEventDestroy(neverRecorded)); - HIPCHECK(hipEventDestroy(timingDisabled)); - - HIPCHECK(hipEventDestroy(start)); - HIPCHECK(hipEventDestroy(stop)); - - // Clear out everything: - HIPCHECK(hipDeviceSynchronize()); - - printf("test: OK \n"); -} - - -void runTests(int64_t numElements) { - size_t sizeBytes = numElements * sizeof(int); - - printf("test: starting sequence with sizeBytes=%zu bytes, %6.2f MB\n", 
sizeBytes, - sizeBytes / 1024.0 / 1024.0); - - - int *C_h, *C_d; - HIPCHECK(hipMalloc(&C_d, sizeBytes)); - HIPCHECK(hipHostMalloc(&C_h, sizeBytes)); - - hipStream_t stream; - HIPCHECK(hipStreamCreateWithFlags(&stream, 0x0)); - - // for (int waitStart=0; waitStart<2; waitStart++) { - for (int waitStart = 1; waitStart >= 0; waitStart--) { - unsigned W = waitStart ? 0x1000 : 0; - test(W | 0x01, C_d, C_h, numElements, 0, 0, syncNone); - test(W | 0x02, C_d, C_h, numElements, stream, 0, syncNone); - test(W | 0x04, C_d, C_h, numElements, 0, waitStart, syncStream); - test(W | 0x08, C_d, C_h, numElements, stream, waitStart, syncStream); - test(W | 0x10, C_d, C_h, numElements, 0, waitStart, syncStopEvent); - test(W | 0x20, C_d, C_h, numElements, stream, waitStart, syncStopEvent); - } - - - HIPCHECK(hipStreamDestroy(stream)); - HIPCHECK(hipFree(C_d)); - HIPCHECK(hipHostFree(C_h)); -} - - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/); - - runTests(80000000); - - passed(); -} diff --git a/tests/src/runtimeApi/graph/hipChildGraph.cpp b/tests/src/runtimeApi/graph/hipChildGraph.cpp deleted file mode 100644 index c456072787..0000000000 --- a/tests/src/runtimeApi/graph/hipChildGraph.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include -#include -#include -#include -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ -#define THREADS_PER_BLOCK 512 -#define GRAPH_LAUNCH_ITERATIONS 100 -__global__ void reduce(float* d_in, double* d_out, size_t inputSize, size_t outputSize) { - int myId = threadIdx.x + blockDim.x * blockIdx.x; - int tid = threadIdx.x; - for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { - if (tid < s) { - d_in[myId] += d_in[myId + s]; - } - __syncthreads(); - } - if (tid == 0) { - d_out[blockIdx.x] = d_in[myId]; - } -} -__global__ void reduceFinal(double* d_in, double* d_out, size_t inputSize) { - int myId = threadIdx.x + blockDim.x * blockIdx.x; - int tid = threadIdx.x; - for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { - if (tid < s) { - d_in[myId] += d_in[myId + s]; - } - __syncthreads(); - } - if (tid == 0) { - *d_out = d_in[myId]; - } -} -void init_input(float* a, size_t size) { - for (size_t i = 0; i < size; i++) a[i] = (rand() & 0xFF) / (float)RAND_MAX; -} - -bool hipGraphsManual(float* inputVec_h, float* inputVec_d, double* outputVec_d, double* result_d, - size_t inputSize, size_t numOfBlocks) { - hipStream_t streamForGraph; - hipGraph_t graph, childgraph; - std::vector nodeDependencies; - hipGraphNode_t memcpyNode, kernelNode, memsetNode1, memsetNode2, childGraphNode; - double result_h = 0.0; - HIPCHECK(hipStreamCreate(&streamForGraph)); - auto start = std::chrono::high_resolution_clock::now(); - hipKernelNodeParams kernelNodeParams = {0}; - 
hipMemsetParams memsetParams = {0}; - memsetParams.dst = (void*)outputVec_d; - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(float); - memsetParams.width = numOfBlocks * 2; - memsetParams.height = 1; - HIPCHECK(hipGraphCreate(&graph, 0)); - HIPCHECK(hipGraphCreate(&childgraph, 0)); - HIPCHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, inputVec_d, inputVec_h, - sizeof(float) * inputSize, hipMemcpyHostToDevice)); - HIPCHECK(hipGraphAddMemsetNode(&memsetNode1, graph, NULL, 0, &memsetParams)); - - void* kernelArgs[4] = {(void*)&inputVec_d, (void*)&outputVec_d, &inputSize, &numOfBlocks}; - kernelNodeParams.func = (void*)reduce; - kernelNodeParams.gridDim = dim3(inputSize / THREADS_PER_BLOCK, 1, 1); - kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = (void**)kernelArgs; - kernelNodeParams.extra = NULL; - HIPCHECK(hipGraphAddKernelNode(&kernelNode, childgraph, NULL, 0, &kernelNodeParams)); - nodeDependencies.clear(); - nodeDependencies.push_back(kernelNode); - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = result_d; - memsetParams.value = 0; - memsetParams.elementSize = sizeof(float); - memsetParams.width = 2; - memsetParams.height = 1; - HIPCHECK(hipGraphAddMemsetNode(&memsetNode2, childgraph, NULL, 0, &memsetParams)); - nodeDependencies.push_back(memsetNode2); - memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = (void*)reduceFinal; - kernelNodeParams.gridDim = dim3(1, 1, 1); - kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); - kernelNodeParams.sharedMemBytes = 0; - void* kernelArgs2[3] = {(void*)&outputVec_d, (void*)&result_d, &numOfBlocks}; - kernelNodeParams.kernelParams = kernelArgs2; - kernelNodeParams.extra = NULL; - HIPCHECK(hipGraphAddKernelNode(&kernelNode, childgraph, nodeDependencies.data(), - nodeDependencies.size(), &kernelNodeParams)); - nodeDependencies.clear(); - 
nodeDependencies.push_back(memcpyNode); - nodeDependencies.push_back(memsetNode1); - HIPCHECK(hipGraphAddChildGraphNode(&childGraphNode, graph, nodeDependencies.data(), - nodeDependencies.size(), childgraph)); - nodeDependencies.clear(); - nodeDependencies.push_back(childGraphNode); - HIPCHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &result_h, result_d, sizeof(double), - hipMemcpyDeviceToHost)); - - hipGraphExec_t graphExec; - HIPCHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); - - auto start1 = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { - HIPCHECK(hipGraphLaunch(graphExec, streamForGraph)); - } - HIPCHECK(hipStreamSynchronize(streamForGraph)); - double result_h_cpu = 0.0; - for (int i = 0; i < inputSize; i++) { - result_h_cpu += inputVec_h[i]; - } - if (result_h_cpu != result_h) { - printf("Final reduced sum = %lf %lf\n", result_h_cpu, result_h); - return false; - } - auto stop = std::chrono::high_resolution_clock::now(); - auto resultWithInit = std::chrono::duration(stop - start); - auto resultWithoutInit = std::chrono::duration(stop - start1); - std::cout << "Time taken for hipGraphsManual with Init: " - << std::chrono::duration_cast(resultWithInit).count() - << " milliseconds without Init:" - << std::chrono::duration_cast(resultWithoutInit).count() - << " milliseconds " << std::endl; - - hipGraph_t clonedGraph; - hipGraphExec_t clonedGraphExec; - HIPCHECK(hipGraphClone(&clonedGraph, graph)); - - HIPCHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, NULL, NULL, 0)); - - for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { - HIPCHECK(hipGraphLaunch(clonedGraphExec, streamForGraph)); - } - HIPCHECK(hipStreamSynchronize(streamForGraph)); - if (result_h_cpu != result_h) { - printf("Cloned graph final reduced sum = %lf %lf\n", result_h_cpu, result_h); - return false; - } - - HIPCHECK(hipGraphExecDestroy(graphExec)); - 
HIPCHECK(hipGraphExecDestroy(clonedGraphExec)); - HIPCHECK(hipGraphDestroy(graph)); - HIPCHECK(hipStreamDestroy(streamForGraph)); - - return true; -} - -int main(int argc, char** argv) { - size_t size = 1 << 12; - size_t maxBlocks = 512; - hipSetDevice(0); - printf("%zu elements\n", size); - printf("threads per block = %d\n", THREADS_PER_BLOCK); - printf("Graph Launch iterations = %d\n", GRAPH_LAUNCH_ITERATIONS); - float *inputVec_d = NULL, *inputVec_h = NULL; - double *outputVec_d = NULL, *result_d; - inputVec_h = (float*)malloc(sizeof(float) * size); - HIPCHECK(hipMalloc(&inputVec_d, sizeof(float) * size)); - HIPCHECK(hipMalloc(&outputVec_d, sizeof(double) * maxBlocks)); - HIPCHECK(hipMalloc(&result_d, sizeof(double))); - init_input(inputVec_h, size); - bool status = hipGraphsManual(inputVec_h, inputVec_d, outputVec_d, result_d, size, maxBlocks); - HIPCHECK(hipFree(inputVec_d)); - HIPCHECK(hipFree(outputVec_d)); - HIPCHECK(hipFree(result_d)); - if (!status) { - failed("Failed during hip graph manual\n"); - } - passed(); -} \ No newline at end of file diff --git a/tests/src/runtimeApi/graph/hipGraph.cpp b/tests/src/runtimeApi/graph/hipGraph.cpp deleted file mode 100644 index 5a2305056b..0000000000 --- a/tests/src/runtimeApi/graph/hipGraph.cpp +++ /dev/null @@ -1,359 +0,0 @@ -/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include -#include -#include -#include -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ -#define THREADS_PER_BLOCK 512 -#define GRAPH_LAUNCH_ITERATIONS 1000 -__global__ void reduce(float* d_in, double* d_out, size_t inputSize, size_t outputSize) { - int myId = threadIdx.x + blockDim.x * blockIdx.x; - int tid = threadIdx.x; - for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { - if (tid < s) { - d_in[myId] += d_in[myId + s]; - } - __syncthreads(); - } - if (tid == 0) { - d_out[blockIdx.x] = d_in[myId]; - } -} -__global__ void reduceFinal(double* d_in, double* d_out, size_t inputSize) { - int myId = threadIdx.x + blockDim.x * blockIdx.x; - int tid = threadIdx.x; - for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { - if (tid < s) { - d_in[myId] += d_in[myId + s]; - } - __syncthreads(); - } - if (tid == 0) { - *d_out = d_in[myId]; - } -} -void init_input(float* a, size_t size) { - for (size_t i = 0; i < size; i++) a[i] = (rand() & 0xFF) / (float)RAND_MAX; -} - -bool hipWithoutGraphs(float* inputVec_h, float* inputVec_d, double* outputVec_d, double* result_d, - size_t inputSize, size_t numOfBlocks) { - hipStream_t stream1, stream2, stream3; - hipEvent_t forkStreamEvent, memsetEvent1, memsetEvent2; - double result_h = 0.0; - HIPCHECK(hipStreamCreate(&stream1)); - HIPCHECK(hipStreamCreate(&stream2)); - HIPCHECK(hipStreamCreate(&stream3)); - HIPCHECK(hipEventCreate(&forkStreamEvent)); - HIPCHECK(hipEventCreate(&memsetEvent1)); - 
HIPCHECK(hipEventCreate(&memsetEvent2)); - auto start = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { - HIPCHECK(hipMemcpyAsync(inputVec_d, inputVec_h, sizeof(float) * inputSize, hipMemcpyDefault, - stream1)); - HIPCHECK(hipMemsetAsync(outputVec_d, 0, sizeof(double) * numOfBlocks, stream2)); - HIPCHECK(hipEventRecord(memsetEvent1, stream2)); - HIPCHECK(hipMemsetAsync(result_d, 0, sizeof(double), stream3)); - HIPCHECK(hipEventRecord(memsetEvent2, stream3)); - HIPCHECK(hipStreamWaitEvent(stream1, memsetEvent1, 0)); - hipLaunchKernelGGL(reduce, dim3(inputSize / THREADS_PER_BLOCK, 1, 1), - dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, inputVec_d, outputVec_d, - inputSize, numOfBlocks); - HIPCHECK(hipStreamWaitEvent(stream1, memsetEvent2, 0)); - hipLaunchKernelGGL(reduceFinal, dim3(1, 1, 1), dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, - outputVec_d, result_d, numOfBlocks); - HIPCHECK(hipMemcpyAsync(&result_h, result_d, sizeof(double), hipMemcpyDefault, stream1)); - HIPCHECK(hipStreamSynchronize(stream1)); - } - auto stop = std::chrono::high_resolution_clock::now(); - auto result = std::chrono::duration(stop - start); - std::cout << "Time taken for hipWithoutGraphs : " - << std::chrono::duration_cast(result).count() - << " millisecs " << std::endl; - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipStreamDestroy(stream2)); - HIPCHECK(hipStreamDestroy(stream3)); - double result_h_cpu = 0.0; - for (int i = 0; i < inputSize; i++) { - result_h_cpu += inputVec_h[i]; - } - if (result_h_cpu != result_h) { - printf("Final reduced sum = %lf %lf\n", result_h_cpu, result_h); - return false; - } - return true; -} - -typedef struct callBackData { - const char* fn_name; - double* data; -} callBackData_t; -double result_gpu = 0.0; -void myHostNodeCallback(void* data) { - static int iter = 0; - iter++; - // Check status of GPU after stream operations are done - callBackData_t* tmp = (callBackData_t*)(data); - // 
checkCudaErrors(tmp->status); - double* result = (double*)(tmp->data); - char* function = (char*)(tmp->fn_name); - if (iter == GRAPH_LAUNCH_ITERATIONS) - printf("[%s] Host callback final reduced sum = %lf\n", function, *result); - result_gpu = *result; - *result = 0.0; // reset the result -} - -bool hipGraphsUsingStreamCapture(float* inputVec_h, float* inputVec_d, double* outputVec_d, - double* result_d, size_t inputSize, size_t numOfBlocks) { - hipStream_t stream1, stream2, stream3, streamForGraph; - hipEvent_t forkStreamEvent, memsetEvent1, memsetEvent2; - hipGraph_t graph; - double result_h = 0.0; - HIPCHECK(hipStreamCreate(&stream1)); - HIPCHECK(hipStreamCreate(&stream2)); - HIPCHECK(hipStreamCreate(&stream3)); - HIPCHECK(hipStreamCreate(&streamForGraph)); - HIPCHECK(hipEventCreate(&forkStreamEvent)); - HIPCHECK(hipEventCreate(&memsetEvent1)); - HIPCHECK(hipEventCreate(&memsetEvent2)); - auto start = std::chrono::high_resolution_clock::now(); - HIPCHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); - HIPCHECK(hipEventRecord(forkStreamEvent, stream1)); - HIPCHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); - HIPCHECK(hipStreamWaitEvent(stream3, forkStreamEvent, 0)); - HIPCHECK( - hipMemcpyAsync(inputVec_d, inputVec_h, sizeof(float) * inputSize, hipMemcpyDefault, stream1)); - HIPCHECK(hipMemsetAsync(outputVec_d, 0, sizeof(double) * numOfBlocks, stream2)); - HIPCHECK(hipEventRecord(memsetEvent1, stream2)); - HIPCHECK(hipMemsetAsync(result_d, 0, sizeof(double), stream3)); - HIPCHECK(hipEventRecord(memsetEvent2, stream3)); - HIPCHECK(hipStreamWaitEvent(stream1, memsetEvent1, 0)); - hipLaunchKernelGGL(reduce, dim3(inputSize / THREADS_PER_BLOCK, 1, 1), - dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, inputVec_d, outputVec_d, inputSize, - numOfBlocks); - HIPCHECK(hipStreamWaitEvent(stream1, memsetEvent2, 0)); - hipLaunchKernelGGL(reduceFinal, dim3(1, 1, 1), dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, - outputVec_d, result_d, numOfBlocks); - 
HIPCHECK(hipMemcpyAsync(&result_h, result_d, sizeof(double), hipMemcpyDefault, stream1)); - HIPCHECK(hipStreamEndCapture(stream1, &graph)); - hipGraphNode_t* nodes = NULL; - size_t numNodes = 0; - HIPCHECK(hipGraphGetNodes(graph, nodes, &numNodes)); - printf("\nNum of nodes in the graph created using stream capture API = %zu\n", numNodes); - HIPCHECK(hipGraphGetRootNodes(graph, nodes, &numNodes)); - printf("Num of root nodes in the graph created using stream capture API = %zu\n", numNodes); - hipGraphExec_t graphExec; - - HIPCHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); - auto start1 = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { - HIPCHECK(hipGraphLaunch(graphExec, streamForGraph)); - } - HIPCHECK(hipStreamSynchronize(streamForGraph)); - auto stop = std::chrono::high_resolution_clock::now(); - auto resultWithInit = std::chrono::duration(stop - start); - auto resultWithoutInit = std::chrono::duration(stop - start1); - std::cout << "Time taken for hipGraphsUsingStreamCapture with Init: " - << std::chrono::duration_cast(resultWithInit).count() - << " milliseconds without Init:" - << std::chrono::duration_cast(resultWithoutInit).count() - << " milliseconds " << std::endl; - - HIPCHECK(hipGraphExecDestroy(graphExec)); - HIPCHECK(hipGraphDestroy(graph)); - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipStreamDestroy(stream2)); - HIPCHECK(hipStreamDestroy(stream3)); - HIPCHECK(hipStreamDestroy(streamForGraph)); - double result_h_cpu = 0.0; - for (int i = 0; i < inputSize; i++) { - result_h_cpu += inputVec_h[i]; - } - if (result_h_cpu != result_h) { - printf("Final reduced sum = %lf %lf\n", result_h_cpu, result_h); - return false; - } - return true; -} -bool hipGraphsManual(float* inputVec_h, float* inputVec_d, double* outputVec_d, double* result_d, - size_t inputSize, size_t numOfBlocks) { - hipStream_t streamForGraph; - hipGraph_t graph; - std::vector nodeDependencies; - hipGraphNode_t memcpyNode, 
kernelNode, memsetNode; - double result_h = 0.0; - HIPCHECK(hipStreamCreate(&streamForGraph)); - auto start = std::chrono::high_resolution_clock::now(); - hipKernelNodeParams kernelNodeParams = {0}; - hipMemsetParams memsetParams = {0}; - memsetParams.dst = (void*)outputVec_d; - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(float); - memsetParams.width = numOfBlocks * 2; - memsetParams.height = 1; - HIPCHECK(hipGraphCreate(&graph, 0)); - HIPCHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, inputVec_d, inputVec_h, - sizeof(float) * inputSize, hipMemcpyHostToDevice)); - HIPCHECK(hipGraphAddMemsetNode(&memsetNode, graph, NULL, 0, &memsetParams)); - nodeDependencies.push_back(memsetNode); - nodeDependencies.push_back(memcpyNode); - void* kernelArgs[4] = {(void*)&inputVec_d, (void*)&outputVec_d, &inputSize, &numOfBlocks}; - kernelNodeParams.func = (void*)reduce; - kernelNodeParams.gridDim = dim3(inputSize / THREADS_PER_BLOCK, 1, 1); - kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = (void**)kernelArgs; - kernelNodeParams.extra = NULL; - HIPCHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &kernelNodeParams)); - nodeDependencies.clear(); - nodeDependencies.push_back(kernelNode); - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = result_d; - memsetParams.value = 0; - memsetParams.elementSize = sizeof(float); - memsetParams.width = 2; - memsetParams.height = 1; - HIPCHECK(hipGraphAddMemsetNode(&memsetNode, graph, NULL, 0, &memsetParams)); - nodeDependencies.push_back(memsetNode); - memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = (void*)reduceFinal; - kernelNodeParams.gridDim = dim3(1, 1, 1); - kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); - kernelNodeParams.sharedMemBytes = 0; - void* kernelArgs2[3] = {(void*)&outputVec_d, 
(void*)&result_d, &numOfBlocks}; - kernelNodeParams.kernelParams = kernelArgs2; - kernelNodeParams.extra = NULL; - HIPCHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &kernelNodeParams)); - nodeDependencies.clear(); - nodeDependencies.push_back(kernelNode); - HIPCHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &result_h, result_d, sizeof(double), - hipMemcpyDeviceToHost)); - nodeDependencies.clear(); - nodeDependencies.push_back(memcpyNode); - - hipGraphNode_t hostNode; - hipHostNodeParams hostParams = {0}; - hostParams.fn = myHostNodeCallback; - callBackData_t hostFnData; - hostFnData.data = &result_h; - hostFnData.fn_name = "hipGraphsManual"; - hostParams.userData = &hostFnData; - - HIPCHECK(hipGraphAddHostNode(&hostNode, graph, nodeDependencies.data(), nodeDependencies.size(), - &hostParams)); - - hipGraphExec_t graphExec; - hipGraphNode_t* nodes = NULL; - size_t numNodes = 0; - HIPCHECK(hipGraphGetNodes(graph, nodes, &numNodes)); - printf("\nNum of nodes in the graph created using hipGraphsManual API = %zu\n", numNodes); - HIPCHECK(hipGraphGetRootNodes(graph, nodes, &numNodes)); - printf("Num of root nodes in the graph created using hipGraphsManual API = %zu\n", numNodes); - HIPCHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); - - hipGraph_t clonedGraph; - hipGraphExec_t clonedGraphExec; - HIPCHECK(hipGraphClone(&clonedGraph, graph)); - - HIPCHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, NULL, NULL, 0)); - - auto start1 = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { - HIPCHECK(hipGraphLaunch(graphExec, streamForGraph)); - } - HIPCHECK(hipStreamSynchronize(streamForGraph)); - auto stop = std::chrono::high_resolution_clock::now(); - auto resultWithInit = std::chrono::duration(stop - start); - auto resultWithoutInit = std::chrono::duration(stop - start1); - std::cout << "Time taken for 
hipGraphsManual with Init: " - << std::chrono::duration_cast(resultWithInit).count() - << " milliseconds without Init:" - << std::chrono::duration_cast(resultWithoutInit).count() - << " milliseconds " << std::endl; - - printf("\n\nCloned Graph Output.. \n"); - - hipGraphNode_t clonedNode; - hipGraphNodeFindInClone(&clonedNode, memcpyNode, clonedGraph); - - hipGraphNodeType clonedNodeType, origNodeType; - hipGraphNodeGetType(clonedNode, &clonedNodeType); - hipGraphNodeGetType(memcpyNode, &origNodeType); - - std::cout << "Original node type:" << origNodeType << " cloned node type:" << clonedNodeType - << std::endl; - - for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { - HIPCHECK(hipGraphLaunch(clonedGraphExec, streamForGraph)); - } - HIPCHECK(hipStreamSynchronize(streamForGraph)); - HIPCHECK(hipGraphExecDestroy(graphExec)); - HIPCHECK(hipGraphDestroy(graph)); - HIPCHECK(hipStreamDestroy(streamForGraph)); - double result_h_cpu = 0.0; - for (int i = 0; i < inputSize; i++) { - result_h_cpu += inputVec_h[i]; - } - if (result_h_cpu != result_gpu) { - printf("Final reduced sum = %lf %lf\n", result_h_cpu, result_gpu); - return false; - } - return true; -} -int main(int argc, char** argv) { - size_t size = 1 << 12; - size_t maxBlocks = 512; - hipSetDevice(0); - printf("%zu elements\n", size); - printf("threads per block = %d\n", THREADS_PER_BLOCK); - printf("Graph Launch iterations = %d\n", GRAPH_LAUNCH_ITERATIONS); - float *inputVec_d = NULL, *inputVec_h = NULL; - double *outputVec_d = NULL, *result_d; - inputVec_h = (float*)malloc(sizeof(float) * size); - HIPCHECK(hipMalloc(&inputVec_d, sizeof(float) * size)); - HIPCHECK(hipMalloc(&outputVec_d, sizeof(double) * maxBlocks)); - HIPCHECK(hipMalloc(&result_d, sizeof(double))); - init_input(inputVec_h, size); - bool status1 = hipWithoutGraphs(inputVec_h, inputVec_d, outputVec_d, result_d, size, maxBlocks); - bool status2 = hipGraphsManual(inputVec_h, inputVec_d, outputVec_d, result_d, size, maxBlocks); - bool status3 = - 
hipGraphsUsingStreamCapture(inputVec_h, inputVec_d, outputVec_d, result_d, size, maxBlocks); - - HIPCHECK(hipFree(inputVec_d)); - HIPCHECK(hipFree(outputVec_d)); - HIPCHECK(hipFree(result_d)); - if (!status1) { - failed("Failed during hip without graph\n"); - } - if (!status2) { - failed("Failed during hip graph manual\n"); - } - if (!status3) { - failed("Failed during hipGraph with capture\n"); - } - passed(); -} diff --git a/tests/src/runtimeApi/graph/hipSimpleGraphWithKernel.cpp b/tests/src/runtimeApi/graph/hipSimpleGraphWithKernel.cpp deleted file mode 100644 index 7baaab3f76..0000000000 --- a/tests/src/runtimeApi/graph/hipSimpleGraphWithKernel.cpp +++ /dev/null @@ -1,142 +0,0 @@ -#include -#include -#include "hip/hip_runtime.h" -#include -#include -#include -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ -#define N 1024 * 1024 -#define NSTEP 1000 -#define NKERNEL 25 -#define CONSTANT 5.34 - -__global__ void simpleKernel(float* out_d, float* in_d) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < N) out_d[idx] = CONSTANT * in_d[idx]; -} - -bool hipTestWithGraph() { - int deviceId; - HIPCHECK(hipGetDevice(&deviceId)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, deviceId)); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - float *in_h, *out_h; - in_h = new float[N]; - out_h = new float[N]; - for (int i = 0; i < N; i++) { - in_h[i] = i; - } - - float *in_d, *out_d; - HIPCHECK(hipMalloc(&in_d, N * sizeof(float))); - HIPCHECK(hipMalloc(&out_d, N * sizeof(float))); - HIPCHECK(hipMemcpy(in_d, in_h, N * sizeof(float), hipMemcpyHostToDevice)); - - auto start = std::chrono::high_resolution_clock::now(); - // start CPU wallclock timer - bool graphCreated = false; - hipGraph_t graph; - hipGraphExec_t instance; - - hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal); - for (int ikrnl = 0; ikrnl < NKERNEL; ikrnl++) { - simpleKernel<<>>(out_d, in_d); - } - 
hipStreamEndCapture(stream, &graph); - hipGraphInstantiate(&instance, graph, NULL, NULL, 0); - - auto start1 = std::chrono::high_resolution_clock::now(); - for (int istep = 0; istep < NSTEP; istep++) { - hipGraphLaunch(instance, stream); - hipStreamSynchronize(stream); - } - auto stop = std::chrono::high_resolution_clock::now(); - auto resultWithInit = std::chrono::duration(stop - start); - auto resultWithoutInit = std::chrono::duration(stop - start1); - std::cout << "Time taken for graph with Init: " - << std::chrono::duration_cast(resultWithInit).count() - << " milliseconds without Init:" - << std::chrono::duration_cast(resultWithoutInit).count() - << " milliseconds " << std::endl; - - HIPCHECK(hipMemcpy(out_h, out_d, N * sizeof(float), hipMemcpyDeviceToHost)); - for (int i = 0; i < N; i++) { - if (float(in_h[i] * CONSTANT) != out_h[i]) { - return false; - } - } - delete[] in_h; - delete[] out_h; - HIPCHECK(hipFree(in_d)); - HIPCHECK(hipFree(out_d)); - return true; -} - -bool hipTestWithoutGraph() { - int deviceId; - HIPCHECK(hipGetDevice(&deviceId)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, deviceId)); - printf("info: running on device #%d %s\n", deviceId, props.name); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - float *in_h, *out_h; - in_h = new float[N]; - out_h = new float[N]; - for (int i = 0; i < N; i++) { - in_h[i] = i; - } - - float *in_d, *out_d; - HIPCHECK(hipMalloc(&in_d, N * sizeof(float))); - HIPCHECK(hipMalloc(&out_d, N * sizeof(float))); - HIPCHECK(hipMemcpy(in_d, in_h, N * sizeof(float), hipMemcpyHostToDevice)); - - // start CPU wallclock timer - auto start = std::chrono::high_resolution_clock::now(); - for (int istep = 0; istep < NSTEP; istep++) { - for (int ikrnl = 0; ikrnl < NKERNEL; ikrnl++) { - simpleKernel<<>>(out_d, in_d); - } - HIPCHECK(hipStreamSynchronize(stream)); - } - auto stop = std::chrono::high_resolution_clock::now(); - auto result = std::chrono::duration(stop - start); - std::cout << 
"Time taken for test without graph: " - << std::chrono::duration_cast(result).count() - << " millisecs " << std::endl; - HIPCHECK(hipMemcpy(out_h, out_d, N * sizeof(float), hipMemcpyDeviceToHost)); - for (int i = 0; i < N; i++) { - if (float(in_h[i] * CONSTANT) != out_h[i]) { - return false; - } - } - delete[] in_h; - delete[] out_h; - HIPCHECK(hipFree(in_d)); - HIPCHECK(hipFree(out_d)); - return true; -} - -int main(int argc, char* argv[]) { - bool status1, status2; - status1 = hipTestWithoutGraph(); - status2 = hipTestWithGraph(); - if (!status1) { - failed("Failed during test with hip graph\n"); - } - if (!status2) { - failed("Failed during test without graph\n"); - } - passed(); -} diff --git a/tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp b/tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp deleted file mode 100644 index fe25514384..0000000000 --- a/tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp +++ /dev/null @@ -1,258 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Test for transferring data beween devices using host pinned memory - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t d2dMemCpyWithPinnedHostMemory_pinned --pinned - * TEST_NAMED: %t d2dMemCpyWithPinnedHostMemory_registered --registered - * HIT_END - */ - -#include -#include "test_common.h" -#define N 1000000 - -enum MallopinType {mallocPinned, mallocRegistered, mallocNone}; - -MallopinType p_malloc_mode = mallocNone; - -int *Ad0{nullptr}, *Bd0{nullptr}, *Cd0{nullptr}, *Ad1{nullptr}, - *Bd1{nullptr}, *Cd1{nullptr}; -int *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}; - -void memAllocate(MallopinType pinType) { - size_t Nbytes = N * sizeof(int); - if (pinType == mallocPinned) { - std::cout << "Allocating pinned host memory\n"; - HIPCHECK(hipHostMalloc(reinterpret_cast(&A_h), Nbytes)); - HIPCHECK(hipHostMalloc(reinterpret_cast(&B_h), Nbytes)); - HIPCHECK(hipHostMalloc(reinterpret_cast(&C_h), Nbytes)); - - } else { - std::cout << "Allocating registered host memory\n"; - A_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(hipHostRegister(A_h, Nbytes, hipHostRegisterDefault)); - B_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(hipHostRegister(B_h, Nbytes, hipHostRegisterDefault)); - C_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(hipHostRegister(C_h, Nbytes, hipHostRegisterDefault)); - } - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMalloc(&Ad0, Nbytes)); - HIPCHECK(hipMalloc(&Bd0, Nbytes)); - HIPCHECK(hipMalloc(&Cd0, Nbytes)); -} - -void memClear(MallopinType pinType) { - if (pinType == mallocPinned) { - HIPCHECK(hipHostFree(A_h)); - HIPCHECK(hipHostFree(B_h)); - HIPCHECK(hipHostFree(C_h)); - } else { - HIPCHECK(hipHostUnregister(A_h)); - 
free(A_h); - HIPCHECK(hipHostUnregister(B_h)); - free(B_h); - HIPCHECK(hipHostUnregister(C_h)); - free(C_h); - } - - HIPCHECK(hipFree(Ad0)); - HIPCHECK(hipFree(Bd0)); - HIPCHECK(hipFree(Cd0)); - HIPCHECK(hipFree(Ad1)); - HIPCHECK(hipFree(Bd1)); - HIPCHECK(hipFree(Cd1)); -} - -bool testMemCopy(int gpuCnt, MallopinType pinType) { - size_t Nbytes = N * sizeof(int); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - memAllocate(pinType); - - for (int i = 0; i < N; i++) { - A_h[i] = i; - B_h[i] = i; - } - - HIPCHECK(hipMemcpy(Ad0, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd0, B_h, Nbytes, hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, 0, static_cast(Ad0), - static_cast(Bd0), Cd0, N); - - HIPCHECK(hipMemcpy(C_h, Cd0, Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - unsigned int seed = time(0); - HIPCHECK(hipSetDevice(rand_r(&seed) % (gpuCnt-1)+1)); - - int device; - hipGetDevice(&device); - std::cout <<"hipMemcpy is set to happen between device 0 and device " - <(Ad1), - static_cast(Bd1), Cd1, N); - - HIPCHECK(hipMemcpy(C_h, Cd1, Nbytes, hipMemcpyDeviceToHost)); - - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - memClear(pinType); - return true; -} - -bool testMemCopyAsync(int gpuCnt, MallopinType pinType) { - size_t Nbytes = N * sizeof(int); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - memAllocate(pinType); - - for (int i = 0; i < N; i++) { - A_h[i] = i; - B_h[i] = i; - } - - HIPCHECK(hipMemcpy(Ad0, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd0, B_h, Nbytes, hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, 0, static_cast(Ad0), - static_cast(Bd0), Cd0, N); - - HIPCHECK(hipMemcpy(C_h, Cd0, Nbytes, hipMemcpyDeviceToHost)); - - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - // Setting random gpu from all available gpus 
except gpu0 - unsigned int seed = time(0); - HIPCHECK(hipSetDevice(rand_r(&seed) % (gpuCnt-1)+1)); - - int device; - hipGetDevice(&device); - std::cout <<"hipMemcpyAsync is set to happen between device 0 and device " - <(Ad1), - static_cast(Bd1), Cd1, N); - - HIPCHECK(hipMemcpyAsync(C_h, Cd1, Nbytes, hipMemcpyDeviceToHost, gpu1Stream)); - HIPCHECK(hipStreamSynchronize(gpu1Stream)); - - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - memClear(pinType); - HIPCHECK(hipStreamDestroy(gpu1Stream)); - return true; -} - -int parseStandardArguments(int argc, char* argv[]) { - for (int i = 1; i < argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, " ")) { - // skip NULL args. - } else if (!strcmp(arg, "--pinned")) { - p_malloc_mode = mallocPinned; - } else if (!strcmp(arg, "--registered")) { - p_malloc_mode = mallocRegistered; - } else { - failed("Bad argument '%s'", arg); - } - } - return 0; -} - -int main(int argc, char* argv[]) { - bool testResult1 = true; - bool testResult2 = true; - parseStandardArguments(argc, argv); - - if (p_malloc_mode == mallocNone) { - std::cout << "info: invalid malloc type. 
Empty pass\n"; - passed(); - } - int numDevices = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - testResult1 &= testMemCopy(numDevices, p_malloc_mode); - if (!(testResult1)) { - std::cout << "d2d failed with hipMemcpy using pinned host buffers\n"; - } - - testResult2 &= testMemCopyAsync(numDevices, p_malloc_mode); - if (!(testResult2)) { - std::cout << "d2d failed with hipMemcpyAsync using pinned host buffers\n"; - } - - if (testResult1 && testResult2) { - passed(); - } else { - failed("One or more tests failed\n"); - } - } else { - std::cout << "Machine does not have more than one gpu, Empty Pass" - << std::endl; - passed(); - } -} diff --git a/tests/src/runtimeApi/memory/hipArray.cpp b/tests/src/runtimeApi/memory/hipArray.cpp deleted file mode 100644 index 039a002a02..0000000000 --- a/tests/src/runtimeApi/memory/hipArray.cpp +++ /dev/null @@ -1,297 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM all - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -void printSep() { - printf( - "======================================================================================\n"); -} - -inline void initMemCpyParam2D(hip_Memcpy2D &ins, const size_t dpitch, - const size_t spitch, const size_t width, - const size_t height, hipMemoryType dstType, - enum hipMemoryType srcType) { - ins.srcXInBytes=0; - ins.srcY=0; - ins.srcPitch=spitch; - ins.dstXInBytes=0; - ins.dstY=0; - ins.dstPitch=dpitch; - ins.WidthInBytes=width; - ins.Height=height; - ins.dstMemoryType= dstType; - ins.srcMemoryType= srcType; -} - -//--- -// Test copies of a matrix numW by numH -// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and -// checks the result. -// -// IN: numW: number of elements in the 1st dimension used for allocation -// IN: numH: number of elements in the 2nd dimension used for allocation -// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host -// memory with malloc. 
-// -template -void memcpy2Dtest(size_t numW, size_t numH, bool usePinnedHost) { - size_t width = numW * sizeof(T); - size_t sizeElements = width * numH; - - printf("memcpy2Dtest: %s<%s> size=%lu (%6.2fMB) W: %d, H:%d, usePinnedHost: %d\n", __func__, - TYPENAME(T), sizeElements, sizeElements / 1024.0 / 1024.0, (int)numW, (int)numH, - usePinnedHost); - - T *A_d, *B_d, *C_d; - T *A_h, *B_h, *C_h; - - size_t pitch_A, pitch_B, pitch_C; - - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HipTest::initArrays2DPitch(&A_d, &B_d, &C_d, &pitch_A, &pitch_B, &pitch_C, numW, numH); - HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW * numH, usePinnedHost); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH); - - HIPCHECK(hipMemcpy2D(A_d, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice)); - hip_Memcpy2D ins; - initMemCpyParam2D(ins,pitch_B,width,width,numH,hipMemoryTypeDevice,hipMemoryTypeHost); - ins.dstDevice = (hipDeviceptr_t)B_d; - ins.srcHost = B_h; - HIPCHECK(hipMemcpyParam2D(&ins)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, - (pitch_C / sizeof(T)) * numH); - - HIPCHECK(hipMemcpy2D(C_h, width, C_d, pitch_C, width, numH, hipMemcpyDeviceToHost)); - - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, numW * numH); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost); - - printf(" %s success\n", __func__); -} - -//--- -// Test copies of a matrix numW by numH into a hipArray data structure -// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and -// checks the result. -// -// IN: numW: number of elements in the 1st dimension used for allocation -// IN: numH: number of elements in the 2nd dimension used for allocation. 
If this is 1, then the -// 1-dimensional copy API -// would be used -// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host -// memory with malloc. IN: usePitch: If true, pads additional memory. This is only valid in the -// 2-dimensional case -// -template -void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch = false) { - size_t width = numW * sizeof(T); - size_t sizeElements = width * numH; - - printf( - "memcpyArraytest: %s<%s> size=%lu (%6.2fMB) W: %d, H: %d, usePinnedHost: %d, usePitch: " - "%d\n", - __func__, TYPENAME(T), sizeElements, sizeElements / 1024.0 / 1024.0, (int)numW, (int)numH, - usePinnedHost, usePitch); - - hipArray *A_d, *B_d, *C_d; - T *A_h, *B_h, *C_h; - - // 1D - if ((numW >= 1) && (numH == 1)) { - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, 1, 0); - HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW * numH, usePinnedHost); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH); - - HIPCHECK(hipMemcpyToArray(A_d, 0, 0, (void*)A_h, width, hipMemcpyHostToDevice)); - hip_Memcpy2D ins; - initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost); - ins.dstArray = B_d; - ins.srcHost = B_h; - HIPCHECK(hipMemcpyParam2D(&ins)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW); - - HIPCHECK(hipMemcpy(C_h, C_d->data, width, hipMemcpyDeviceToHost)); - - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, numW); - - } - // 2D - else if ((numW >= 1) && (numH >= 1)) { - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, numH, 0); - HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW * numH, usePinnedHost); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW * numH); - - if 
(usePitch) { - T *A_p, *B_p, *C_p; - size_t pitch_A, pitch_B, pitch_C; - - HipTest::initArrays2DPitch(&A_p, &B_p, &C_p, &pitch_A, &pitch_B, &pitch_C, numW, numH); - HIPCHECK(hipMemcpy2D(A_p, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy2D(B_p, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice)); - - HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, (void*)A_p, pitch_A, width, numH, - hipMemcpyDeviceToDevice)); - HIPCHECK(hipMemcpy2DToArray(B_d, 0, 0, (void*)B_p, pitch_B, width, numH, - hipMemcpyDeviceToDevice)); - - hipFree(A_p); - hipFree(B_p); - hipFree(C_p); - } else { - HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, (void*)A_h, width, width, numH, - hipMemcpyHostToDevice)); - hip_Memcpy2D ins; - initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeArray,hipMemoryTypeHost); - ins.dstArray = B_d; - ins.srcHost = B_h; - HIPCHECK(hipMemcpyParam2D(&ins)); - } - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW * numH); - printf("memcpy srcArray to dstHost\n"); - hip_Memcpy2D ins; - initMemCpyParam2D(ins,width,width,width,numH,hipMemoryTypeHost,hipMemoryTypeArray); - ins.srcArray = C_d; - ins.dstHost = C_h; - HIPCHECK(hipMemcpyParam2D(&ins)); - - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, numW * numH); - } - // Unknown - else { - HIPASSERT("Incompatible dimensions" && 0); - } - - hipFreeArray(A_d); - hipFreeArray(B_d); - hipFreeArray(C_d); - HipTest::freeArraysForHost(A_h, B_h, C_h, usePinnedHost); - - printf(" %s success\n", __func__); -} - -//--- -// Try many different sizes to memory copy. 
-template -void memcpyArraytest_size(size_t maxElem = 0, size_t offset = 0) { - printf("test: %s<%s>\n", __func__, TYPENAME(T)); - - int deviceId; - HIPCHECK(hipGetDevice(&deviceId)); - - size_t free, total; - HIPCHECK(hipMemGetInfo(&free, &total)); - - if (maxElem == 0) { - maxElem = free / sizeof(T) / 5; - } - - printf( - " device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB " - "offset=%lu\n", - deviceId, free, (float)(free / 1024.0 / 1024.0), total, (float)(total / 1024.0 / 1024.0), - maxElem * sizeof(T) / 1024.0 / 1024.0, offset); - - // Test 1D - for (size_t elem = 64; elem + offset <= maxElem; elem *= 2) { - HIPCHECK(hipDeviceReset()); - memcpyArraytest(elem + offset, 1, 0); // unpinned host - HIPCHECK(hipDeviceReset()); - memcpyArraytest(elem + offset, 1, 1); // pinned host - } - - // Test 2D - size_t maxElem2D = sqrt(maxElem); - - for (size_t elem = 64; elem + offset <= maxElem2D; elem *= 2) { - HIPCHECK(hipDeviceReset()); - memcpyArraytest(elem + offset, elem + offset, 0, 1); // use pitch - } -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - printf("info: set device to %d\n", p_gpuDevice); - HIPCHECK(hipSetDevice(p_gpuDevice)); - - if (p_tests & 0x1) { - printf("\n\n=== tests&1 (types)\n"); - printSep(); - HIPCHECK(hipDeviceReset()); - memcpy2Dtest(321, 211, 0); - memcpy2Dtest(322, 211, 0); - memcpy2Dtest(320, 211, 0); - memcpy2Dtest(323, 211, 0); - printf("===\n\n\n"); - - printf("\n\n=== tests&1 (types)\n"); - printSep(); - // 2D - memcpyArraytest(320, 211, 0, 0); - memcpyArraytest(322, 211, 0, 0); - memcpyArraytest(320, 211, 0, 0); - memcpyArraytest(320, 211, 0, 1); - memcpyArraytest(322, 211, 0, 1); - memcpyArraytest(320, 211, 0, 1); - printSep(); - // 1D - memcpyArraytest(320, 1, 0); - memcpyArraytest(322, 1, 0); - memcpyArraytest(320, 1, 0); - printf("===\n\n\n"); - } - - if (p_tests & 0x4) { - printf("\n\n=== tests&4 (test sizes and offsets)\n"); - printSep(); - 
HIPCHECK(hipDeviceReset()); - printSep(); - memcpyArraytest_size(0, 0); - printSep(); - memcpyArraytest_size(0, 64); - printSep(); - memcpyArraytest_size(1024 * 1024, 13); - printSep(); - memcpyArraytest_size(1024 * 1024, 50); - } - - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipHostGetFlags.cpp b/tests/src/runtimeApi/memory/hipHostGetFlags.cpp deleted file mode 100644 index 85dc76960f..0000000000 --- a/tests/src/runtimeApi/memory/hipHostGetFlags.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include - -#define LEN 1024 * 1024 -#define SIZE LEN * sizeof(float) - -__global__ void Add(float* Ad, float* Bd, float* Cd) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - Cd[tx] = Ad[tx] + Bd[tx]; -} - -int main() { - float *A, *B, *C, *D; - float *Ad, *Bd, *Cd, *Dd; - unsigned int FlagA, FlagB, FlagC; - FlagA = hipHostMallocWriteCombined | hipHostMallocMapped; - FlagB = hipHostMallocWriteCombined | hipHostMallocMapped; - FlagC = hipHostMallocMapped; - hipDeviceProp_t prop; - int device; - HIPCHECK(hipGetDevice(&device)); - HIPCHECK(hipGetDeviceProperties(&prop, device)); - if (prop.canMapHostMemory != 1) { - std::cout << "Exiting..." << std::endl; - } - HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped)); - HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped)); - HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped)); - - HIPCHECK(hipHostMalloc((void**)&D, SIZE, hipHostMallocDefault)); - - unsigned int flagA, flagB, flagC; - HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0)); - HIPCHECK(hipHostGetDevicePointer((void**)&Bd, B, 0)); - HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0)); - HIPCHECK(hipHostGetDevicePointer((void**)&Dd, D, 0)); - HIPCHECK(hipHostGetFlags(&flagA, A)); - HIPCHECK(hipHostGetFlags(&flagB, B)); - HIPCHECK(hipHostGetFlags(&flagC, C)); - - for (int i = 0; i < LEN; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - - dim3 dimGrid(LEN / 256, 1, 1); - dim3 dimBlock(256, 1, 1); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(Add), dimGrid, dimBlock, 0, 0, Ad, Bd, Cd); - - HIPCHECK( - hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost)); // Note this really HostToHost not - // DeviceToHost, since memory is mapped... 
- HIPASSERT(C[10] == 3.0f); - HIPASSERT(flagA == FlagA); - HIPASSERT(flagB == FlagB); - HIPASSERT(flagC == FlagC); - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipHostMalloc.cpp b/tests/src/runtimeApi/memory/hipHostMalloc.cpp deleted file mode 100644 index 7af5bfdddf..0000000000 --- a/tests/src/runtimeApi/memory/hipHostMalloc.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include "test_common.h" - -#define LEN 1024 * 1024 -#define SIZE LEN * sizeof(float) - -__global__ void Add(float* Ad, float* Bd, float* Cd) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - Cd[tx] = Ad[tx] + Bd[tx]; -} - - -__global__ void Set(int* Ad, int val) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tx] = val; -} - - -#define SYNC_EVENT 0 -#define SYNC_STREAM 1 -#define SYNC_DEVICE 2 - -std::vector syncMsg = {"event", "stream", "device"}; - -void CheckHostPointer(int numElements, int* ptr, unsigned eventFlags, int syncMethod, - std::string msg) { - std::cerr << "test: CheckHostPointer " - << msg - //<< " HIP_COHERENT_HOST_ALLOC=" << HIP_COHERENT_HOST_ALLOC - //<< " HIP_EVENT_SYS_RELEASE=" << HIP_EVENT_SYS_RELEASE - << " eventFlags = " << std::hex << eventFlags - << ((eventFlags & hipEventReleaseToDevice) ? " hipEventReleaseToDevice" : "") - << ((eventFlags & hipEventReleaseToSystem) ? 
" hipEventReleaseToSystem" : "") - << " ptr=" << ptr << " syncMethod=" << syncMsg[syncMethod] << "\n"; - - hipStream_t s; - hipEvent_t e; - - // Init: - HIPCHECK(hipStreamCreate(&s)); - HIPCHECK(hipEventCreateWithFlags(&e, eventFlags)) - dim3 dimBlock(64, 1, 1); - dim3 dimGrid(numElements / dimBlock.x, 1, 1); - - const int expected = 13; - - // Init array to know state: - hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, 0x0, ptr, -42); - HIPCHECK(hipDeviceSynchronize()); - - hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, s, ptr, expected); - HIPCHECK(hipEventRecord(e, s)); - - // Host waits for event : - switch (syncMethod) { - case SYNC_EVENT: - HIPCHECK(hipEventSynchronize(e)); - break; - case SYNC_STREAM: - HIPCHECK(hipStreamSynchronize(s)); - break; - case SYNC_DEVICE: - HIPCHECK(hipDeviceSynchronize()); - break; - default: - assert(0); - }; - - for (int i = 0; i < numElements; i++) { - if (ptr[i] != expected) { - printf("mismatch at %d: %d != %d\n", i, ptr[i], expected); - assert(ptr[i] == expected); - } - } - - HIPCHECK(hipStreamDestroy(s)); - HIPCHECK(hipEventDestroy(e)); -}; - -int main() { - hipDeviceProp_t prop; - int device; - HIPCHECK(hipGetDevice(&device)); - HIPCHECK(hipGetDeviceProperties(&prop, device)); - if (prop.canMapHostMemory != 1) { - std::cout << "Exiting..." 
<< std::endl; - failed("Does support HostPinned Memory"); - } - - - { - float *A, *B, *C; - float *Ad, *Bd, *Cd; - HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocWriteCombined | hipHostMallocMapped)); - HIPCHECK(hipHostMalloc((void**)&B, SIZE, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&C, SIZE, hipHostMallocMapped)); - - HIPCHECK(hipHostGetDevicePointer((void**)&Ad, A, 0)); - HIPCHECK(hipHostGetDevicePointer((void**)&Cd, C, 0)); - - for (int i = 0; i < LEN; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - - HIPCHECK(hipMalloc((void**)&Bd, SIZE)); - HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); - - dim3 dimGrid(LEN / 256, 1, 1); - dim3 dimBlock(256, 1, 1); - - hipLaunchKernelGGL(Add, dimGrid, dimBlock, 0, 0, Ad, Bd, Cd); - - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipHostFree(A)); - HIPCHECK(hipHostFree(B)); - HIPCHECK(hipHostFree(C)); - } - - { - int numElements = 1024 * 16; - size_t sizeBytes = numElements * sizeof(int); - -#ifdef __HIP_PLATFORM_AMD__ - { - // Stimulate error condition: - int* A = &numElements; - HIPCHECK_API(hipHostMalloc((void**)&A, sizeBytes, - hipHostMallocCoherent | hipHostMallocNonCoherent), - hipErrorInvalidValue); - - assert(A == 0); - } -#endif - - - { - int* A = nullptr; - HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocNonCoherent)); - const char* ptrType = "non-coherent"; // TODO - CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_DEVICE, ptrType); - CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_STREAM, ptrType); - CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_EVENT, ptrType); - - // agent-scope releases don't provide host visibility, don't use them here: - } - - if (1) { - int* A = nullptr; - if (hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent) == hipSuccess) { - const char* ptrType = "coherent"; - CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_DEVICE, ptrType); - CheckHostPointer(numElements, A, 
hipEventReleaseToDevice, SYNC_STREAM, ptrType); - CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_EVENT, ptrType); - - CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_DEVICE, ptrType); - CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_STREAM, ptrType); - CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_EVENT, ptrType); - } - else { - warn("Coherence memory allocation failed. Is SVM atomic supported?") - } - } - - - // Check defaults: - if (1) { - int* A = nullptr; - HIPCHECK(hipHostMalloc((void**)&A, sizeBytes)); - const char* ptrType = "default"; - CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType); - CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType); - CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType); - - CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType); - CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType); - CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType); - } - } - - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipHostMallocTests.cpp b/tests/src/runtimeApi/memory/hipHostMallocTests.cpp deleted file mode 100644 index 8388869a75..0000000000 --- a/tests/src/runtimeApi/memory/hipHostMallocTests.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/* -Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -Testcase Scenarios : - - (TestCase 1):: - 1) Test hipHostMalloc() api with ptr as nullptr and check for return value. - 2) Test hipHostMalloc() api with size as max(size_t) and check for OOM error. - 3) Test hipHostMalloc() api with flags as max(unsigned int) and validate - return value. - 4) Pass size as zero for hipHostMalloc() api and check ptr is reset with - with return value success. - -*/ - -/* HIT_START - * BUILD_CMD: %t %hc %S/%s %S/../../test_common.cpp -I%S/../../ -o %T/%t -std=c++11 - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define NUM_BYTES 1000 - -int main(int argc, char *argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - hipError_t ret; - size_t allocSize = NUM_BYTES; - char *ptr; - - // Pass ptr as nullptr. - if ((ret = hipHostMalloc(static_cast(nullptr), allocSize)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropritate error value returned for " - "ptr as nullptr. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - // Size as max(size_t). - if ((ret = hipHostMalloc(&ptr, - std::numeric_limits::max())) - != hipErrorOutOfMemory) { - printf("ArgValidation : Inappropritate error value returned for " - "max(size_t). Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - // Flags as max(uint). 
- if ((ret = hipHostMalloc(&ptr, allocSize, - std::numeric_limits::max())) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropritate error value returned for " - "max(uint). Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - // Pass size as zero and check ptr reset. - HIPCHECK(hipHostMalloc(&ptr, 0)); - if (ptr) { - TestPassed &= false; - printf("ArgValidation : ptr is not reset when size(0)\n"); - } - - if (TestPassed) { - passed(); - } else { - failed("hipHostMallocTests validation Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipHostMallocTests.h b/tests/src/runtimeApi/memory/hipHostMallocTests.h deleted file mode 100644 index 9685f78c83..0000000000 --- a/tests/src/runtimeApi/memory/hipHostMallocTests.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef TESTS_SRC_RUNTIMEAPI_MEMORY_HIPHOSTMALLOCTESTS_H_ -#define TESTS_SRC_RUNTIMEAPI_MEMORY_HIPHOSTMALLOCTESTS_H_ - -#include "test_common.h" - -/** - * @brief Error codes retured by rocm_smi_lib functions - */ -typedef enum { - RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful - RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid - RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or - //!< action is not available for the - //!< given input, on the given system - RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This - //!< may because the operation is not - //!< supported by the Linux kernel - //!< version running on the executing - //!< machine - RSMI_STATUS_PERMISSION, //!< Permission denied/EACCESS file - //!< error. Many functions require - //!< root access to run. - RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other - //!< resource - RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught - RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of - //!< allowable or safe range - RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi - //!< initializing internal data - //!< structures - RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR, - RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not - //!< yet been implemented in the - //!< current system for the current - //!< devices - RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not - //!< found - RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were - //!< available for the operation - RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during - //!< execution of function - RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data - //!< was read - RSMI_STATUS_NO_DATA, //!< No data was found for a given - //!< input - RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to - //!< function is not what was expected - RSMI_STATUS_BUSY, //!< A resource or mutex could not be - //!< 
acquired because it is already - //!< being used - RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter - //!< exceeded INT32_MAX - - RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred -} rsmi_status_t; - - -/** - * @brief Types of memory - */ -typedef enum { - RSMI_MEM_TYPE_FIRST = 0, - - RSMI_MEM_TYPE_VRAM = RSMI_MEM_TYPE_FIRST, //!< VRAM memory - RSMI_MEM_TYPE_VIS_VRAM, //!< VRAM memory that is visible - RSMI_MEM_TYPE_GTT, //!< GTT memory - - RSMI_MEM_TYPE_LAST = RSMI_MEM_TYPE_GTT -} rsmi_memory_type_t; - - - -#endif // TESTS_SRC_RUNTIMEAPI_MEMORY_HIPHOSTMALLOCTESTS_H_ diff --git a/tests/src/runtimeApi/memory/hipHostRegister.cpp b/tests/src/runtimeApi/memory/hipHostRegister.cpp deleted file mode 100644 index 0121c03cfe..0000000000 --- a/tests/src/runtimeApi/memory/hipHostRegister.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -// TODO - bug if run both back-to-back, once fixed should just need one command line - -#include "test_common.h" -#include - -__global__ void Inc(float* Ad) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tx] = Ad[tx] + float(1); -} - - -template -void doMemCopy(size_t numElements, int offset, T* A, T* Bh, T* Bd, bool internalRegister) { - A = A + offset; - numElements -= offset; - - size_t sizeBytes = numElements * sizeof(T); - - if (internalRegister) { - HIPCHECK(hipHostRegister(A, sizeBytes, 0)); - } - - - // Reset - for (size_t i = 0; i < numElements; i++) { - A[i] = float(i); - Bh[i] = 0.0f; - } - - HIPCHECK(hipMemset(Bd, 13.0f, sizeBytes)); - - - // - HIPCHECK(hipMemcpy(Bd, A, sizeBytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bh, Bd, sizeBytes, hipMemcpyDeviceToHost)); - - // Make sure the copy worked - for (size_t i = 0; i < numElements; i++) { - if (Bh[i] != A[i]) { - printf("mismatch at Bh[%zu]=%f, A[%zu]=%f\n", i, Bh[i], i, A[i]); - failed("mismatch"); - }; - } - - if (internalRegister) { - HIPCHECK(hipHostUnregister(A)); - } -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - const size_t size = N * sizeof(float); - - if (p_tests & 0x1) { - float *A, **Ad; - int num_devices; - HIPCHECK(hipGetDeviceCount(&num_devices)); - Ad = new float*[num_devices]; - A = (float*)malloc(size); - HIPCHECK(hipHostRegister(A, size, 0)); - - - for (int i = 0; i < N; i++) { - A[i] = float(1); - } - - - for (int i = 0; i < num_devices; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipHostGetDevicePointer((void**)&Ad[i], A, 0)); - } - - // Reference the registered device pointer Ad from inside the kernel: - for (int i = 0; i < num_devices; i++) { - HIPCHECK(hipSetDevice(i)); - hipLaunchKernelGGL(Inc, dim3(N / 512), dim3(512), 0, 0, Ad[i]); - - HIPCHECK(hipDeviceSynchronize()); - } - HIPASSERT(A[10] == 1.0f + 
float(num_devices)); - - HIPCHECK(hipHostUnregister(A)); - - free(A); - delete [] Ad; - } - - if (p_tests & 0x3) { - float *A, **Ad; - int num_devices; - HIPCHECK(hipGetDeviceCount(&num_devices)); - Ad = new float*[num_devices]; - A = (float*)malloc(size); - HIPCHECK(hipHostRegister(A, size, 0)); - - for (int i = 0; i < N; i++) { - A[i] = float(1); - } - - for (int i = 0; i < num_devices; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipHostGetDevicePointer((void**)&Ad[i], A, 0)); - } - - // Reference the registered device pointer Ad in hipMemset: - for (int i = 0; i < num_devices; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMemset(Ad[i], 0, size)); - } - HIPASSERT(A[10] == 0.0f); - - HIPCHECK(hipHostUnregister(A)); - - free(A); - delete [] Ad; - } - - if (p_tests & 0x6) { - // Sensitize HIP bug if device does not match where the memory was registered. - HIPCHECK(hipSetDevice(0)); - - - float* A = (float*)malloc(size); - - // Copy to B, this should be optimal pinned malloc copy: - // Note we are using the host pointer here: - float *Bh, *Bd; - Bh = (float*)malloc(size); - HIPCHECK(hipMalloc(&Bd, size)); - - // TODO - set to 128 -#define OFFSETS_TO_TRY 128 - assert(N > OFFSETS_TO_TRY); - - if (p_tests & 0x2) { - for (size_t i = 0; i < OFFSETS_TO_TRY; i++) { - doMemCopy(N, i, A, Bh, Bd, true /*internalRegister*/); - } - } - - if (p_tests & 0x4) { - HIPCHECK(hipHostRegister(A, size, 0)); - for (size_t i = 0; i < OFFSETS_TO_TRY; i++) { - doMemCopy(N, i, A, Bh, Bd, false /*internalRegister*/); - } - HIPCHECK(hipHostUnregister(A)); - } - - - free(A); - free(Bh); - hipFree(Bd); - } - - - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipIpcMemAccessTest.cpp b/tests/src/runtimeApi/memory/hipIpcMemAccessTest.cpp deleted file mode 100644 index 18c0a282f2..0000000000 --- a/tests/src/runtimeApi/memory/hipIpcMemAccessTest.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp LINK_OPTIONS -lrt -lpthread - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include "test_common.h" - -#define HIPCHECK_NO_RETURN(lastError, error) \ - { \ - if (lastError == hipSuccess) { \ - hipError_t localError = error; \ - if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \ - printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \ - localError, #error, __FILE__, __LINE__, KNRM); \ - lastError = localError; \ - if (shrd_mem) \ - shrd_mem->IfTestPassed = false; \ - } \ - } \ - } - -#ifdef __linux__ -sem_t *sem_ob1 = NULL, *sem_ob2 = NULL; -typedef struct mem_handle { - int device; - hipIpcMemHandle_t memHandle; - bool IfTestPassed; -} hip_ipc_t; - -class IpcMemHandleTest { - public: - bool InitFlag = true; - hip_ipc_t *shrd_mem = NULL; - pid_t pid; - size_t N = 1024; - size_t Nbytes = N * sizeof(int); - int out = 0; - int *A_h, *C_h; - int Num_devices = 0, Data_mismatch, CanAccessPeer = 0; - IpcMemHandleTest(); - bool Test(); - ~IpcMemHandleTest(); -}; - - -bool IpcMemHandleTest::Test() { - if (InitFlag == false) { - // Abort the test if the initialization fails - printf("Resource initialization failed. Hence test skipped!"); - return false; - } - hipError_t status = hipSuccess; - - pid = fork(); - if (pid != 0) { - // Parent process - HIPCHECK_NO_RETURN(status, hipGetDeviceCount(&Num_devices)); - for (int i = 0; i < Num_devices; ++i) { - if (shrd_mem->IfTestPassed == true) { - int *A_d = NULL; - HIPCHECK_NO_RETURN(status, hipSetDevice(i)); - HIPCHECK_NO_RETURN(status, hipMalloc(&A_d, Nbytes)); - HIPCHECK_NO_RETURN(status, hipIpcGetMemHandle((hipIpcMemHandle_t *) &shrd_mem->memHandle, - A_d)); - HIPCHECK_NO_RETURN(status, hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - shrd_mem->device = i; - if ((out=sem_post(sem_ob1)) == -1) { - // Need to use inline function to release resources. 
- shrd_mem->IfTestPassed = false; - failed("sem_post() call failed in parent process."); - } - if ((out=sem_wait(sem_ob2)) == -1) { - shrd_mem->IfTestPassed = false; - failed("sem_wait() call failed in parent process."); - } - HIPCHECK_NO_RETURN(status, hipFree(A_d)); - } - } - } else { - // Child process - HIPCHECK_NO_RETURN(status, hipGetDeviceCount(&Num_devices)); - for (int j = 0; j < Num_devices; ++j) { - if ((out=sem_wait(sem_ob1)) == -1) { - shrd_mem->IfTestPassed = false; - printf("sem_wait() call failed in child process."); - if ((out=sem_post(sem_ob2)) == -1) { - printf("sem_post() call on sem_ob2 failed"); - exit(1); - } - } - for (int i = 0; i < Num_devices; ++i) { - Data_mismatch = 0; - int *Ad1 = NULL; - int *Ad2 = NULL; - HIPCHECK_NO_RETURN(status, hipSetDevice(i)); - HIPCHECK_NO_RETURN(status, hipMalloc(&Ad2, Nbytes)); - HIPCHECK_NO_RETURN(status, hipIpcOpenMemHandle((void **) &Ad1, shrd_mem->memHandle, - hipIpcMemLazyEnablePeerAccess)); - HIPCHECK_NO_RETURN(status, hipDeviceCanAccessPeer(&CanAccessPeer, i, shrd_mem->device)); - if (CanAccessPeer == 1) { - HIPCHECK_NO_RETURN(status, hipMemcpy(Ad2, Ad1, Nbytes, hipMemcpyDeviceToDevice)); - HIPCHECK_NO_RETURN(status, hipMemcpy(C_h, Ad2, Nbytes, hipMemcpyDeviceToHost)); - for (int i = 0; i < N; ++i) { - if (C_h[i] != 123) - Data_mismatch++; - } - if (Data_mismatch != 0) { - printf("Data mismatch found when data copied from Ipc memhandle"); - printf(" to Device: %d\n", i); - shrd_mem->IfTestPassed = false; - } - memset(reinterpret_cast(C_h), 0, Nbytes); - // Checking if the data obtained from Ipc shared memory is consistent - HIPCHECK_NO_RETURN(status, hipMemcpy(C_h, Ad1, Nbytes, hipMemcpyDeviceToHost)); - for (int i = 0; i < N; ++i) { - if (C_h[i] != 123) - Data_mismatch++; - } - if (Data_mismatch != 0) { - printf("Data mismatch found when data copied from Ipc memhandle"); - printf(" Host.\n"); - shrd_mem->IfTestPassed = false; - } - } - HIPCHECK_NO_RETURN(status, 
hipIpcCloseMemHandle(reinterpret_cast(Ad1))); - HIPCHECK_NO_RETURN(status, hipFree(Ad2)); - } - if ((out=sem_post(sem_ob2)) == -1) { - shrd_mem->IfTestPassed = false; - printf("sem_post() call on sem_ob2 failed"); - exit(1); - } - } - exit(0); - } - - if ((out = sem_unlink("/my-sem-object1")) == -1) { - printf("sem_unlink() call on /my-sem-object1 failed"); - } - if ((out = sem_unlink("/my-sem-object2")) == -1) { - printf("sem_unlink() call on /my-sem-object2 failed"); - } - int rFlag = 0; // return flag - waitpid(pid, &rFlag, 0); - if (shrd_mem->IfTestPassed == false) { - return false; - } else { - return true; - } -} - -IpcMemHandleTest::IpcMemHandleTest() { - std::string cmd_line = "rm -rf /dev/shm/sem.my-sem-object*"; - int res = system(cmd_line.c_str()); - if (res == -1) { - InitFlag = false; - printf("System call to remove existing shared objects failed!"); - } - int out; - if ((sem_ob1 = sem_open ("/my-sem-object1", O_CREAT|O_EXCL, 0660, 0)) == - SEM_FAILED) { - InitFlag = false; - printf("Initialization of 1st semaphore object failed"); - } - if ((sem_ob2 = sem_open ("/my-sem-object2", O_CREAT|O_EXCL, 0660, 0)) == - SEM_FAILED) { - InitFlag = false; - printf("Initialization of 2nd semaphore object failed"); - } - - shrd_mem = reinterpret_cast(mmap(NULL, sizeof(hip_ipc_t), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - 0, 0)); - if (shrd_mem == NULL) { - InitFlag = false; - printf("mmap() call failed!"); - } - shrd_mem->IfTestPassed = true; - A_h = reinterpret_cast(malloc(Nbytes)); - C_h = reinterpret_cast(malloc(Nbytes)); - for (size_t i = 0; i < N; i++) { - A_h[i] = 123; - } -} - -IpcMemHandleTest::~IpcMemHandleTest() { - munmap(shrd_mem, sizeof(hip_ipc_t)); - free(A_h); - free(C_h); -} -#endif - -int main() { - bool IfTestPassed = true; - // The following program spawns a child process and does the following - // Parent iterate through each device, create memory -- create hipIpcMemhandle - // stores the mem handle in mmaped memory, release the 
child using sem_post() - // and wait for child to release itself(parent process) - // child process: - // Child process get the ipc mem handle using hipIpcOpenMemHandle - // Iterate through all the available gpus and do Device to Device copies - // and check for data consistencies and close the hipIpcCloseMemHandle - // release the parent and wait for parent to release itself(child) -#ifdef __linux__ - IpcMemHandleTest obj; - IfTestPassed = obj.Test(); -#else - printf("This is not a Linux platform. Hence Skipping the test!\n"); - IfTestPassed = true; -#endif - if (IfTestPassed == false) { - failed("\n"); - } - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMallocConcurrency.cpp b/tests/src/runtimeApi/memory/hipMallocConcurrency.cpp deleted file mode 100644 index 44a4c5d086..0000000000 --- a/tests/src/runtimeApi/memory/hipMallocConcurrency.cpp +++ /dev/null @@ -1,503 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -Testcase Scenarios : - - (TestCase 1):: - 1) Test hipMalloc() api passing zero size and confirming *ptr returning - nullptr. Also pass nullptr to hipFree() api. - 2) Pass maximum value of size_t for hipMalloc() api and make sure appropriate - error is returned. - 3) Check for hipMalloc() error code, passing invalid/null pointer. - - (TestCase 2):: - 4) Regress hipMalloc()/hipFree() in loop for bigger chunk of allocation - with adequate number of iterations and later test for kernel execution on - default gpu. - 5) Regress hipMalloc()/hipFree() in loop while allocating smaller chunks - keeping maximum number of iterations and then run kernel code on default - gpu, perfom data validation. - - (TestCase 3):: - 6) Check hipMalloc() api adaptability when app creates small chunks of memory - continuously, stores it for later use and then frees it at later point - of time. - - (TestCase 4):: - 7) Run hipMalloc() api/kernel code on same gpu parallely from parent and child - processes, validate the results. - - (TestCase 5):: - 8) Execute hipMalloc() api simultaneously on all the gpus by spawning multiple - child processes. Validate buffers allocated after running kernel code. - - (TestCase 6):: - 9) Multithread Scenario : Exercise hipMalloc() api parellely on all gpus from - multiple threads and regress the api. - - (TestCases 2, 3, 4, 5, 6):: - 10) Validate memory usage with hipMemGetInfo() while regressing hipMalloc() - api. Check for any possible memory leaks. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 LINK_OPTIONS -lpthread - * TEST_NAMED: %t hipMalloc_ArgValidation --tests 1 - * TEST_NAMED: %t hipMalloc_LoopRegression_AllocFreeCycle --tests 2 - * TEST_NAMED: %t hipMalloc_LoopRegression_AllocPool --tests 3 - * TEST_NAMED: %t hipMallocChild_Concurrency_DefaultGpu --tests 4 - * TEST_NAMED: %t hipMallocChild_Concurrency_MultiGpu --tests 5 - * TEST_NAMED: %t hipMalloc_MultiThreaded_MultiGpu --tests 6 - * HIT_END - */ - -#include -#ifdef __linux__ -#include -#include -#endif -#include -#include -#include -#include - -#include "test_common.h" - -/* Max alloc/free iterations for bigger chunks */ -#define MAX_ALLOCFREE_BC (10000) - -/* Buffer size for alloc/free cycles */ -#define BUFF_SIZE_AF (5*1024*1024) - -/* You may change it for individual test. - * But default 100 is for quick return in Jenkin Build */ -#define NUM_DIV 100 - -/* Max alloc/free iterations for smaller chunks */ -#define MAX_ALLOCFREE_SC (5000000/NUM_DIV) - -/* Max alloc and pool iterations (TBD) */ -#define MAX_ALLOCPOOL_ITER (2000000/NUM_DIV) - -/** - * Validates data consitency on supplied gpu - */ -bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) { - size_t Nbytes = N * sizeof(int); - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - size_t prevAvl, prevTot, curAvl, curTot; - bool TestPassed = true; - - HIPCHECK(hipSetDevice(gpu)); - HIPCHECK(hipMemGetInfo(&prevAvl, &prevTot)); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - HIPCHECK(hipMemGetInfo(&curAvl, &curTot)); - - if (!concurOnOneGPU && (prevAvl < curAvl|| prevTot != curTot)) { - //In concurrent calls on one GPU, we cannot verify leaking in this way - printf("%s : Memory allocation mismatch observed." 
- "Possible memory leak.\n", __func__); - TestPassed &= false; - } - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, 0, static_cast(A_d), - static_cast(B_d), C_d, N); - - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) { - printf("Validation PASSED for gpu %d from pid %d\n", gpu, getpid()); - } else { - printf("%s : Validation FAILED for gpu %d from pid %d\n", - __func__, gpu, getpid()); - TestPassed &= false; - } - - HIPCHECK(hipMemGetInfo(&prevAvl, &prevTot)); - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipMemGetInfo(&curAvl, &curTot)); - - if (!concurOnOneGPU && (curAvl < prevAvl || prevTot != curTot)) { - //In concurrent calls on one GPU, we cannot verify leaking in this way - printf("%s : Memory allocation mismatch observed." 
- "Possible memory leak.\n", __func__); - TestPassed &= false; - } - - return TestPassed; -} - -/** - * Fetches Gpu device count - */ -void getDeviceCount(int *pdevCnt) { -#ifdef __linux__ - int fd[2], val = 0; - pid_t childpid; - - // create pipe descriptors - pipe(fd); - - // disable visible_devices env from shell - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - - childpid = fork(); - - if (childpid > 0) { // Parent - close(fd[1]); - // parent will wait to read the device cnt - read(fd[0], &val, sizeof(val)); - - // close the read-descriptor - close(fd[0]); - - // wait for child exit - wait(NULL); - - *pdevCnt = val; - } else if (!childpid) { // Child - int devCnt = 1; - // writing only, no need for read-descriptor - close(fd[0]); - - HIPCHECK(hipGetDeviceCount(&devCnt)); - // send the value on the write-descriptor: - write(fd[1], &devCnt, sizeof(devCnt)); - - // close the write descriptor: - close(fd[1]); - exit(0); - } else { // failure - *pdevCnt = 1; - return; - } - -#else - HIPCHECK(hipGetDeviceCount(pdevCnt)); -#endif -} - -/** - * Regress memory allocation and free in loop - */ -bool regressAllocInLoop(int gpu) { - bool TestPassed = true; - size_t tot, avail, ptot, pavail; - int i = 0; - int *ptr; - - HIPCHECK(hipSetDevice(gpu)); - - // Exercise allocation in loop with bigger chunks - for (i = 0; i < MAX_ALLOCFREE_BC; i++) { - size_t numBytes = BUFF_SIZE_AF; - - HIPCHECK(hipMemGetInfo(&pavail, &ptot)); - HIPCHECK(hipMalloc(&ptr, numBytes)); - HIPCHECK(hipMemGetInfo(&avail, &tot)); - - if (pavail-avail < numBytes) // We expect pavail-avail >= numBytes - { - printf("LoopAllocation %d : Memory allocation of %6.2fMB " - "not matching with hipMemGetInfo - FAIL\n" - "pavail=%zu, ptot=%zu, avail=%zu, tot=%zu, pavail-avail=%zu \n", - i, numBytes/(1024.0*1024.0), pavail, ptot, avail, tot, pavail-avail); - TestPassed &= false; - HIPCHECK(hipFree(ptr)); - break; - } - - HIPCHECK(hipFree(ptr)); - } - - // Exercise allocation in loop with 
smaller chunks and max iters - HIPCHECK(hipMemGetInfo(&pavail, &ptot)); - - for (i = 0; i < MAX_ALLOCFREE_SC; i++) { - size_t numBytes = 16; - - HIPCHECK(hipMalloc(&ptr, numBytes)); - - HIPCHECK(hipFree(ptr)); - } - - HIPCHECK(hipMemGetInfo(&avail, &tot)); - - if ((pavail != avail) || (ptot != tot)) { - printf("LoopAllocation : Memory allocation mismatch observed." - "Possible memory leak."); - TestPassed &= false; - } - - return TestPassed; -} - -/* - * Thread func to regress alloc and check data consistency - */ - -std::atomic g_thTestPassed(true); - -void threadFunc(int gpu) { - g_thTestPassed = g_thTestPassed & regressAllocInLoop(gpu); - g_thTestPassed = g_thTestPassed & validateMemoryOnGPU(gpu); - - printf("thread execution status on gpu(%d) : %d\n", gpu, g_thTestPassed.load()); -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - if (p_tests == 1) { // Arg validation - // Test hipMalloc for zero size - bool TestPassed = true; - int *ptr; - - HIPCHECK(hipMalloc(&ptr, 0)); - - // ptr expected to be reset to null ptr - if (ptr) { - printf("ArgValidation : Failed in zero size test\n"); - TestPassed &= false; - } - - // Free null ptr - HIPCHECK(hipFree(ptr)); - - // Test hipMalloc for invalid arguments - hipError_t ret; - - if ((ret = hipMalloc(NULL, 100)) != hipErrorInvalidValue) { - printf("ArgValidation : Inappropritate error value returned" - " for invalid argument. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - // Test hipMalloc for Maximum value of size_t - if ((ret = hipMalloc(&ptr, std::numeric_limits::max())) - != hipErrorMemoryAllocation) { - printf("ArgValidation : Invalid error returned for max size_t." 
- " Error: '%s'(%d)\n", hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if (TestPassed) { - passed(); - } else { - failed("hipMalloc ArgumentValidation Failure!"); - } - - } else if (p_tests == 2) { // Loop Regression Alloc/Free Cycle - bool TestPassed = true; - - TestPassed &= regressAllocInLoop(0); - TestPassed &= validateMemoryOnGPU(0); - - if (TestPassed) { - passed(); - } else { - failed("hipMalloc_LoopRegression_AllocFreeCycle Failure!"); - } - - } else if (p_tests == 3) { // Loop Regression Alloc and Pool - size_t avail, tot, pavail, ptot; - bool TestPassed = true; - hipError_t err; - int *ptr; - - std::vector ptrlist; - - HIPCHECK(hipMemGetInfo(&pavail, &ptot)); - - // Allocate small chunks of memory million times - for (int i = 0; i < MAX_ALLOCPOOL_ITER; i++) { // Iterations TBD - if ((err = hipMalloc(&ptr, 10)) != hipSuccess) { - HIPCHECK(hipMemGetInfo(&avail, &tot)); - - printf("Loop regression pool allocation failure. " - "Total gpu memory : %6.2fMB, Free memory %6.2fMB iter %d error '%s'\n", - tot/(1024.0*1024.0), avail/(1024.0*1024.0), i, hipGetErrorString(err)); - - TestPassed &= false; - break; - } - - // Store pointers allocated to emulate memory pool of app - ptrlist.push_back(ptr); - } - - // Free ptrs at later point of time - for ( auto &t : ptrlist ) { - HIPCHECK(hipFree(t)); - } - - HIPCHECK(hipMemGetInfo(&avail, &tot)); - - TestPassed &= validateMemoryOnGPU(0); - - if ((pavail != avail) || (ptot != tot)) { - printf("%s : Memory allocation mismatch observed. 
Possible memory leak.", - __func__); - TestPassed &= false; - } - - if (TestPassed) { - passed(); - } else { - failed("hipMalloc_LoopRegression_AllocPool failure!"); - } - - } else if (p_tests == 4) { - bool TestPassed = true; - -#ifdef __linux__ - // Parallel execution of parent and child on gpu0 - int pid; - - if ((pid = fork()) < 0) { - printf("Child_Concurrency_Gpu0 : fork() returned error %d.", pid); - TestPassed &= false; - - } else if (!pid) { // Child process - bool TestPassedChild = true; - TestPassedChild = validateMemoryOnGPU(0, true); - - if (TestPassedChild) { - exit(0); // child exit with success status - } else { - printf("Child_Concurrency_Gpu0 : childpid %d failed\n", getpid()); - exit(1); // child exit with failure status - } - - } else { // Parent process - int exitStatus; - TestPassed = validateMemoryOnGPU(0, true); - - pid = wait(&exitStatus); - if ( WEXITSTATUS(exitStatus) || ( pid < 0 ) ) - TestPassed &= false; - } -#else - printf("Test hipMallocChild_Concurrency_DefaultGpu skipped on non-linux\n"); -#endif - - // TC scenarios specific to linux - // are treated as pass in windows. 
- if (TestPassed) { - passed(); - } else { - failed("hipMallocChild_Concurrency_DefaultGpu Failed!"); - } - - } else if (p_tests == 5) { - bool TestPassed = true; -#ifdef __linux__ - // Parallel execution on multiple gpus from different child processes - int devCnt = 1, pid = 0, cumStatus = 0; - - // Get GPU count - getDeviceCount(&devCnt); - - // Spawn child for each GPU - for (int gpu = 0; gpu < devCnt; gpu++) { - if ((pid = fork()) < 0) { - printf("Child_Concurrency_MultiGpu : fork() returned error %d\n", pid); - failed("Test Failed!"); - - } else if (!pid) { // Child process - bool TestPassedChild = true; - TestPassedChild = validateMemoryOnGPU(gpu); - - if (TestPassedChild) { - exit(0); // child exit with success status - } else { - printf("Child_Concurrency_MultiGpu : childpid %d failed\n", - getpid()); - exit(1); // child exit with failure status - } - } - } - - // Parent shall wait for child to complete - for (int i = 0; i < devCnt; i++) { - int pidwait = 0, exitStatus; - pidwait = wait(&exitStatus); - - if (pidwait < 0) { - TestPassed &= false; - break; - } - - cumStatus |= WEXITSTATUS(exitStatus); - } - - // Cummulative status of all child - if (cumStatus) { - TestPassed &= false; - } - -#else - printf("Test hipMallocChild_Concurrency_MultiGpu skipped on non-linux\n"); -#endif - - - // TC scenarios specific to linux - // are treated as pass in windows. - if (TestPassed) { - passed(); - } else { - failed("hipMallocChild_Concurrency_MultiGpu Failed!"); - } - - } else if (p_tests == 6) { // Multithreaded multiple gpu execution - std::vector threadlist; - int devCnt = 1; - - // Get GPU count - getDeviceCount(&devCnt); - - - for (int i = 0; i < devCnt; i++) { - threadlist.push_back(std::thread(threadFunc, i)); - } - - for (auto &t : threadlist) { - t.join(); - } - - if (g_thTestPassed) { - passed(); - } else { - failed("hipMalloc_MultiThreaded_MultiGpu Failed!"); - } - } else { - failed("Didnt receive any valid option. 
Try options 1 to 6\n"); - } -} - diff --git a/tests/src/runtimeApi/memory/hipMallocManaged.cpp b/tests/src/runtimeApi/memory/hipMallocManaged.cpp deleted file mode 100644 index 5fcf1462c6..0000000000 --- a/tests/src/runtimeApi/memory/hipMallocManaged.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include -#include -#include "test_common.h" - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp CLANG_OPTIONS -g -O0 - * TEST: %t -N 256M - * HIT_END - */ - -__global__ -void add(int n, float *x, float *y) -{ - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) - y[i] = x[i] + y[i]; -} - -int main(int argc, char *argv[]) -{ - HipTest::parseStandardArguments(argc, argv, true); - int concurrentManagedAccess = 0; - HIPCHECK(hipDeviceGetAttribute(&concurrentManagedAccess, - hipDeviceAttributeConcurrentManagedAccess, - p_gpuDevice)); - if(!concurrentManagedAccess) { - printf("info: concurrent managed access not supported on device %d\n Skipped\n", - p_gpuDevice); - passed(); - } - - printf("info: set device to %d\n", p_gpuDevice); - HIPCHECK(hipSetDevice(p_gpuDevice)); - - int numElements = (N < (64 * 1024 * 1024)) ? 
64 * 1024 * 1024 : N; - bool testResult = true; - float *A, *B; - - HIPCHECK(hipMallocManaged(&A, numElements*sizeof(float))); - HIPCHECK(hipMallocManaged(&B, numElements*sizeof(float))); - - for (int i = 0; i < numElements; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - - hipDevice_t device = hipCpuDeviceId; - - HIPCHECK(hipMemAdvise(A, numElements*sizeof(float), hipMemAdviseSetReadMostly, device)); - HIPCHECK(hipMemPrefetchAsync(A, numElements*sizeof(float), 0)); - HIPCHECK(hipMemPrefetchAsync(B, numElements*sizeof(float), 0)); - HIPCHECK(hipDeviceSynchronize()); - HIPCHECK(hipMemRangeGetAttribute(&device, sizeof(device), hipMemRangeAttributeLastPrefetchLocation, A, numElements*sizeof(float))); - if (device != p_gpuDevice) { - printf("hipMemRangeGetAttribute error, device = %d!\n", device); - } - uint32_t read_only = 0xf; - HIPCHECK(hipMemRangeGetAttribute(&read_only, sizeof(read_only), hipMemRangeAttributeReadMostly, A, numElements*sizeof(float))); - if (read_only != 1) { - printf("hipMemRangeGetAttribute error, read_only = %d!\n", read_only); - } - - int blockSize = 256; - int numBlocks = (numElements + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - hipEvent_t event0, event1; - HIPCHECK(hipEventCreate(&event0)); - HIPCHECK(hipEventCreate(&event1)); - HIPCHECK(hipEventRecord(event0, 0)); - hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, numElements, A, B); - HIPCHECK(hipEventRecord(event1, 0)); - HIPCHECK(hipDeviceSynchronize()); - float time = 0.0f; - HIPCHECK(hipEventElapsedTime(&time, event0, event1)); - printf("Time %.3f ms\n", time); - - float maxError = 0.0f; - HIPCHECK(hipMemPrefetchAsync(B, numElements*sizeof(float), hipCpuDeviceId)); - HIPCHECK(hipDeviceSynchronize()); - device = p_gpuDevice; - HIPCHECK(hipMemRangeGetAttribute(&device, sizeof(device), hipMemRangeAttributeLastPrefetchLocation, A, numElements*sizeof(float))); - if (device != hipCpuDeviceId) { - printf("hipMemRangeGetAttribute error (CPU 
device is expected), device = %d!\n", device); - } - for (int i = 0; i < numElements; i++) - maxError = fmax(maxError, fabs(B[i]-3.0f)); - - HIPCHECK(hipFree(A)); - HIPCHECK(hipFree(B)); - if(maxError == 0.0f) - passed(); - failed("Output Mismatch\n"); -} diff --git a/tests/src/runtimeApi/memory/hipMallocManaged_MultiScenario.cpp b/tests/src/runtimeApi/memory/hipMallocManaged_MultiScenario.cpp deleted file mode 100644 index e1f17f3ec4..0000000000 --- a/tests/src/runtimeApi/memory/hipMallocManaged_MultiScenario.cpp +++ /dev/null @@ -1,463 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* Test 6 is disabled */ -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t hipMallocManaged1 --tests 1 - * TEST_NAMED: %t hipMallocManaged2 --tests 2 - * TEST_NAMED: %t hipMallocManagedNegativeTests --tests 3 - * TEST_NAMED: %t hipMallocManagedMultiChunkSingleDevice --tests 4 - * TEST_NAMED: %t hipMallocManagedMultiChunkMultiDevice --tests 5 - * TEST_NAMED: %t hipMallocManagedOversubscription --tests 6 EXCLUDE_HIP_PLATFORM nvidia EXCLUDE_HIP_RUNTIME rocclr - * HIT_END - */ - -#include -#include "test_common.h" -#define N 1048576 // equals to (1024*1024) -#define INIT_VAL 123 - -/* - * Kernel function to perform addition operation. - */ -template -__global__ void -vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < NUM_ELMTS; i += stride) { - Ad2[i] = Ad1[i] + Ad1[i]; - } -} - -// The following Test case tests the following scenario: -// A large chunk of hipMallocManaged() memory(Hmm) is created -// Equal parts of Hmm is accessed on available gpus and -// kernel is launched on acessed chunk of hmm memory -// and checks if there are any inconsistencies or access issues -bool MultiChunkMultiDevice(int NumDevices) { - std::atomic DataMismatch{0}; - bool IfTestPassed = true; - int Counter = 0; - unsigned int NUM_ELMS = (1024 * 1024); - float *Ad[NumDevices], *Hmm = NULL, *Ah = new float[NUM_ELMS]; - hipStream_t stream[NumDevices]; - for (int Oloop = 0; Oloop < NumDevices; ++Oloop) { - HIPCHECK(hipSetDevice(Oloop)); - HIPCHECK(hipMalloc(&Ad[Oloop], NUM_ELMS * sizeof(float))); - HIPCHECK(hipMemset(Ad[Oloop], 0, NUM_ELMS * sizeof(float))); - HIPCHECK(hipStreamCreate(&stream[Oloop])); - } - HIPCHECK(hipMallocManaged(&Hmm, (NumDevices * NUM_ELMS * sizeof(float)))); - for (int i = 0; i < NumDevices; ++i) { - for (; Counter < ((i + 1) * NUM_ELMS); ++Counter) { - Hmm[Counter] = INIT_VAL + i; - } - } 
- const unsigned threadsPerBlock = 256; - const unsigned blocks = (NUM_ELMS + 255)/256; - for (int Klaunch = 0; Klaunch < NumDevices; ++Klaunch) { - - // If without setting device, Hmm value will be read as 0 in kernel on - // GPU where Hmm isn't allocated by hipMallocManaged(). This looks like - // a bug of cuda. The following line is to fix the bug on cuda only. - HIPCHECK(hipSetDevice(Klaunch)); - - vector_sum <<>> - (&Hmm[Klaunch * NUM_ELMS], Ad[Klaunch], NUM_ELMS); - } - for (int m = 0; m < NumDevices; ++m) { - HIPCHECK(hipStreamSynchronize(stream[m])); - HIPCHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), - hipMemcpyDeviceToHost)); - for (int n = 0; n < NUM_ELMS; ++n) { - if (Ah[n] != ((INIT_VAL + m) * 2)) { - DataMismatch++; - } - } - memset(reinterpret_cast(Ah), 0, NUM_ELMS * sizeof(float)); - } - if (DataMismatch.load() != 0) { - printf("MultiChunkMultiDevice: Mismatch observed!\n"); - IfTestPassed = false; - } - for (int i = 0; i < NumDevices; ++i) { - HIPCHECK(hipFree(Ad[i])); - HIPCHECK(hipStreamDestroy(stream[i])); - } - HIPCHECK(hipFree(Hmm)); - free(Ah); - return IfTestPassed; -} - -// The following Test case tests the following scenario: -// A large chunk of hipMallocManaged() memory(Hmm) is created -// Equal parts of Hmm is accessed and -// kernel is launched on acessed chunk of hmm memory -// and checks if there are any inconsistencies or access issues - -bool MultiChunkSingleDevice(int NumDevices) { - std::atomic DataMismatch{0}; - int Chunks = 4, Counter = 0; - bool IfTestPassed = true; - unsigned int NUM_ELMS = (1024 * 1024); - float *Ad[Chunks], *Hmm = NULL, *Ah = new float[NUM_ELMS]; - hipStream_t stream[Chunks]; - for (int i = 0; i < Chunks; ++i) { - HIPCHECK(hipMalloc(&Ad[i], NUM_ELMS * sizeof(float))); - HIPCHECK(hipMemset(Ad[i], 0, NUM_ELMS * sizeof(float))); - HIPCHECK(hipStreamCreate(&stream[i])); - } - HIPCHECK(hipMallocManaged(&Hmm, (Chunks * NUM_ELMS * sizeof(float)))); - for (int i = 0; i < Chunks; ++i) { - for (; Counter < ((i 
+ 1) * NUM_ELMS); ++Counter) { - Hmm[Counter] = (INIT_VAL + i); - } - } - const unsigned threadsPerBlock = 256; - const unsigned blocks = (NUM_ELMS + 255)/256; - for (int k = 0; k < Chunks; ++k) { - vector_sum <<>> - (&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS); - } - HIPCHECK(hipDeviceSynchronize()); - for (int m = 0; m < Chunks; ++m) { - HIPCHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), - hipMemcpyDeviceToHost)); - for (int n = 0; n < NUM_ELMS; ++n) { - if (Ah[n] != ((INIT_VAL + m) * 2)) { - DataMismatch++; - } - } - } - if (DataMismatch.load() != 0) { - printf("MultiChunkSingleDevice: Mismatch observed!\n"); - IfTestPassed = false; - } - for (int i = 0; i < Chunks; ++i) { - HIPCHECK(hipFree(Ad[i])); - HIPCHECK(hipStreamDestroy(stream[i])); - } - HIPCHECK(hipFree(Hmm)); - free(Ah); - return IfTestPassed; -} - -// The following tests oversubscription hipMallocManaged() api -// Currently disabled. -bool TestOversubscriptionMallocManaged(int NumDevices) { - bool IfTestPassed = true; - hipError_t err; - void *A = NULL; - size_t total = 0, free = 0; - HIPCHECK(hipMemGetInfo(&free, &total)); - // ToDo: In case of HMM, memory over-subscription is allowed. Hence, relook - // into how out of memory can be tested. 
- // Demanding more mem size than available - err = hipMallocManaged(&A, (free +1), hipMemAttachGlobal); - if (hipErrorOutOfMemory != err) { - printf("hipMallocManaged: Returned %s for size value > device memory\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - return IfTestPassed; -} - -// The following test does negative testing of hipMallocManaged() api -// by passing invalid values and check if the behavior is as expected -bool NegativeTestsMallocManaged(int NumDevices) { - bool IfTestPassed = true; - hipError_t err; - void *A; - size_t total = 0, free = 0; - HIPCHECK(hipMemGetInfo(&free, &total)); - - err = hipMallocManaged(NULL, 1024, hipMemAttachGlobal); - if (hipErrorInvalidValue != err) { - printf("hipMallocManaged: Returned %s when devPtr is null\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - // cuda api doc says : If size is 0, cudaMallocManaged returns - // cudaErrorInvalidValue. However, it is observed that cuda 11.2 api returns - // success and contradicts with api doc. - - // With size(0), api expected to return error code (or) - // reset ptr while returning success (to accommodate cuda 11.2 api behavior). - err = hipMallocManaged(&A, 0, hipMemAttachGlobal); - if ((hipErrorInvalidValue == err) || - ((hipSuccess == err) && (nullptr == A))) { - IfTestPassed &= true; - } else { - IfTestPassed = false; - } - - err = hipMallocManaged(NULL, 0, hipMemAttachGlobal); - if (hipErrorInvalidValue != err) { - printf("hipMallocManaged: Returned %s when devPtr & size is null & 0\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - err = hipMallocManaged(NULL, 1024, hipMemAttachHost); - if (hipErrorInvalidValue != err) { - printf("hipMallocManaged: Returned %s for 'hipMemAttachHost' flag\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - // cuda api doc says : If size is 0, cudaMallocManaged returns - // cudaErrorInvalidValue. 
However, it is observed that cuda 11.2 api returns - // success and contradicts with api doc. - - // With size(0), api expected to return error code (or) - // reset ptr while returning success (to accommodate cuda 11.2 api behavior). - err = hipMallocManaged(&A, 0, hipMemAttachHost); - if ((hipErrorInvalidValue == err) || - ((hipSuccess == err) && (nullptr == A))) { - IfTestPassed &= true; - } else { - IfTestPassed = false; - } - - err = hipMallocManaged(NULL, 0, hipMemAttachHost); - if (hipErrorInvalidValue != err) { - printf("hipMallocManaged: Returned %s when devPtr & size is null & 0\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - err = hipMallocManaged(NULL, 0, 0); - if (hipErrorInvalidValue != err) { - printf("hipMallocManaged: Returned %s when params are null, 0, 0\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - err = hipMallocManaged(&A, 1024, 145); - if (hipErrorInvalidValue != err) { - printf("hipMallocManaged: Returned %s when flag param is numerical 145\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - err = hipMallocManaged(&A, -10, hipMemAttachGlobal); - if (hipErrorOutOfMemory != err) { - printf("hipMallocManaged: Returned %s for negative size value.\n", - hipGetErrorString(err)); - IfTestPassed = false; - } - - return IfTestPassed; -} - - -// Allocate two pointers using hipMallocManaged(), initialize, -// then launch kernel using these pointers directly and -// later validate the content without using any Memcpy. 
-template -bool TestMallocManaged2(int NumDevices) { - bool IfTestPassed = true; - T *Hmm1 = NULL, *Hmm2 = NULL; - - for (int i = 0; i < NumDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - std::atomic DataMismatch{0}; - HIPCHECK(hipMallocManaged(&Hmm1, N * sizeof(T))); - HIPCHECK(hipMallocManaged(&Hmm2, N * sizeof(T))); - for (int m = 0; m < N; ++m) { - Hmm1[m] = m; - Hmm2[m] = 0; - } - const unsigned threadsPerBlock = 256; - const unsigned blocks = (N + 255)/256; - // Kernel launch - vector_sum <<>> (Hmm1, Hmm2, N); - HIPCHECK(hipDeviceSynchronize()); - for (int v = 0; v < N; ++v) { - if (Hmm2[v] != (v + v)) { - DataMismatch++; - } - } - if (DataMismatch.load() != 0) { - IfTestPassed = false; - } - HIPCHECK(hipFree(Hmm1)); - HIPCHECK(hipFree(Hmm2)); - } - return IfTestPassed; -} - -// In the following test, a memory is created using hipMallocManaged() by -// setting a device and verified if it is accessible when the context is set -// to all other devices. This include verification and Device two Device -// transfers and kernel launch o discover if there any access issues. 
- -template -bool TestMallocManaged1(int NumDevices) { - std::atomic DataMismatch; - bool TestPassed = true; - T *Ah1 = new T[N], *Ah2 = new T[N], *Ad = NULL, *Hmm = NULL; - - for (int i =0; i < N; ++i) { - Ah1[i] = INIT_VAL; - Ah2[i] = 0; - } - for (int Oloop = 0; Oloop < NumDevices; ++Oloop) { - DataMismatch = 0; - HIPCHECK(hipSetDevice(Oloop)); - HIPCHECK(hipMallocManaged(&Hmm, N * sizeof(T))); - for (int Iloop = 0; Iloop < NumDevices; ++Iloop) { - HIPCHECK(hipSetDevice(Iloop)); - HIPCHECK(hipMalloc(&Ad, N * sizeof(T))); - // Copy data from host to hipMallocMananged memory and verify - HIPCHECK(hipMemcpy(Hmm, Ah1, N * sizeof(T), hipMemcpyHostToDevice)); - for (int v = 0; v < N; ++v) { - if (Hmm[v] != INIT_VAL) { - DataMismatch++; - } - } - if (DataMismatch.load() != 0) { - printf("Mismatch is observed with host data at device %d", Iloop); - printf(" while hipMallocManaged memory set to the device %d\n", Oloop); - TestPassed = false; - DataMismatch = 0; - } - // Executing D2D transfer with hipMallocManaged memory and verify - HIPCHECK(hipMemcpy(Ad, Hmm, N * sizeof(T), hipMemcpyDeviceToDevice)); - HIPCHECK(hipMemcpy(Ah2, Ad, N * sizeof(T), hipMemcpyDeviceToHost)); - for (int k = 0; k < N; ++k) { - if (Ah2[k] != INIT_VAL) { - DataMismatch++; - } - } - if (DataMismatch.load() != 0) { - printf("Mismatch is observed with D2D transfer at device %d\n", Iloop); - printf(" while hipMallocManaged memory set to the device %d\n", Oloop); - TestPassed = false; - DataMismatch = 0; - } - HIPCHECK(hipMemset(Ad, 0, N * sizeof(T))); - const unsigned threadsPerBlock = 256; - const unsigned blocks = (N + 255)/256; - // Launching the kernel to check if there is any access issue with - // hipMallocManaged memory and local device's memory - vector_sum <<>> (Hmm, Ad, N); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy(Ah2, Ad, N * sizeof(T), hipMemcpyDeviceToHost)); - for (int m = 0; m < N; ++m) { - if (Ah2[m] != 246) { - DataMismatch++; - } - } - if (DataMismatch.load() != 0) { - 
printf("Data Mismatch observed after kernel lch device %d\n", Iloop); - TestPassed = false; - DataMismatch = 0; - } - HIPCHECK(hipFree(Ad)); - } - HIPCHECK(hipFree(Hmm)); - } - free(Ah1); - free(Ah2); - return TestPassed; -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - if ((p_tests <= 0) || (p_tests > 5)) { - failed("Valid arguments are from 1 to 5"); - } - - int managed_memory = 0; - HIPCHECK(hipDeviceGetAttribute(&managed_memory, - hipDeviceAttributeManagedMemory, - p_gpuDevice)); - if (!managed_memory) { - printf("info: managed memory access not supported on device %d\n Skipped\n", p_gpuDevice); - passed(); - } - - int NumDevices = 0; - HIPCHECK(hipGetDeviceCount(&NumDevices)); - - bool TestStatus = true, OverAllStatus = true; - if (p_tests == 1) { - TestStatus = TestMallocManaged1(NumDevices); - if (!TestStatus) { - printf("Test Failed with float datatype.\n"); - OverAllStatus = false; - } - TestStatus = TestMallocManaged1(NumDevices); - if (!TestStatus) { - printf("Test Failed with int datatype.\n"); - OverAllStatus = false; - } - TestStatus = TestMallocManaged1(NumDevices); - if (!TestStatus) { - printf("Test Failed with unsigned char datatype.\n"); - OverAllStatus = false; - } - TestStatus = TestMallocManaged1(NumDevices); - if (!TestStatus) { - printf("Test Failed with double datatype.\n"); - OverAllStatus = false; - } - if (!OverAllStatus) { - failed("\n"); - } - } - if (p_tests == 2) { - TestStatus = TestMallocManaged2(NumDevices); - if (!TestStatus) { - failed("Test Failed with float datatype."); - } - } - if (p_tests == 3) { - TestStatus = NegativeTestsMallocManaged(NumDevices); - if (!TestStatus) { - failed("Negative Tests with hipMallocManaged() failed!."); - } - } - if (p_tests == 4) { - TestStatus = MultiChunkSingleDevice(NumDevices); - if (!TestStatus) { - failed("hipMallocManaged: MultiChunkSingleDevice test failed!"); - } - } - if (p_tests == 5) { - TestStatus = MultiChunkMultiDevice(NumDevices); 
- if (!TestStatus) { - failed("hipMallocManaged: MultiChunkMultiDevice test failed!"); - } - } - if (p_tests == 6) { - TestStatus = TestOversubscriptionMallocManaged(NumDevices); - if (!TestStatus) { - failed("hipMallocManaged: TestOversubscriptionMallocManaged failed!"); - } - } - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipManagedKeyword.cpp b/tests/src/runtimeApi/memory/hipManagedKeyword.cpp deleted file mode 100644 index a3d714a6a3..0000000000 --- a/tests/src/runtimeApi/memory/hipManagedKeyword.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include -#include "test_common.h" - -//Enable test when compiler support is available in mainline -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * HIT_END - */ -#define N 1048576 -__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!! -__managed__ float B[N]; -__managed__ int x = 0; - -__global__ void add() -{ - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < N; i += stride) - B[i] = A[i] + B[i]; -} - -__global__ void GPU_func() { - x++; -} - -bool managedSingleGPUTest() { - bool testResult = true; - - for (int i = 0; i < N; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - - int blockSize = 256; - int numBlocks = (N + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0); - - hipDeviceSynchronize(); - - float maxError = 0.0f; - for (int i = 0; i < N; i++) - maxError = fmax(maxError, fabs(B[i]-3.0f)); - - if(maxError == 0.0f) { - return true; - } - return false; -} - -bool managedMultiGPUTest() { - int numDevices = 0; - hipGetDeviceCount(&numDevices); - - for (int i = 0; i < numDevices; i++) { - hipSetDevice(i); - GPU_func<<< 1, 1 >>>( ); - hipDeviceSynchronize(); - } - if(x == numDevices) { - return true; - } - return false; -} - -int main(int argc, char *argv[]) { - bool testStatus = true, OverAllStatus = true; - testStatus = 
managedSingleGPUTest(); - if (!testStatus) { - printf("managed keyword Single GPU Test failed\n"); - OverAllStatus = false; - } - testStatus = managedMultiGPUTest(); - if (!testStatus) { - printf("managed keyword Multi GPU Test failed\n"); - OverAllStatus = false; - } - if (!OverAllStatus) { - failed("\n"); - } - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp b/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp deleted file mode 100644 index 437705ac5a..0000000000 --- a/tests/src/runtimeApi/memory/hipMemPtrGetInfo.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -struct { - float a; - int b; - void* c; -} Struct; - -int main() { - int* iPtr; - float* fPtr; - struct Struct* sPtr; - size_t sSetSize = 1024, sGetSize; - hipMalloc(&iPtr, sSetSize); - hipMalloc(&fPtr, sSetSize); - hipMalloc(&sPtr, sSetSize); - hipMemPtrGetInfo(iPtr, &sGetSize); - assert(sGetSize == sSetSize); - hipMemPtrGetInfo(fPtr, &sGetSize); - assert(sGetSize == sSetSize); - hipMemPtrGetInfo(sPtr, &sGetSize); - assert(sGetSize == sSetSize); - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMemcpy.cpp b/tests/src/runtimeApi/memory/hipMemcpy.cpp deleted file mode 100644 index 6735616b0b..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy.cpp +++ /dev/null @@ -1,529 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST_NAMED: %t hipMemcpy-modes --tests 0x1 - * TEST_NAMED: %t hipMemcpy-size --tests 0x6 - * TEST_NAMED: %t hipMemcpy-dev-offsets --tests 0x10 - * TEST_NAMED: %t hipMemcpy-host-offsets --tests 0x20 - * TEST_NAMED: %t hipMemcpy-multithreaded --tests 0x8 - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime.h" -#include "test_common.h" - -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#include -#else -#include "sys/types.h" -#include "sys/sysinfo.h" -#endif - -void printSep() { - printf( - "======================================================================================\n"); -} - -//------- -template -class DeviceMemory { - public: - DeviceMemory(size_t numElements); - ~DeviceMemory(); - - T* A_d() const { return _A_d + _offset; }; - T* B_d() const { return _B_d + _offset; }; - T* C_d() const { return _C_d + _offset; }; - T* C_dd() const { return _C_dd + _offset; }; - - size_t maxNumElements() const { return _maxNumElements; }; - - - void offset(int offset) { _offset = offset; }; - int offset() const { return _offset; }; - - private: - T* _A_d; - T* _B_d; - T* _C_d; - T* _C_dd; - - - size_t _maxNumElements; - int _offset; -}; - -template -DeviceMemory::DeviceMemory(size_t numElements) : _maxNumElements(numElements), _offset(0) { - T** np = nullptr; - HipTest::initArrays(&_A_d, &_B_d, &_C_d, np, np, np, numElements, 0); - - - size_t sizeElements = numElements * sizeof(T); - - - HIPCHECK(hipMalloc(&_C_dd, sizeElements)); -} - - -template -DeviceMemory::~DeviceMemory() { - T* np = nullptr; - HipTest::freeArrays(_A_d, _B_d, _C_d, np, np, np, 0); - - HIPCHECK(hipFree(_C_dd)); - - _C_dd = NULL; -}; - - -//------- -template -class HostMemory { - public: - HostMemory(size_t numElements, bool usePinnedHost); - void reset(size_t numElements, bool full = false); - ~HostMemory(); - - - T* A_h() const { return _A_h + _offset; }; - T* B_h() const { 
return _B_h + _offset; }; - T* C_h() const { return _C_h + _offset; }; - - - size_t maxNumElements() const { return _maxNumElements; }; - - void offset(int offset) { _offset = offset; }; - int offset() const { return _offset; }; - - public: - // Host arrays, secondary copy - T* A_hh; - T* B_hh; - - bool _usePinnedHost; - - private: - size_t _maxNumElements; - - int _offset; - - // Host arrays - T* _A_h; - T* _B_h; - T* _C_h; -}; - -template -HostMemory::HostMemory(size_t numElements, bool usePinnedHost) - : _maxNumElements(numElements), _usePinnedHost(usePinnedHost), _offset(0) { - T** np = nullptr; - HipTest::initArrays(np, np, np, &_A_h, &_B_h, &_C_h, numElements, usePinnedHost); - - A_hh = NULL; - B_hh = NULL; - - - size_t sizeElements = numElements * sizeof(T); - - if (usePinnedHost) { - HIPCHECK(hipHostMalloc((void**)&A_hh, sizeElements, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&B_hh, sizeElements, hipHostMallocDefault)); - } else { - A_hh = (T*)malloc(sizeElements); - B_hh = (T*)malloc(sizeElements); - } -} - - -template -void HostMemory::reset(size_t numElements, bool full) { - // Initialize the host data: - for (size_t i = 0; i < numElements; i++) { - (A_hh)[i] = 1097.0 + i; - (B_hh)[i] = 1492.0 + i; // Phi - - if (full) { - (_A_h)[i] = 3.146f + i; // Pi - (_B_h)[i] = 1.618f + i; // Phi - } - } -} - -template -HostMemory::~HostMemory() { - HipTest::freeArraysForHost(_A_h, _B_h, _C_h, _usePinnedHost); - - if (_usePinnedHost) { - HIPCHECK(hipHostFree(A_hh)); - HIPCHECK(hipHostFree(B_hh)); - - } else { - free(A_hh); - free(B_hh); - } -}; - - -//--- -// Test many different kinds of memory copies. -// The subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and -// checks the result. -// -// IN: numElements controls the number of elements used for allocations. -// IN: usePinnedHost : If true, allocate host with hipHostMalloc and is pinned ; else allocate host -// memory with malloc. 
IN: useHostToHost : If true, add an extra host-to-host copy. IN: -// useDeviceToDevice : If true, add an extra deviceto-device copy after result is produced. IN: -// useMemkindDefault : If true, use memkinddefault (runtime figures out direction). if false, use -// explicit memcpy direction. -// -template -void memcpytest2(DeviceMemory* dmem, HostMemory* hmem, size_t numElements, bool useHostToHost, - bool useDeviceToDevice, bool useMemkindDefault) { - size_t sizeElements = numElements * sizeof(T); - printf( - "test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, " - "useMemkindDefault:%d, offsets:dev:%+d host:+%d\n", - __func__, TYPENAME(T), sizeElements, sizeElements / 1024.0 / 1024.0, hmem->_usePinnedHost, - useHostToHost, useDeviceToDevice, useMemkindDefault, dmem->offset(), hmem->offset()); - - - hmem->reset(numElements); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - - assert(numElements <= dmem->maxNumElements()); - assert(numElements <= hmem->maxNumElements()); - - - if (useHostToHost) { - // Do some extra host-to-host copies here to mix things up: - HIPCHECK(hipMemcpy(hmem->A_hh, hmem->A_h(), sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToHost)); - HIPCHECK(hipMemcpy(hmem->B_hh, hmem->B_h(), sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToHost)); - - - HIPCHECK(hipMemcpy(dmem->A_d(), hmem->A_hh, sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(dmem->B_d(), hmem->B_hh, sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); - } else { - HIPCHECK(hipMemcpy(dmem->A_d(), hmem->A_h(), sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(dmem->B_d(), hmem->B_h(), sizeElements, - useMemkindDefault ? 
hipMemcpyDefault : hipMemcpyHostToDevice)); - } - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(dmem->A_d()), static_cast(dmem->B_d()), - dmem->C_d(), numElements); - - if (useDeviceToDevice) { - // Do an extra device-to-device copy here to mix things up: - HIPCHECK(hipMemcpy(dmem->C_dd(), dmem->C_d(), sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToDevice)); - - // Destroy the original dmem->C_d(): - HIPCHECK(hipMemset(dmem->C_d(), 0x5A, sizeElements)); - - HIPCHECK(hipMemcpy(hmem->C_h(), dmem->C_dd(), sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToHost)); - } else { - HIPCHECK(hipMemcpy(hmem->C_h(), dmem->C_d(), sizeElements, - useMemkindDefault ? hipMemcpyDefault : hipMemcpyDeviceToHost)); - } - - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(hmem->A_h(), hmem->B_h(), hmem->C_h(), numElements); - - - printf(" %s success\n", __func__); -} - - -//--- -// Try all the 16 possible combinations to memcpytest2 - usePinnedHost, useHostToHost, -// useDeviceToDevice, useMemkindDefault -template -void memcpytest2_for_type(size_t numElements) { - printSep(); - - DeviceMemory memD(numElements); - HostMemory memU(numElements, 0 /*usePinnedHost*/); - HostMemory memP(numElements, 1 /*usePinnedHost*/); - - for (int usePinnedHost = 0; usePinnedHost <= 1; usePinnedHost++) { - for (int useHostToHost = 0; useHostToHost <= 1; useHostToHost++) { // TODO - for (int useDeviceToDevice = 0; useDeviceToDevice <= 1; useDeviceToDevice++) { - for (int useMemkindDefault = 0; useMemkindDefault <= 1; useMemkindDefault++) { - memcpytest2(&memD, usePinnedHost ? 
&memP : &memU, numElements, useHostToHost, - useDeviceToDevice, useMemkindDefault); - } - } - } - } -} - -#ifdef _WIN32 -void memcpytest2_get_host_memory(size_t& free, size_t& total) { - MEMORYSTATUSEX status; - status.dwLength = sizeof(status); - GlobalMemoryStatusEx(&status); - // Windows doesn't allow allocating more than half of system memory to the gpu. - // Since the runtime also needs space for its internal allocations, - // we should not try to allocate more than 40% of reported system memory, - // otherwise we can run into OOM issues. - free = static_cast(0.4 * status.ullAvailPhys); - total = static_cast(0.4 * status.ullTotalPhys); -} -#else -struct sysinfo memInfo; -void memcpytest2_get_host_memory(size_t& free, size_t& total) { - sysinfo(&memInfo); - long long freePhysMem=memInfo.freeram; - freePhysMem *= memInfo.mem_unit; - free = freePhysMem; - long long totalPhysMem=memInfo.totalram; - totalPhysMem *= memInfo.mem_unit; - total = totalPhysMem; -} -#endif - -//--- -// Try many different sizes to memory copy. -template -void memcpytest2_sizes(size_t maxElem = 0) { - printSep(); - printf("test: %s<%s>\n", __func__, TYPENAME(T)); - - int deviceId; - HIPCHECK(hipGetDevice(&deviceId)); - - size_t free, total, freeCPU, totalCPU; - HIPCHECK(hipMemGetInfo(&free, &total)); - memcpytest2_get_host_memory(freeCPU, totalCPU); - - if (maxElem == 0) { - // Use lesser maxElem if not enough host memory available - size_t maxElemGPU = free / sizeof(T) / 8; - size_t maxElemCPU = freeCPU / sizeof(T) / 8; - maxElem = maxElemGPU < maxElemCPU ? 
maxElemGPU : maxElemCPU; - } - printf( - " Host: free=%zu (%4.2fMB) total=%zu (%4.2fMB)\n", - freeCPU, (float)(freeCPU / 1024.0 / 1024.0), - totalCPU, (float)(totalCPU / 1024.0 / 1024.0)); - - printf( - " device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", - deviceId, free, (float)(free / 1024.0 / 1024.0), total, (float)(total / 1024.0 / 1024.0), - maxElem * sizeof(T) / 1024.0 / 1024.0); - HIPCHECK(hipDeviceReset()); - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0 /*usePinnedHost*/); - HostMemory memP(maxElem, 1 /*usePinnedHost*/); - - for (size_t elem = 1; elem <= maxElem; elem *= 2) { - memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host - memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host - } -} - - -//--- -// Try many different sizes to memory copy. -template -void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets) { - printSep(); - printf("test: %s<%s>\n", __func__, TYPENAME(T)); - - int deviceId; - HIPCHECK(hipGetDevice(&deviceId)); - - size_t free, total; - HIPCHECK(hipMemGetInfo(&free, &total)); - - - printf( - " device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", - deviceId, free, (float)(free / 1024.0 / 1024.0), total, (float)(total / 1024.0 / 1024.0), - maxElem * sizeof(T) / 1024.0 / 1024.0); - HIPCHECK(hipDeviceReset()); - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0 /*usePinnedHost*/); - HostMemory memP(maxElem, 1 /*usePinnedHost*/); - - size_t elem = maxElem / 2; - - for (int offset = 0; offset < 512; offset++) { - assert(elem + offset < maxElem); - if (devOffsets) { - memD.offset(offset); - } - if (hostOffsets) { - memU.offset(offset); - memP.offset(offset); - } - memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host - memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host - } - - for (int offset = 512; offset < elem; offset *= 2) { - assert(elem + offset < maxElem); - if (devOffsets) { - memD.offset(offset); - } - if 
(hostOffsets) { - memU.offset(offset); - memP.offset(offset); - } - memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host - memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host - } -} - - -//--- -// Create multiple threads to stress multi-thread locking behavior in the -// allocation/deallocation/tracking logic: -template -void multiThread_1(bool serialize, bool usePinnedHost) { - printSep(); - printf("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, - usePinnedHost); - DeviceMemory memD(N); - HostMemory mem1(N, usePinnedHost); - HostMemory mem2(N, usePinnedHost); - - std::thread t1(memcpytest2, &memD, &mem1, N, 0, 0, 0); - if (serialize) { - t1.join(); - } - - - std::thread t2(memcpytest2, &memD, &mem2, N, 0, 0, 0); - if (serialize) { - t2.join(); - } - - if (!serialize) { - t1.join(); - t2.join(); - } -} - - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - printf("info: set device to %d\n", p_gpuDevice); - HIPCHECK(hipSetDevice(p_gpuDevice)); - - - if (p_tests & 0x1) { - printf("\n\n=== tests&1 (types and different memcpy kinds (H2D, D2H, H2H, D2D)\n"); - HIPCHECK(hipDeviceReset()); - memcpytest2_for_type(N); - memcpytest2_for_type(N); - memcpytest2_for_type(N); - memcpytest2_for_type(N); - printf("===\n\n\n"); - } - - - if (p_tests & 0x2) { - // Some tests around the 64KB boundary which have historically shown issues: - printf("\n\n=== tests&0x2 (64KB boundary)\n"); - size_t maxElem = 32 * 1024 * 1024; - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0 /*usePinnedHost*/); - HostMemory memP(maxElem, 0 /*usePinnedHost*/); - // These all pass: - memcpytest2(&memD, &memP, 15 * 1024 * 1024, 0, 0, 0); - memcpytest2(&memD, &memP, 16 * 1024 * 1024, 0, 0, 0); - memcpytest2(&memD, &memP, 16 * 1024 * 1024 + 16 * 1024, 0, 0, 0); - - // Just over 64MB: - memcpytest2(&memD, &memP, 16 * 1024 * 1024 + 512 * 1024, 0, 0, 0); - memcpytest2(&memD, &memP, 17 * 1024 * 1024 + 1024, 0, 0, 0); - 
memcpytest2(&memD, &memP, 32 * 1024 * 1024, 0, 0, 0); - memcpytest2(&memD, &memU, 32 * 1024 * 1024, 0, 0, 0); - memcpytest2(&memD, &memP, 32 * 1024 * 1024, 1, 1, 0); - memcpytest2(&memD, &memP, 32 * 1024 * 1024, 1, 1, 0); - } - - - if (p_tests & 0x4) { - printf("\n\n=== tests&4 (test sizes)\n"); - HIPCHECK(hipDeviceReset()); - memcpytest2_sizes(0); - printSep(); - } - - - if (p_tests & 0x8) { - printf("\n\n=== tests&8\n"); - HIPCHECK(hipDeviceReset()); - printSep(); - - // Simplest cases: serialize the threads, and also used pinned memory: - // This verifies that the sub-calls to memcpytest2 are correct. - multiThread_1(true, true); - - // Serialize, but use unpinned memory to stress the unpinned memory xfer path. - multiThread_1(true, false); - - // Remove serialization, so two threads are performing memory copies in parallel. - multiThread_1(false, true); - - // Remove serialization, and use unpinned. - multiThread_1(false, false); // TODO - printf("===\n\n\n"); - } - - - if (p_tests & 0x10) { - printf("\n\n=== tests&0x10 (test device offsets)\n"); - HIPCHECK(hipDeviceReset()); - size_t maxSize = 256 * 1024; - memcpytest2_offsets(maxSize, true, false); - memcpytest2_offsets(maxSize, true, false); - memcpytest2_offsets(maxSize, true, false); - } - - - if (p_tests & 0x20) { - printf("\n\n=== tests&0x10 (test device offsets)\n"); - HIPCHECK(hipDeviceReset()); - size_t maxSize = 256 * 1024; - memcpytest2_offsets(maxSize, false, true); - memcpytest2_offsets(maxSize, false, true); - memcpytest2_offsets(maxSize, false, true); - } - - - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMemcpy2D.cpp b/tests/src/runtimeApi/memory/hipMemcpy2D.cpp deleted file mode 100644 index b1ec178b4c..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy2D.cpp +++ /dev/null @@ -1,341 +0,0 @@ -/* - Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ - -// Testcase Description: This test case achieves three scenarios -// 1) Verifies the working of Memcpy2D API negative scenarios by -// Pass NULL to destination pointer -// Pass NULL to Source pointer -// Pass NULL to both Source and destination pointers -// Pass same pointer to both source and destination pointers. -// Pass width greater than spitch/dpitch -// 2) Verifies hipMemcpy2D API by -// pass 0 to destionation pitch -// pass 0 to source pitch -// pass 0 to both source and destination pitches -// pass 0 to width -// pass 0 to height -// 3) Verifies working of Memcpy2D API by performing D2H and -// H2D memory kind copies -// 4) Verifies working of Memcpy2D API by performing D2D -// in same GPU device and the peer GPU device. 
-// 5) Verify hipMemcpy2D API on pinned host memory in same and peer GPU devices - - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST_NAMED: %t hipMemcpy2D_NegativeTest --tests 1 - * TEST_NAMED: %t hipMemcpy2D_H2D_D2H --tests 3 - * TEST_NAMED: %t hipMemcpy2D_D2D --tests 4 - * TEST_NAMED: %t hipMemcpy2D_PinnedMemory --tests 5 - * HIT_END - */ - -#include "test_common.h" - -#define NUM_H 256 -#define NUM_W 256 -#define COLUMNS 8 -#define ROWS 8 - - -class Memcpy2D { - char *A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, - *B_h{nullptr}, *B_d{nullptr}; - size_t pitch_A, pitch_B; - size_t width{NUM_W * sizeof(char)}; - size_t sizeElements{width * NUM_H}; - size_t elements{NUM_W * NUM_H}; - bool ValidateResult(char *result, int compare); - public: - void AllocateMemory(); - void DeAllocateMemory(); - bool Memcpy2D_NegativeTest(); - bool Memcpy2D_NegativeTest_SizeCheck(); - bool Memcpy2D_H2D_D2HKind(); - bool Memcpy2D_D2DKind_SameGPU(); - bool Memcpy2D_D2DKind_MultiGPU(); - bool Memcpy2D_PinnedMemory_SameGPU(); - bool Memcpy2D_PinnedMemory_MultiGPU(); -}; - -bool Memcpy2D::ValidateResult(char *result, int compare) { - int count = 0; - for (int row = 0; row < ROWS; row++) { - for (int column = 0; column < COLUMNS; column++) { - if (result[(row * NUM_H) + column] != compare) { - return false; - } - ++count; - } - } - return true; -} - -void Memcpy2D::AllocateMemory() { - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != nullptr); - B_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(B_h != nullptr); - C_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(C_h != nullptr); - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIPCHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - for (size_t i=0; i < elements; i++) { - A_h[i] = 3; - B_h[i] = 4; - C_h[i] = 123; - } -} - -void Memcpy2D::DeAllocateMemory() { - HIPCHECK(hipFree(A_d)); HIPCHECK(hipFree(B_d)); - free(A_h); 
free(B_h); free(C_h); -} - - -bool Memcpy2D::Memcpy2D_H2D_D2HKind() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool testResult = true; - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - // hipMemcpy Device to Host - HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, - COLUMNS, ROWS, hipMemcpyDeviceToHost)); - testResult = ValidateResult(A_h, memsetval); - // hipMemcpy Host to Device and validating - // the result by copying the device data to host data - HIPCHECK(hipMemcpy2D(B_d, pitch_B, B_h, width, - COLUMNS, ROWS, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy2D(C_h, width, B_d, pitch_B, - COLUMNS, ROWS, hipMemcpyDeviceToHost)); - testResult &= ValidateResult(C_h, B_h[0]); - DeAllocateMemory(); - return testResult; -} - -bool Memcpy2D::Memcpy2D_PinnedMemory_SameGPU() { - HIPCHECK(hipSetDevice(0)); - bool testResult = true; - AllocateMemory(); - char *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipMemcpy2D(D_h, width, A_d, pitch_A, - COLUMNS, ROWS, hipMemcpyDeviceToHost)); - testResult = ValidateResult(D_h, memsetval); - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - return testResult; -} - -bool Memcpy2D::Memcpy2D_PinnedMemory_MultiGPU() { - bool testResult = true; - int numDevices = 0; - int canAccessPeer = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); - // Check for peer devices and performing D2D on the devices - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipSetDevice(1)); - char *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); - HIPCHECK(hipMemcpy2D(D_h, width, A_d, pitch_A, - COLUMNS, ROWS, hipMemcpyDeviceToHost)); - testResult = ValidateResult(D_h, memsetval); - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - } else { 
- printf("Machine does not seem to have P2P Capabilities, Empty Pass"); - } - } else { - printf("Testcase Skipped as no of devices < 2"); - } - return testResult; -} - -bool Memcpy2D::Memcpy2D_D2DKind_SameGPU() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool testResult = true; - // Performs D2D on same GPU device - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipMemcpy2D(B_d, pitch_B, A_d, - pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice)); - HIPCHECK(hipMemcpy2D(B_h, width, B_d, pitch_B, - COLUMNS, ROWS, hipMemcpyDeviceToHost)); - testResult = ValidateResult(B_h, memsetval); - DeAllocateMemory(); - return testResult; -} -bool Memcpy2D::Memcpy2D_D2DKind_MultiGPU() { - int numDevices = 0; - bool testResult = true; - int canAccessPeer = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - for (int j =1; j < numDevices; j++) { - hipDeviceCanAccessPeer(&canAccessPeer, 0, j); - // Check for peer devices and performing D2D on the devices - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipSetDevice(j)); - char *X_d{nullptr}; - size_t pitch_X; - HIPCHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); - HIPCHECK(hipMemcpy2D(X_d, pitch_X, A_d, - pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice)); - HIPCHECK(hipMemcpy2D(C_h, width, X_d, - pitch_X, COLUMNS, ROWS, hipMemcpyDeviceToHost)); - testResult &= ValidateResult(C_h, memsetval); - HIPCHECK(hipFree(X_d)); - DeAllocateMemory(); - } else { - printf("Machine does not seem to have P2P between 0 & %d", j); - } - } - } else { - printf("skipped the testcase as no of devices is less than 2"); - } - return testResult; -} - -bool Memcpy2D::Memcpy2D_NegativeTest() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool TestPassed = true; - hipError_t err; - err = hipMemcpy2D(A_h, width, nullptr, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); - if (err == 
hipSuccess) { - printf("hipMemcpy2D failed when source pointer are null"); - TestPassed = false; - } - // hipMemcpy2D API by Passing nullptr to destination - err = hipMemcpy2D(nullptr, width, A_d, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2D failed when dest pointer are null"); - TestPassed = false; - } - // hipMemcpy2D by Passing nullptr to both Source and Destination ptr - err = hipMemcpy2D(nullptr, width, nullptr, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2D failed when both source and dest pointer are null"); - TestPassed = false; - } - // hipMemcpy2D API where width is greater than destination pitch - err = hipMemcpy2D(A_h, 10, A_d, pitch_A, - NUM_W, NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2D failed where width is greater than destination pitch"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - -bool Memcpy2D::Memcpy2D_NegativeTest_SizeCheck() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool TestPassed = true; - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - hipError_t err; - // hipMemcpy2D API where Destination Pitch is zero - err = hipMemcpy2D(A_h, 0, A_d, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2D failed when source pitch is null"); - TestPassed = false; - } - // hipMemcpy2D API where Source Pitch is zero - err = hipMemcpy2D(A_h, width, A_d, - 0, NUM_W, NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2D failed when destination pitch is null"); - TestPassed = false; - } - // hipMemcpy2D API where Source and Destination Pitch are zero - err = hipMemcpy2D(A_h, 0, A_d, - 0, NUM_W, NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2D failed when source and destination pitches are null"); - TestPassed = false; - } - // hipMemcpy2D API where height is zero - // 
hipMemcpy2D API would return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2D(A_h, width, A_d, - pitch_A, NUM_W, 0, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, 3); - } else { - printf("hipMemcpy2D failed when Height is null"); - TestPassed = false; - } - // hipMemcpy2D API where width is zero - // hipMemcpy2D API would return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2D(A_h, width, A_d, - pitch_A, 0, NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, 3); - } else { - printf("hipMemcpy2D failed when Width is null"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - -int main(int argc, char* argv[]) { - bool TestPassed = true; - checkImageSupport(); - Memcpy2D Memcpy2DObj; - HipTest::parseStandardArguments(argc, argv, false); - if (p_tests == 1) { - TestPassed &= Memcpy2DObj.Memcpy2D_NegativeTest(); - } else if (p_tests == 2) { - TestPassed &= Memcpy2DObj.Memcpy2D_NegativeTest_SizeCheck(); - } else if (p_tests == 3) { - TestPassed &= Memcpy2DObj.Memcpy2D_H2D_D2HKind(); - } else if (p_tests == 4) { - TestPassed &= Memcpy2DObj.Memcpy2D_D2DKind_SameGPU(); - TestPassed &= Memcpy2DObj.Memcpy2D_D2DKind_MultiGPU(); - } else if (p_tests == 5) { - TestPassed &= Memcpy2DObj.Memcpy2D_PinnedMemory_SameGPU(); - TestPassed &= Memcpy2DObj.Memcpy2D_PinnedMemory_MultiGPU(); - } else { - failed("Didnt receive any valid option. Try options 1 to 5\n"); - } - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp b/tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp deleted file mode 100644 index a22089f363..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp +++ /dev/null @@ -1,395 +0,0 @@ -/* - Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -// Testcase Description: This test case achieves three scenarios -// 1) Verifies the working of Memcpy2DAsync API negative scenarios by -// Pass NULL to destination pointer -// Pass NULL to Source pointer -// Pass NULL to both Source and destination pointers -// Pass same pointer to both source and destination pointers. -// Pass width greater than spitch/dpitch -// 2) Verifies hipMemcpy2DAsync API by -// pass 0 to destionation pitch -// pass 0 to source pitch -// pass 0 to both source and destination pitches -// pass 0 to width -// pass 0 to height -// 3) Verifies working of Memcpy2DAsync API by performing D2H -// and H2D memory kind copies -// 4) Verifies working of Memcpy2DAsync API by performing D2D -// on same GPU device and the peer GPU device. 
-// 5) Verifies working hipMemcpy2DAsync API along with launching Kernel -// 6) Veirfy hipMemcpy2DAsync by allocating pinned host memory - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST_NAMED: %t hipMemcpy2DAsync_NegativeTest --tests 1 - * TEST_NAMED: %t hipMemcpy2DAsync_H2D_D2H --tests 3 - * TEST_NAMED: %t hipMemcpy2DAsync_D2D --tests 4 - * TEST_NAMED: %t hipMemcpy2DAsync_WithKernel --tests 5 - * TEST_NAMED: %t hipMemcpy2DAsync_PinnedMemory --tests 6 - * HIT_END - */ - -#include "test_common.h" - -#define NUM_H 256 -#define NUM_W 256 -#define COLUMNS 8 -#define ROWS 8 -#define ITER 10 - -__global__ void -vector_square(char* B_d, char* C_d, size_t elements) { - for (int i=0 ; i < elements ; i++) { - C_d[i] = B_d[i] * B_d[i]; - } -} - -class Memcpy2DAsync { - char *A_h{nullptr}, *A_d{nullptr}, *B_h{nullptr}, - *B_d{nullptr}, *C_h{nullptr}, *C_d{nullptr}; - size_t pitch_A, pitch_B, pitch_C; - size_t width{NUM_W * sizeof(char)}; - size_t sizeElements{width * NUM_H}; - size_t elements{NUM_W * NUM_H}; - hipStream_t stream; - bool ValidateResult(char *result, int compare); - public: - void AllocateMemory(); - void DeAllocateMemory(); - bool Memcpy2DAsync_NegativeTest(); - bool Memcpy2DAsync_NegativeTest_SizeCheck(); - bool Memcpy2DAsync_H2D_D2HKind(); - bool Memcpy2DAsync_D2DKind_SameGPU(); - bool Memcpy2DAsync_D2DKind_MultiGPU(); - bool Memcpy2DAsync_WithKernel(); - bool Memcpy2DAsync_PinnedMemory_SameGPU(); - bool Memcpy2DAsync_PinnedMemory_MultiGPU(); -}; - -void Memcpy2DAsync::AllocateMemory() { - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != nullptr); - B_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(B_h != nullptr); - C_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(C_h != nullptr); - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIPCHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - HIPCHECK(hipMallocPitch(reinterpret_cast(&C_d), - 
&pitch_C, width, NUM_H)); - for (size_t i=0; i < elements; i++) { - A_h[i] = 3; - B_h[i] = 4; - C_h[i] = 123; - } - HIPCHECK(hipStreamCreate(&stream)); -} - -void Memcpy2DAsync::DeAllocateMemory() { - HIPCHECK(hipFree(A_d)); HIPCHECK(hipFree(B_d)); HIPCHECK(hipFree(C_d)); - free(A_h); free(B_h); - HIPCHECK(hipStreamDestroy(stream)); -} - -bool Memcpy2DAsync::ValidateResult(char *result, int compare) { - for (int row = 0; row < ROWS; row++) { - for (int column = 0; column < COLUMNS; column++) { - if (result[(row * NUM_H) + column] != compare) { - return false; - } - } - } - return true; -} - -bool Memcpy2DAsync::Memcpy2DAsync_H2D_D2HKind() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool testResult = true; - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - // hipMemcpy Device to Host - HIPCHECK(hipMemcpy2DAsync(A_h, width, A_d, pitch_A, - COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - testResult = ValidateResult(A_h, memsetval); - // hipMemcpy Host to Device and validating the - // result by copying the device data to host data - HIPCHECK(hipMemcpy2DAsync(B_d, pitch_B, B_h, width, - COLUMNS, ROWS, hipMemcpyHostToDevice, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy2DAsync(C_h, width, B_d, pitch_B, - COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - testResult &= ValidateResult(C_h, B_h[0]); - DeAllocateMemory(); - return testResult; -} - -bool Memcpy2DAsync::Memcpy2DAsync_PinnedMemory_SameGPU() { - HIPCHECK(hipSetDevice(0)); - bool testResult = true; - AllocateMemory(); - char *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipMemcpy2DAsync(D_h, width, A_d, pitch_A, - COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - testResult = ValidateResult(D_h, memsetval); - DeAllocateMemory(); - 
HIPCHECK(hipHostFree(D_h)); - return testResult; -} - -bool Memcpy2DAsync::Memcpy2DAsync_PinnedMemory_MultiGPU() { - bool testResult = true; - int numDevices = 0; - int canAccessPeer = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); - // Check for peer devices and performing D2D on the devices - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - char *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); - AllocateMemory(); - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipDeviceSynchronize()); - HIPCHECK(hipSetDevice(1)); - hipStream_t p_stream; - HIPCHECK(hipStreamCreate(&p_stream)); - HIPCHECK(hipMemcpy2DAsync(D_h, width, A_d, pitch_A, - COLUMNS, ROWS, hipMemcpyDeviceToHost, p_stream)); - HIPCHECK(hipStreamSynchronize(p_stream)); - testResult = ValidateResult(D_h, memsetval); - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - HIPCHECK(hipStreamDestroy(p_stream)); - } else { - printf("skipping the tescase as device does not have P2P"); - } - } else { - printf("skipped the testcase as no of devices is less than 2"); - } - return testResult; -} - -bool Memcpy2DAsync::Memcpy2DAsync_D2DKind_SameGPU() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool testResult = true; - // Performs D2D on same GPU device - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, - pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy2DAsync(B_h, width, B_d, pitch_B, - COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - testResult = ValidateResult(B_h, memsetval); - DeAllocateMemory(); - return testResult; -} - -bool Memcpy2DAsync::Memcpy2DAsync_D2DKind_MultiGPU() { - int numDevices = 0; - bool testResult = true; - int canAccessPeer = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if 
(numDevices > 1) { - for (int j =1; j < numDevices; j++) { - hipDeviceCanAccessPeer(&canAccessPeer, 0, j); - // Check for peer devices and performing D2D on the devices - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - HIPCHECK(hipSetDevice(j)); - hipStream_t p_stream; - HIPCHECK(hipStreamCreate(&p_stream)); - char *X_d{nullptr}; - size_t pitch_X; - HIPCHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); - HIPCHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, - pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice, p_stream)); - HIPCHECK(hipStreamSynchronize(p_stream)); - HIPCHECK(hipMemcpy2DAsync(C_h, width, X_d, - pitch_X, COLUMNS, ROWS, hipMemcpyDeviceToHost, p_stream)); - HIPCHECK(hipStreamSynchronize(p_stream)); - testResult &= ValidateResult(C_h, memsetval); - HIPCHECK(hipFree(X_d)); - DeAllocateMemory(); - HIPCHECK(hipStreamDestroy(p_stream)); - } else { - printf("Machine does not seem to have P2P between 0 & %d", j); - } - } - } else { - printf("skipped the testcase as no of devices is less than 2"); - } - return testResult; -} - -bool Memcpy2DAsync::Memcpy2DAsync_WithKernel() { - HIPCHECK(hipSetDevice(0)); - unsigned int ThreadsperBlock = 1; - unsigned int numBlocks = 1; - bool testResult = true; - AllocateMemory(); - for (int k = 0 ; k < ITER ; k++) { - HIPCHECK(hipMemset2D(B_d, pitch_B, B_h[0], NUM_W, NUM_H)); - hipLaunchKernelGGL(vector_square, numBlocks, ThreadsperBlock, 0, - stream, B_d, C_d, elements); - HIPCHECK(hipMemcpy2DAsync(B_d, pitch_B, C_d, pitch_C, COLUMNS, ROWS, - hipMemcpyDeviceToDevice, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy2DAsync(C_h, width, B_d, pitch_B, - COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - testResult &= ValidateResult(C_h, B_h[0]*B_h[0]); - } - DeAllocateMemory(); - return testResult; -} - -bool Memcpy2DAsync::Memcpy2DAsync_NegativeTest() { - 
HIPCHECK(hipSetDevice(0)); - bool TestPassed = true; - AllocateMemory(); - hipError_t err; - // hipMemcpy2DAsyncAsync API by Passing nullptr to Source Pointer` - err = hipMemcpy2DAsync(A_h, width, nullptr, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpyAsync failed when source pointer is null"); - TestPassed = false; - } - // hipMemcpy2DAsyncAsync API by Passing nullptr to Destination Pointer - err = hipMemcpy2DAsync(nullptr, width, A_d, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpyAsync failed when dest pointer is null"); - TestPassed = false; - } - // hipMemcpy2DAsyncAsync API by Passing nullptr - // to both Source and Destination ptr - err = hipMemcpy2DAsync(nullptr, width, nullptr, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpyAsync failed when both source and dest pointer are null"); - TestPassed = false; - } - // hipMemcpy2DAsyncAsync API where width is more than destination pitch - err = hipMemcpy2DAsync(A_h, 10, A_d, pitch_A, - NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpyAsync failed where width is more than destination pitch"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - -bool Memcpy2DAsync::Memcpy2DAsync_NegativeTest_SizeCheck() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool TestPassed = true; - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - hipError_t err; - // hipMemcpy2DAsync API where Destination Pitch is zero - err = hipMemcpy2DAsync(A_h, 0, A_d, - pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpy2DAsync failed when source pitch is null"); - TestPassed = false; - } - // hipMemcpy2DAsync API where Source Pitch is zero - err = hipMemcpy2DAsync(A_h, width, A_d, - 0, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); - if (err == 
hipSuccess) { - printf("hipMemcpy2DAsync failed when destination pitch is null"); - TestPassed = false; - } - // hipMemcpy2DAsync API where Source and Destination Pitch are zero - err = hipMemcpy2DAsync(A_h, 0, A_d, - 0, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpy2DAsync failed source and destination pitches are null"); - TestPassed = false; - } - // hipMemcpy2DAsync API where height is zero - // hipMemcpy2DAsync API would return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2DAsync(A_h, width, A_d, - pitch_A, NUM_W, 0, hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, 3); - } else { - printf("hipMemcpy2DAsync failed when Width is null"); - TestPassed = false; - } - // hipMemcpy2DAsync API where width is zero - // hipMemcpy2DAsync API would return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2DAsync(A_h, width, A_d, - pitch_A, 0, NUM_H, hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, 3); - } else { - printf("hipMemcpy2DAsync failed when Width is null"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - -int main(int argc, char* argv[]) { - Memcpy2DAsync Memcpy2DAsyncObj; - HipTest::parseStandardArguments(argc, argv, false); - bool TestPassed = true; - if (p_tests == 1) { - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_NegativeTest(); - } else if (p_tests == 2) { - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_NegativeTest_SizeCheck(); - } else if (p_tests == 3) { - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_H2D_D2HKind(); - } else if (p_tests == 4) { - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_D2DKind_SameGPU(); - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_D2DKind_MultiGPU(); - } else if (p_tests == 
5) { - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_WithKernel(); - } else if (p_tests == 6) { - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_PinnedMemory_MultiGPU(); - TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_PinnedMemory_SameGPU(); - } else { - failed("Didnt receive any valid option. Try options 1 to 6\n"); - } - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpy2DFromArray.cpp b/tests/src/runtimeApi/memory/hipMemcpy2DFromArray.cpp deleted file mode 100644 index c8ff891eba..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy2DFromArray.cpp +++ /dev/null @@ -1,315 +0,0 @@ -/* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -/* - * Test Scenarios: - * Scenario 1 : - * 1. hipMemcpy2DFromArray simple scenarios - * 2. Extent Validation Scenarios - * 3. Device context Change - * 4. Negative Scenarios - * 5. 
Pinned Host Memory from same and Peer GPU. - */ -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t hipMemcpy2DFromArray_simple --tests 1 - * TEST_NAMED: %t hipMemcpy2DFromArray_ExtentValidation --tests 2 - * TEST_NAMED: %t hipMemcpy2DFromArray_DeviceContextChange --tests 3 - * TEST_NAMED: %t hipMemcpy2DFromArray_NegativeTests --tests 4 - * TEST_NAMED: %t hipMemcpy2DFromArray_PinnedHostMemory --tests 5 - * HIT_END - */ -#include "test_common.h" - -#define NUM_W 10 -#define NUM_H 10 -#define INITIAL_VAL 8 - -template -class Memcpy2DFromArray { - hipArray *A_d{nullptr}; - T *hData{nullptr}, *A_h{nullptr}; - size_t width, height; - size_t elements{NUM_W * NUM_H}; - hipError_t err; - public: - void AllocateMemory(); - void DeAllocateMemory(); - bool hipMemcpy2DFromArray_NegativeTests(); - bool hipMemcpy2DFromArray_simple(); - bool hipMemcpy2DFromArray_SizeCheck(); - bool hipMemcpy2DFromArray_PeerDeviceContext(); - bool hipMemcpy2DFromArray_PinnedHostMemory_SameGPU(); - bool hipMemcpy2DFromArray_PinnedHostMemory_PeerGPU(); - bool ValidateResult(T* result, T compare); -}; -template -void Memcpy2DFromArray::AllocateMemory() { - width = NUM_W * sizeof(T); - height = NUM_H; - hData = reinterpret_cast(malloc(width * NUM_H)); - A_h = reinterpret_cast(malloc(width * NUM_H)); - for (int i = 0; i < elements; i++) { - A_h[i] = 1; - hData[i] = INITIAL_VAL; - } - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HIPCHECK(hipMallocArray(&A_d, &desc, NUM_W, NUM_H, hipArrayDefault)); - HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, hData, width, - width, NUM_H, hipMemcpyHostToDevice)); -} -template -bool Memcpy2DFromArray::ValidateResult(T *result, T compare) { - bool TestPassed = true; - for (int i = 0; i < NUM_W; i++) { - for (int j = 0; j < NUM_H; j++) { - if (result[(i*NUM_H) + j] != compare) { - TestPassed = false; - } - } - } - return TestPassed; -} -template -void Memcpy2DFromArray::DeAllocateMemory() { - hipFreeArray(A_d); - free(hData); 
- free(A_h); -} - -template -bool Memcpy2DFromArray::hipMemcpy2DFromArray_PinnedHostMemory_SameGPU() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - T *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), width * NUM_H)); - err = hipMemcpy2DFromArray(D_h, width, A_d, - 0, 0, width, - NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - TestPassed = ValidateResult(D_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArray failed for PinnedHostMemory same GPU\n"); - TestPassed = false; - } - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - return TestPassed; -} - -template -bool Memcpy2DFromArray::hipMemcpy2DFromArray_PinnedHostMemory_PeerGPU() { - bool TestPassed = true; - int canAccessPeer = 0; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - // Check for peer devices and performing D2D on the devices - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipSetDevice(1)); - T *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), width * NUM_H)); - err = hipMemcpy2DFromArray(D_h, width, A_d, - 0, 0, width, - NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - TestPassed = ValidateResult(D_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArray failed for PinnedHostMemory Peer GPU\n"); - TestPassed = false; - } - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - } else { - printf("Machine does not seem to have P2P Capabilities, Empty Pass"); - } - return TestPassed; -} - -template -bool Memcpy2DFromArray::hipMemcpy2DFromArray_simple() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - err = hipMemcpy2DFromArray(A_h, width, A_d, - 0, 0, width, NUM_H, - hipMemcpyDeviceToHost); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArray failed for simple copy\n"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} -template -bool 
Memcpy2DFromArray::hipMemcpy2DFromArray_PeerDeviceContext() { - bool TestPassed = true; - int peerAccess = 0; - HIPCHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1)); - if (!peerAccess) { - printf("Skipped the test as there is no peer access\n"); - } else { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipSetDevice(1)); - err = hipMemcpy2DFromArray(A_h, width, A_d, - 0, 0, width, - NUM_H, hipMemcpyDeviceToHost); - - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArray failed for peer device context\n"); - TestPassed = false; - } - DeAllocateMemory(); - } - return TestPassed; -} - -template -bool Memcpy2DFromArray::hipMemcpy2DFromArray_SizeCheck() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool TestPassed = true; - // hipMemcpy2DFromArray API where Destination width is 0 - err = hipMemcpy2DFromArray(A_h, 0, A_d, - 0, 0, NUM_W*sizeof(T), - NUM_H, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArray failed when destination width is zero"); - TestPassed = false; - } - - // hipMemcpy2DFromArray API where height is zero - // hipMemcpy2DFromArray API would return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2DFromArray(A_h, width, A_d, - 0, 0, NUM_W*sizeof(T), - 0, hipMemcpyDeviceToHost); - if (err == hipSuccess) { - TestPassed &= ValidateResult(A_h, 1); - } else { - printf("hipMemcpy2DFromArray failed when Height is null"); - TestPassed = false; - } - // hipMemcpy2DFromArray API where width is zero - // hipMemcpy2DFromArray API would return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2DFromArray(A_h, width, A_d, - 0, 0, 0, NUM_H, - hipMemcpyDeviceToHost); - if (err == hipSuccess) { - TestPassed &= ValidateResult(A_h, 1); - } else { - printf("hipMemcpy2DFromArray failed when Width is null"); - TestPassed = false; - } - DeAllocateMemory(); - 
return TestPassed; -} - -template -bool Memcpy2DFromArray::hipMemcpy2DFromArray_NegativeTests() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool TestPassed = true; - // Passing nullptr to destination - err = hipMemcpy2DFromArray(nullptr, width, A_d, - 0, 0, width, NUM_H, - hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArray failed when dest pointer are null"); - TestPassed = false; - } - // Passing nullptr to source - err = hipMemcpy2DFromArray(A_h, width, nullptr, - 0, 0, width, NUM_H, - hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArray failed when source pointer are null"); - TestPassed = false; - } - // Passing offset 1 and trying to perform array out of bounds - err = hipMemcpy2DFromArray(A_h, width, A_d, 1, - 1, width, NUM_H, - hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArray failed offset 1 and perform full copy"); - TestPassed = false; - } - // Copying array more than allocated (array out of bounds) - err = hipMemcpy2DFromArray(A_h, width, A_d, 0, - 0, width+2, NUM_H+2, - hipMemcpyDeviceToHost); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArray failed where array is out of bound"); - TestPassed = false; - } - - DeAllocateMemory(); - return TestPassed; -} - - -int main(int argc, char **argv) { - bool TestPassed = true; - HipTest::parseStandardArguments(argc, argv, false); - Memcpy2DFromArray Array_obj; - int numDevices = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (p_tests == 1) { - TestPassed = Array_obj.hipMemcpy2DFromArray_simple(); - } else if (p_tests == 2) { - TestPassed &= Array_obj.hipMemcpy2DFromArray_SizeCheck(); - } else if (p_tests == 3) { - if (numDevices > 1) { -#ifndef _WIN64 - TestPassed &= Array_obj.hipMemcpy2DFromArray_PeerDeviceContext(); -#else - printf("xgmi memory test not supported on windows\n"); -#endif - } else { - printf("skipped the testcase as noof devices <2\n"); - } - } else if (p_tests == 4) { - TestPassed &= 
Array_obj.hipMemcpy2DFromArray_NegativeTests(); - } else if (p_tests == 5) { - if (numDevices > 1) { -#ifndef _WIN64 - TestPassed &= Array_obj.hipMemcpy2DFromArray_PinnedHostMemory_SameGPU(); - TestPassed &= Array_obj.hipMemcpy2DFromArray_PinnedHostMemory_PeerGPU(); -#else - printf("xgmi memory test not supported on windows\n"); -#endif - } else { - printf("skipped the testcases as noof devices <2\n"); - } - } else { - printf("Provide a valid option \n"); - TestPassed = false; - } - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpy2DFromArrayAsync.cpp b/tests/src/runtimeApi/memory/hipMemcpy2DFromArrayAsync.cpp deleted file mode 100644 index f4b2ce94f7..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy2DFromArrayAsync.cpp +++ /dev/null @@ -1,321 +0,0 @@ -/* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * Test Scenarios: - * Scenario 1 : - * 1. hipMemcpy2DFromArrayAsync simple scenarios - * 2. Extent Validation Scenarios - * 3. Device context Change - * 4. Negative Scenarios - * 5. Pinned Host Memory from same and Peer GPU. - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t hipMemcpy2DFromArrayAsync_Simple --tests 1 - * TEST_NAMED: %t hipMemcpy2DFromArrayAsync_ExtentValidation --tests 2 - * TEST_NAMED: %t hipMemcpy2DFromArrayAsync_PeerDeviceContext --tests 3 - * TEST_NAMED: %t hipMemcpy2DFromArrayAsync_NegativeTests --tests 4 - * TEST_NAMED: %t hipMemcpy2DFromArrayAsync_PinnedHostMemory --tests 5 - * HIT_END - */ -#include "test_common.h" - -#define NUM_W 10 -#define NUM_H 10 -#define INITIAL_VAL 8 - -template -class Memcpy2DFromArrayAsync { - hipArray *A_d{nullptr}; - T *hData{nullptr}, *A_h{nullptr}; - size_t width, height; - size_t elements{NUM_W * NUM_H}; - hipStream_t stream; - hipError_t err; - public: - void AllocateMemory(); - void DeAllocateMemory(); - bool hipMemcpy2DFromArrayAsync_NegativeTests(); - bool hipMemcpy2DFromArrayAsync_simple(); - bool hipMemcpy2DFromArrayAsync_SizeCheck(); - bool hipMemcpy2DFromArrayAsync_PeerDeviceContext(); - bool hipMemcpy2DFromArrayAsync_PinnedHost_SameGPU(); - bool hipMemcpy2DFromArrayAsync_PinnedHost_PeerGPU(); - bool ValidateResult(T* result, T compare); -}; -template -void Memcpy2DFromArrayAsync::AllocateMemory() { - width = NUM_W * sizeof(T); - height = NUM_H; - hData = reinterpret_cast(malloc(width * NUM_H)); - A_h = reinterpret_cast(malloc(width * NUM_H)); - for (int i = 0; i < elements; i++) { - A_h[i] = 1; - hData[i] = INITIAL_VAL; - } - HIPCHECK(hipStreamCreate(&stream)); - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HIPCHECK(hipMallocArray(&A_d, &desc, NUM_W, NUM_H, hipArrayDefault)); - HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, hData, width, - width, NUM_H, hipMemcpyHostToDevice)); -} -template -bool Memcpy2DFromArrayAsync::ValidateResult(T 
*result, T compare) { - bool TestPassed = true; - for (int i = 0; i < NUM_W; i++) { - for (int j = 0; j < NUM_H; j++) { - if (result[(i*NUM_H) + j] != compare) { - TestPassed = false; - } - } - } - return TestPassed; -} -template -void Memcpy2DFromArrayAsync::DeAllocateMemory() { - hipFreeArray(A_d); - free(hData); - free(A_h); - HIPCHECK(hipStreamDestroy(stream)); -} - -template -bool Memcpy2DFromArrayAsync::hipMemcpy2DFromArrayAsync_PinnedHost_SameGPU() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - T *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), width * NUM_H)); - err = hipMemcpy2DFromArrayAsync(D_h, width, A_d, - 0, 0, NUM_W*sizeof(T), - NUM_H, hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed = ValidateResult(D_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArrayAsync failed for PinnedHostMemory same GPU\n"); - TestPassed = false; - } - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - return TestPassed; -} - -template -bool Memcpy2DFromArrayAsync::hipMemcpy2DFromArrayAsync_PinnedHost_PeerGPU() { - bool TestPassed = true; - int canAccessPeer = 0; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - // Check for peer devices and performing D2D on the devices - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipSetDevice(1)); - T *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), width * NUM_H)); - err = hipMemcpy2DFromArrayAsync(D_h, width, A_d, - 0, 0, NUM_W*sizeof(T), NUM_H, - hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed = ValidateResult(D_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArrayAsync failed PinnedHostMemory Peer GPU\n"); - TestPassed = false; - } - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - } else { - printf("Machine does not seem to have P2P Capabilities, Empty Pass"); - } - return 
TestPassed; -} - -template -bool Memcpy2DFromArrayAsync::hipMemcpy2DFromArrayAsync_simple() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - err = hipMemcpy2DFromArrayAsync(A_h, width, A_d, - 0, 0, width, NUM_H, - hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArrayAsync failed for simple copy\n"); - TestPassed = false; - } - - DeAllocateMemory(); - return TestPassed; -} -template -bool Memcpy2DFromArrayAsync::hipMemcpy2DFromArrayAsync_PeerDeviceContext() { - bool TestPassed = true; - int peerAccess = 0; - HIPCHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1)); - if (!peerAccess) { - printf("Skipped the test as there is no peer access\n"); - } else { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipSetDevice(1)); - err = hipMemcpy2DFromArrayAsync(A_h, width, A_d, - 0, 0, NUM_W*sizeof(T), - NUM_H, hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, INITIAL_VAL); - } else { - printf("hipMemcpy2DFromArrayAsync failed for peer device context\n"); - TestPassed = false; - } - DeAllocateMemory(); - } - return TestPassed; -} - -template -bool Memcpy2DFromArrayAsync::hipMemcpy2DFromArrayAsync_SizeCheck() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool TestPassed = true; - // hipMemcpy2DFromArrayAsync API where Destination width is 0 - err = hipMemcpy2DFromArrayAsync(A_h, 0, A_d, - 0, 0, NUM_W*sizeof(T), - NUM_H, hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArrayAsync failed when destination width is zero"); - TestPassed = false; - } - - // hipMemcpy2DFromArrayAsync API where height is zero - // hipMemcpy2DFromArrayAsync API return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2DFromArrayAsync(A_h, width, A_d, 
- 0, 0, NUM_W*sizeof(T), - 0, hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed &= ValidateResult(A_h, 1); - } else { - printf("hipMemcpy2DFromArrayAsync failed when Height is null"); - TestPassed = false; - } - // hipMemcpy2DFromArrayAsync API where width is zero - // hipMemcpy2DFromArrayAsync API return success for width and height as 0 - // Validating the result with the initialized value - err = hipMemcpy2DFromArrayAsync(A_h, width, A_d, - 0, 0, 0, NUM_H, - hipMemcpyDeviceToHost, stream); - HIPCHECK(hipStreamSynchronize(stream)); - if (err == hipSuccess) { - TestPassed &= ValidateResult(A_h, 1); - } else { - printf("hipMemcpy2DFromArrayAsync failed when Width is null"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - -template -bool Memcpy2DFromArrayAsync::hipMemcpy2DFromArrayAsync_NegativeTests() { - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - bool TestPassed = true; - // Passing nullptr to destination - err = hipMemcpy2DFromArrayAsync(nullptr, width, A_d, - 0, 0, width, NUM_H, - hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArrayAsync failed when dest pointer are null"); - TestPassed = false; - } - // Passing nullptr to source - err = hipMemcpy2DFromArrayAsync(A_h, width, nullptr, - 0, 0, width, NUM_H, - hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArrayAsync failed when source pointer are null"); - TestPassed = false; - } - // Passing offset 1 and trying to perform array out of bounds - err = hipMemcpy2DFromArrayAsync(A_h, width, A_d, 1, - 1, width, NUM_H, - hipMemcpyDeviceToHost, stream); - if (err == hipSuccess) { - printf("hipMemcpy2DFromArrayAsync failed offset 1 and perform full copy"); - TestPassed = false; - } - // Copying array more than allocated (array out of bounds) - err = hipMemcpy2DFromArrayAsync(A_h, width, A_d, 0, - 0, width+2, NUM_H+2, - hipMemcpyDeviceToHost, stream); - 
if (err == hipSuccess) { - printf("hipMemcpy2DFromArrayAsync failed where array is out of bound"); - TestPassed = false; - } - - DeAllocateMemory(); - return TestPassed; -} - - -int main(int argc, char **argv) { - bool TestPassed = true; - HipTest::parseStandardArguments(argc, argv, false); - Memcpy2DFromArrayAsync ArrayAsync_obj; - int numDevices = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (p_tests == 1) { - TestPassed = ArrayAsync_obj.hipMemcpy2DFromArrayAsync_simple(); - } else if (p_tests == 2) { - TestPassed &= ArrayAsync_obj.hipMemcpy2DFromArrayAsync_SizeCheck(); - } else if (p_tests == 3) { - if (numDevices > 1) { - TestPassed &= ArrayAsync_obj. - hipMemcpy2DFromArrayAsync_PeerDeviceContext(); - } else { - printf("Skipping the testcases as numDevices <2\n"); - } - } else if (p_tests == 4) { - TestPassed &= ArrayAsync_obj.hipMemcpy2DFromArrayAsync_NegativeTests(); - } else if (p_tests == 5) { - if (numDevices > 1) { - TestPassed &= ArrayAsync_obj. - hipMemcpy2DFromArrayAsync_PinnedHost_SameGPU(); - TestPassed &= ArrayAsync_obj. - hipMemcpy2DFromArrayAsync_PinnedHost_PeerGPU(); - } else { - printf("Skipping the testcase as numDevices <2\n"); - } - } else { - printf("Provide a valid option \n"); - TestPassed = false; - } - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpy2D_simple.cpp b/tests/src/runtimeApi/memory/hipMemcpy2D_simple.cpp deleted file mode 100644 index 039407fe8b..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy2D_simple.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ -/* Intension : Multidevice test to verify hipMemcpy2D(Async) behavior. Test verifies hipMemcpy2D behavior - when one of the device memory is not owned by current device. - i) H2D & D2H -> Device memory is not owned by current device - ii) D2D -> source memory is not owned by current device - Note : To make it simple at present checking API functionality not validating values. 
- */ -#include "test_common.h" -using namespace std; -//#defines -#define Nrows 8 -#define Ncols 8 -// Globals -int Nbytes = Nrows * Ncols * sizeof(char); -bool Copy2D(bool syncCopy, hipMemcpyKind kind) { - char* hPtr = nullptr; - char* devPtr = nullptr; - size_t pitch; - int canAccess = 0; - HIPCHECK(hipDeviceCanAccessPeer(&canAccess, 1, 0)); - if (!canAccess) { - cout << "Exit early as Non-Peer config\n"; - // Returning true as test should not be executed on non-peer configs - return true; - } - hPtr = (char*)malloc(Nrows * Ncols); - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMallocPitch((void**)&devPtr, (size_t*)&pitch, Ncols * sizeof(char), Nrows)); - HIPCHECK(hipSetDevice(1)); - if (syncCopy) { - // API under test : Copy triggered from dev1 but device memory allocated on dev0 - if (kind == hipMemcpyHostToDevice) { - HIPCHECK(hipMemcpy2D(devPtr, pitch, hPtr, Ncols * sizeof(char), Ncols * sizeof(char), Nrows, - hipMemcpyHostToDevice)); - } else if (kind == hipMemcpyDeviceToHost) { - HIPCHECK(hipMemcpy2D(hPtr, Ncols * sizeof(char), devPtr, pitch, Ncols * sizeof(char), Nrows, - hipMemcpyDeviceToHost)); - } else if (kind == hipMemcpyDeviceToDevice) { - char* devPtr1; - size_t pitch1; - HIPCHECK(hipMallocPitch((void**)&devPtr1, (size_t*)&pitch1, Ncols * sizeof(char), Nrows)); - // API under test : Copy triggered from dev1 but device memory allocated on dev0 - HIPCHECK(hipMemcpy2D(devPtr1, pitch1, devPtr, pitch, Ncols * sizeof(char), Nrows, - hipMemcpyDeviceToDevice)); - HIPCHECK(hipFree(devPtr1)); - } - } else { - hipStream_t pStream; - HIPCHECK(hipStreamCreate(&pStream)); - if (kind == hipMemcpyHostToDevice) { - HIPCHECK(hipMemcpy2DAsync(devPtr, pitch, hPtr, Ncols * sizeof(char), Ncols * sizeof(char), - Nrows, hipMemcpyHostToDevice, pStream)); - } else if (kind == hipMemcpyDeviceToHost) { - HIPCHECK(hipMemcpy2DAsync(hPtr, Ncols * sizeof(char), devPtr, pitch, Ncols * sizeof(char), - Nrows, hipMemcpyDeviceToHost, pStream)); - } else if (kind == hipMemcpyDeviceToDevice) 
{ - char* devPtr1; - size_t pitch1; - HIPCHECK(hipMallocPitch((void**)&devPtr1, (size_t*)&pitch1, Ncols * sizeof(char), Nrows)); - HIPCHECK(hipMemcpy2DAsync(devPtr1, pitch1, devPtr, pitch, Ncols * sizeof(char), Nrows, - hipMemcpyDeviceToDevice, pStream)); - HIPCHECK(hipFree(devPtr1)); - } - HIPCHECK(hipStreamSynchronize(pStream)); - HIPCHECK(hipStreamDestroy(pStream)); - } - // Free allocations - HIPCHECK(hipFree(devPtr)); - free(hPtr); - return true; -} -int main() { - int numDev = 0; - HIPCHECK(hipGetDeviceCount(&numDev)); - if (numDev == 0) { - failed("No device found"); - } else if (numDev == 1) { - passed(); - } - bool status = true; - status &= Copy2D(true, hipMemcpyHostToDevice); // Sync copy, H2D - status &= Copy2D(false, hipMemcpyHostToDevice); // Async copy, H2D - status &= Copy2D(true, hipMemcpyDeviceToHost); // Sync copy, D2H - status &= Copy2D(false, hipMemcpyDeviceToHost); // Async copy, D2H - status &= Copy2D(true, hipMemcpyDeviceToDevice); // Sync copy, D2D - status &= Copy2D(false, hipMemcpyDeviceToDevice); // Async copy, D2D - // Validate final result - if (!status) { - failed("Failed"); - } - passed(); -} \ No newline at end of file diff --git a/tests/src/runtimeApi/memory/hipMemcpy3D.cpp b/tests/src/runtimeApi/memory/hipMemcpy3D.cpp deleted file mode 100644 index 59277a2bc0..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy3D.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -template -void runTest(int width,int height,int depth, hipChannelFormatKind formatKind) -{ - unsigned int size = width * height * depth * sizeof(T); - T* hData = (T*) malloc(size); - memset(hData, 0, size); - - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width +k] = i*width*height + j*width + k; - } - } - } - printf("test- sizeof(T) =%zu\n", sizeof(T)); - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8, 0, 0, 0, formatKind); - hipArray *arr,*arr1; - - HIPCHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height, depth), hipArrayDefault)); - HIPCHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height, depth), hipArrayDefault)); - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); - myparms.dstArray = arr; - myparms.extent = make_hipExtent(width , height, depth); -#ifdef __HIP_PLATFORM_NVIDIA__ - myparms.kind = cudaMemcpyHostToDevice; -#else - myparms.kind = hipMemcpyHostToDevice; -#endif - HIPCHECK(hipMemcpy3D(&myparms)); - HIPCHECK(hipDeviceSynchronize()); - //Array to Array - memset(&myparms,0x0, sizeof(hipMemcpy3DParms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcArray = arr; - myparms.dstArray = arr1; - myparms.extent = make_hipExtent(width, height, depth); -#ifdef __HIP_PLATFORM_NVIDIA__ - myparms.kind = cudaMemcpyDeviceToDevice; -#else - myparms.kind = hipMemcpyDeviceToDevice; -#endif - HIPCHECK(hipMemcpy3D(&myparms)); - HIPCHECK(hipDeviceSynchronize()); - - T *hOutputData = (T*) malloc(size); - memset(hOutputData, 0, size); - //Device to host - memset(&myparms,0x0, sizeof(hipMemcpy3DParms)); - myparms.srcPos = 
make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(T), width, height); - myparms.srcArray = arr1; - myparms.extent = make_hipExtent(width, height, depth); -#ifdef __HIP_PLATFORM_NVIDIA__ - myparms.kind = cudaMemcpyDeviceToHost; -#else - myparms.kind = hipMemcpyDeviceToHost; -#endif - HIPCHECK(hipMemcpy3D(&myparms)); - HIPCHECK(hipDeviceSynchronize()); - - // Check result - HipTest::checkArray(hData,hOutputData,width,height,depth); - hipFreeArray(arr); - hipFreeArray(arr1); - free(hData); - free(hOutputData); -} - -int main(int argc, char **argv) -{ - checkImageSupport(); - for(int i=1;i<25;i++) - { - runTest(i,i,i, hipChannelFormatKindFloat); - runTest(i+1,i,i, hipChannelFormatKindSigned); - runTest(i,i+1,i, hipChannelFormatKindSigned); - } - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyAll.cpp b/tests/src/runtimeApi/memory/hipMemcpyAll.cpp deleted file mode 100644 index dc9df9997b..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyAll.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include -#include -#include "test_common.h" - -#define len 1024 * 1024 -#define size len * sizeof(float) - -template -void hmemset(T* ptr, T value) { - for (int i = 0; i < len; i++) { - ptr[i] = value; - } -} - -int main() { - int num; - hipGetDeviceCount(&num); - if (num < 2) { - printf("warning: Not enough GPUs to run the test, exiting without running.\n"); - passed(); - return 0; - } - - float *h0, *h1; - float *ph0, *ph1; - float *d0, *d1; - h0 = new float[len]; - h1 = new float[len]; - hmemset(h0, 1.0f); - int gpu0 = 0, gpu1 = 1; - hipSetDevice(gpu0); - hipHostMalloc((void**)&ph0, size); - hipMalloc(&d0, size); - hipSetDevice(gpu1); - hipHostMalloc((void**)&ph1, size); - hipMalloc(&d1, size); - hipSetDevice(gpu0); - - - hipMemcpy(h1, h0, size, hipMemcpyDefault); - hipMemcpy(ph0, h1, size, hipMemcpyDefault); - hipMemcpy(ph1, ph0, size, hipMemcpyDefault); - assert(h0[0] == ph1[0]); - hmemset(ph1, 0.0f); - hipMemcpy(h0, ph1, size, hipMemcpyDefault); - assert(h0[0] == 0.0f); - - - hipSetDevice(gpu0); - hmemset(ph0, 2.0f); - hipMemcpy(d0, ph0, size, hipMemcpyDefault); - hipMemcpy(h0, d0, size, hipMemcpyDefault); - - assert(h0[0] == ph0[0]); - hmemset(h0, 3.0f); - hipMemcpy(d0, h0, size, hipMemcpyDefault); - - hipMemcpy(ph0, d0, size, hipMemcpyDefault); - - assert(h0[0] == ph0[0]); - - hipSetDevice(gpu1); - hmemset(ph1, 2.0f); - hipMemcpy(d1, ph1, size, hipMemcpyDefault); - - hipMemcpy(h1, d1, size, hipMemcpyDefault); - - assert(h1[0] == ph1[0]); - hmemset(h1, 3.0f); - hipMemcpy(d1, h1, size, hipMemcpyDefault); - - hipMemcpy(ph1, d1, size, hipMemcpyDefault); - - assert(h1[0] == ph1[0]); - - 
hipSetDevice(gpu0); - hmemset(ph0, 4.0f); - hipMemcpy(d0, ph0, size, hipMemcpyDefault); - - hipMemcpy(ph0, d0, size, hipMemcpyDefault); - - hipMemcpy(h0, d0, size, hipMemcpyDefault); - - assert(ph0[0] == 4.0f); - assert(h0[0] == 4.0f); - - hipSetDevice(gpu1); - hmemset(ph1, 5.0f); - hipMemcpy(d1, ph1, size, hipMemcpyDefault); - - hipMemcpy(ph1, d1, size, hipMemcpyDefault); - - hipMemcpy(h1, d1, size, hipMemcpyDefault); - - assert(ph1[0] == 5.0f); - assert(h1[0] == 5.0f); - - hipSetDevice(gpu0); - hipMemcpy(d0, ph1, size, hipMemcpyDefault); - - hipMemcpy(d1, d0, size, hipMemcpyDefault); - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp b/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp deleted file mode 100644 index 9dde7b2b59..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyAsync.cpp +++ /dev/null @@ -1,381 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// Test under-development. Calls async mem-copy API, experiment with functionality. - -#include "hip/hip_runtime.h" -#include "test_common.h" -unsigned p_streams = 2; - - -void simpleNegTest() { - printf("testing: %s\n", __func__); - hipError_t e; - float *A_malloc, *A_pinned, *A_d; - - size_t Nbytes = N * sizeof(float); - A_malloc = (float*)malloc(Nbytes); - HIPCHECK(hipHostMalloc((void**)&A_pinned, Nbytes, hipHostMallocDefault)); - A_d = NULL; - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPASSERT(A_d != NULL); - // Can't use default with async copy - e = hipMemcpyAsync(A_pinned, A_d, Nbytes, hipMemcpyDefault, NULL); - // HIPASSERT (e == hipSuccess); - - - // Not sure what happens here, the memory must be pinned. - e = hipMemcpyAsync(A_malloc, A_d, Nbytes, hipMemcpyDeviceToHost, NULL); - - printf(" async memcpy of A_malloc to A_d. Result=%d\n", e); - // HIPASSERT (e==hipErrorInvalidValue); -} - -class Pinned; -class Unpinned; - -template -struct HostTraits; - -template <> -struct HostTraits { - static const char* Name() { return "Pinned"; }; - - static void* Alloc(size_t sizeBytes) { - void* p; - HIPCHECK(hipHostMalloc((void**)&p, sizeBytes, hipHostMallocDefault)); - return p; - }; -}; - - -template -__global__ void addK(hipLaunchParm lp, T* A, T K, size_t numElements) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < numElements; i += stride) { - A[i] = A[i] + K; - } -} - - -//--- -// Tests propert dependency resolution between H2D and D2H commands in same stream: -// IN: numInflight : number of copies inflight at any time: -// IN: numPongs = number of iterations to run (iteration) -template -void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs, - bool doHostSide) { - HIPASSERT(numElements % numInflight == 0); // Must be evenly divisible. 
- size_t Nbytes = numElements * sizeof(T); - size_t eachCopyElements = numElements / numInflight; - size_t eachCopyBytes = eachCopyElements * sizeof(T); - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - - printf( - "------------------------------------------------------------------------------------------" - "-----\n"); - printf( - "testing: %s<%s> Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu " - "eachCopyBytes=%zu\n", - __func__, HostTraits::Name(), Nbytes, (double)(Nbytes) / 1024.0 / 1024.0, - numPongs, numInflight, eachCopyElements, eachCopyBytes); - - T* A_h = NULL; - T* A_d = NULL; - - A_h = (T*)(HostTraits::Alloc(Nbytes)); - HIPCHECK(hipMalloc(&A_d, Nbytes)); - - // Initialize the host array: - const T initValue = 13; - const T deviceConst = 2; - const T hostConst = 10000; - for (size_t i = 0; i < numElements; i++) { - A_h[i] = initValue + i; - } - - - for (int k = 0; k < numPongs; k++) { - for (int i = 0; i < numInflight; i++) { - HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes); - HIPCHECK(hipMemcpyAsync(&A_d[i * eachCopyElements], &A_h[i * eachCopyElements], - eachCopyBytes, hipMemcpyHostToDevice, stream)); - } - - hipLaunchKernel(addK, dim3(blocks), dim3(threadsPerBlock), 0, stream, A_d, 2, - numElements); - - for (int i = 0; i < numInflight; i++) { - HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes); - HIPCHECK(hipMemcpyAsync(&A_h[i * eachCopyElements], &A_d[i * eachCopyElements], - eachCopyBytes, hipMemcpyDeviceToHost, stream)); - } - - if (doHostSide) { - assert(0); -#if 0 - hipEvent_t e; - HIPCHECK(hipEventCreate(&e)); -#endif - HIPCHECK(hipDeviceSynchronize()); - for (size_t i = 0; i < numElements; i++) { - A_h[i] += hostConst; - } - } - }; - - HIPCHECK(hipDeviceSynchronize()); - - - // Verify we copied back all the data correctly: - for (size_t i = 0; i < numElements; i++) { - T gold = initValue + i; - // Perform calcs in same order as test above to replicate FP 
order-of-operations: - for (int k = 0; k < numPongs; k++) { - gold += deviceConst; - if (doHostSide) { - gold += hostConst; - } - } - - if (gold != A_h[i]) { - std::cout << i << ": gold=" << gold << " out=" << A_h[i] << std::endl; - HIPASSERT(gold == A_h[i]); - } - } - - - HIPCHECK(hipHostFree(A_h)); - HIPCHECK(hipFree(A_d)); -} - - -//--- -// Send many async copies to the same stream. -// This requires runtime to keep track of many outstanding commands, and in the case of HCC requires -// growing/tracking the signal pool: -template -void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies, - bool syncBetweenCopies) { - size_t Nbytes = numElements * sizeof(T); - size_t eachCopyElements = numElements / numCopies; - size_t eachCopyBytes = eachCopyElements * sizeof(T); - - printf( - "------------------------------------------------------------------------------------------" - "-----\n"); - printf( - "testing: %s Nbytes=%zu (%6.1f MB) numCopies=%d eachCopyElements=%zu eachCopyBytes=%zu\n", - __func__, Nbytes, (double)(Nbytes) / 1024.0 / 1024.0, numCopies, eachCopyElements, - eachCopyBytes); - - T* A_d; - T *A_h1, *A_h2; - - HIPCHECK(hipHostMalloc((void**)&A_h1, Nbytes, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&A_h2, Nbytes, hipHostMallocDefault)); - HIPCHECK(hipMalloc(&A_d, Nbytes)); - - for (int i = 0; i < numElements; i++) { - A_h1[i] = 3.14f + static_cast(i); - } - - - // stream=0; // fixme TODO - - - for (int i = 0; i < numCopies; i++) { - HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes); - HIPCHECK(hipMemcpyAsync(&A_d[i * eachCopyElements], &A_h1[i * eachCopyElements], - eachCopyBytes, hipMemcpyHostToDevice, stream)); - } - - if (syncBetweenCopies) { - HIPCHECK(hipDeviceSynchronize()); - } - - for (int i = 0; i < numCopies; i++) { - HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes); - HIPCHECK(hipMemcpyAsync(&A_h2[i * eachCopyElements], &A_d[i * eachCopyElements], - eachCopyBytes, hipMemcpyDeviceToHost, stream)); - } - 
- HIPCHECK(hipDeviceSynchronize()); - - - // Verify we copied back all the data correctly: - for (int i = 0; i < numElements; i++) { - HIPASSERT(A_h1[i] == A_h2[i]); - } - - - HIPCHECK(hipHostFree(A_h1)); - HIPCHECK(hipHostFree(A_h2)); - HIPCHECK(hipFree(A_d)); -} - - -//--- -// Classic example showing how to overlap data transfer with compute. -// We divide the work into "chunks" and create a stream for each chunk. -// Each chunk then runs a H2D copy, followed by kernel execution, followed by D2H copyback. -// Work in separate streams is independent which enables concurrency. - -// IN: nStreams : number of streams to use for the test -// IN :useNullStream - use NULL stream. Synchronizes everything. -// IN: useSyncMemcpyH2D - use sync memcpy (no overlap) for H2D -// IN: useSyncMemcpyD2H - use sync memcpy (no overlap) for D2H -void test_chunkedAsyncExample(int nStreams, bool useNullStream, bool useSyncMemcpyH2D, - bool useSyncMemcpyD2H) { - size_t Nbytes = N * sizeof(int); - printf("testing: %s(useNullStream=%d, useSyncMemcpyH2D=%d, useSyncMemcpyD2H=%d) ", __func__, - useNullStream, useSyncMemcpyH2D, useSyncMemcpyD2H); - printf("Nbytes=%zu (%6.1f MB)\n", Nbytes, (double)(Nbytes) / 1024.0 / 1024.0); - - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, true); - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - - hipStream_t* stream = (hipStream_t*)malloc(sizeof(hipStream_t) * nStreams); - if (useNullStream) { - nStreams = 1; - stream[0] = NULL; - } else { - for (int i = 0; i < nStreams; ++i) { - HIPCHECK(hipStreamCreate(&stream[i])); - } - } - - - size_t workLeft = N; - size_t workPerStream = N / nStreams; - for (int i = 0; i < nStreams; ++i) { - size_t work = (workLeft < workPerStream) ? 
workLeft : workPerStream; - size_t workBytes = work * sizeof(int); - - size_t offset = i * workPerStream; - HIPASSERT(A_d + offset < A_d + Nbytes); - HIPASSERT(B_d + offset < B_d + Nbytes); - HIPASSERT(C_d + offset < C_d + Nbytes); - if (useSyncMemcpyH2D) { - HIPCHECK(hipMemcpy(&A_d[offset], &A_h[offset], workBytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(&B_d[offset], &B_h[offset], workBytes, hipMemcpyHostToDevice)); - } else { - HIPCHECK(hipMemcpyAsync(&A_d[offset], &A_h[offset], workBytes, hipMemcpyHostToDevice, - stream[i])); - HIPCHECK(hipMemcpyAsync(&B_d[offset], &B_h[offset], workBytes, hipMemcpyHostToDevice, - stream[i])); - }; - - hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], - &A_d[offset], &B_d[offset], &C_d[offset], work); - - if (useSyncMemcpyD2H) { - HIPCHECK(hipMemcpy(&C_h[offset], &C_d[offset], workBytes, hipMemcpyDeviceToHost)); - } else { - HIPCHECK(hipMemcpyAsync(&C_h[offset], &C_d[offset], workBytes, hipMemcpyDeviceToHost, - stream[i])); - } - } - - - HIPCHECK(hipDeviceSynchronize()); - - - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, true); - - free(stream); -}; - - -//--- -// Parse arguments specific to this test. 
-void parseMyArguments(int argc, char* argv[]) { - int more_argc = HipTest::parseStandardArguments(argc, argv, false); - - // parse args for this test: - for (int i = 1; i < more_argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, "--streams")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &p_streams)) { - failed("Bad streams argument"); - } - } else { - failed("Bad argument '%s'", arg); - } - }; -}; - - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, false); - parseMyArguments(argc, argv); - - - printf("info: set device to %d tests=%x\n", p_gpuDevice, p_tests); - HIPCHECK(hipSetDevice(p_gpuDevice)); - - if (p_tests & 0x01) { - simpleNegTest(); - } - - if (p_tests & 0x02) { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - test_manyInflightCopies(stream, 1024, 16, true); - test_manyInflightCopies( - stream, 1024, 4, true); // verify we re-use the same entries instead of growing pool. - test_manyInflightCopies(stream, 1024 * 8, 64, false); - - HIPCHECK(hipStreamDestroy(stream)); - } - - - if (p_tests & 0x04) { - test_chunkedAsyncExample(p_streams, true, true, true); // Easy sync version - test_chunkedAsyncExample(p_streams, false, true, true); // Easy sync version - test_chunkedAsyncExample(p_streams, false, false, true); // Some async - test_chunkedAsyncExample(p_streams, false, false, false); // All async - } - - if (p_tests & 0x08) { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - // test_pingpong(stream, 1024*1024*32, 1, 1, false); - // test_pingpong(stream, 1024*1024*32, 1, 10, false); - - HIPCHECK(hipStreamDestroy(stream)); - } - - - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyAsync2.cpp b/tests/src/runtimeApi/memory/hipMemcpyAsync2.cpp deleted file mode 100644 index b6beb81375..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyAsync2.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * HIT_END - */ - -#include "test_common.h" - -#define SIZE 1024 * 1024 - -int main() { - float *A, *Ad; - HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocDefault)); - HIPCHECK(hipMalloc((void**)&Ad, SIZE)); - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - for (int i = 0; i < SIZE; i++) { - HIPCHECK(hipMemcpyAsync(Ad, A, SIZE, hipMemcpyHostToDevice, stream)); - HIPCHECK(hipDeviceSynchronize()); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyAtoH.cpp b/tests/src/runtimeApi/memory/hipMemcpyAtoH.cpp deleted file mode 100644 index f7c6f34adf..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyAtoH.cpp +++ /dev/null @@ -1,229 +0,0 @@ -/* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -/* - * Test Scenarios: - * 1. Perform simple hipMemcpyAtoH - * 2. Perform bytecount 0 validation for hipMemcpyAtoH API - * 3. Allocate Memory from one GPU device and call hipMemcpyAtoH from Peer - * GPU device - * 4. Perform hipMemcpyAtoH Negative Scenarios - * 5. Perform hipMemcpyAtoH on Pinned Host memory - * Scenarios 2 is disabled as there is a corresponding bug raised for it. 
- */ -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t hipMemcpyAtoH_simple --tests 1 - * TEST_NAMED: %t hipMemcpyAtoH_DeviceContextChange --tests 3 - * TEST_NAMED: %t hipMemcpyAtoH_NegativeTests --tests 4 - * TEST_NAMED: %t hipMemcpyAtoH_PinnedHostMemory --tests 5 - * HIT_END - */ -#include "test_common.h" - -#define NUM_W 10 -#define NUM_H 1 -#define INITIAL_VAL 8 -#define BYTE_COUNT 2 -template -class MemcpyAtoH { - hipArray *A_d; - T *hData, *B_h; - size_t width; - size_t height; - public: - void AllocateMemory(); - void DeAllocateMemory(); - bool hipMemcpyAtoH_NegativeTests(); - bool hipMemcpyAtoH_simple(); - bool hipMemcpyAtoH_PinnedHostMemory(); - bool hipMemcpyAtoH_ByteCountZero(); - bool hipMemcpyAtoH_PeerDeviceContext(); - bool ValidateResult(T* result, T compare); -}; -template -void MemcpyAtoH::AllocateMemory() { - width = NUM_W * sizeof(T); - height = NUM_H; - hData = reinterpret_cast(malloc(width)); - B_h = reinterpret_cast(malloc(width)); - for (int i = 0; i < NUM_W; i++) { - B_h[i] = 10; - hData[i] = INITIAL_VAL; - } - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HIPCHECK(hipMallocArray(&A_d, &desc, NUM_W, 1, hipArrayDefault)); - HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, hData, sizeof(T)*NUM_W, - sizeof(T)*NUM_W, 1, hipMemcpyHostToDevice)); -} -template -bool MemcpyAtoH::ValidateResult(T *result, T compare) { - bool TestPassed = true; - for (int i = 0; i < BYTE_COUNT; i++) { - if (result[i] != compare) { - TestPassed = false; - break; - } - } - return TestPassed; -} -template -void MemcpyAtoH::DeAllocateMemory() { - hipFreeArray(A_d); - free(hData); - free(B_h); -} -template -bool MemcpyAtoH::hipMemcpyAtoH_simple() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipMemcpyAtoH(B_h, A_d, 0, BYTE_COUNT*sizeof(T))); - TestPassed = ValidateResult(B_h, hData[0]); - DeAllocateMemory(); - return TestPassed; -} - -template -bool 
MemcpyAtoH::hipMemcpyAtoH_PinnedHostMemory() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - T *D_h{nullptr}; - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), width * NUM_H)); - HIPCHECK(hipMemcpyAtoH(D_h, A_d, 0, BYTE_COUNT*sizeof(T))); - TestPassed = ValidateResult(D_h, hData[0]); - HIPCHECK(hipHostFree(D_h)); - DeAllocateMemory(); - return TestPassed; -} - -template -bool MemcpyAtoH::hipMemcpyAtoH_PeerDeviceContext() { - bool TestPassed = true; - int peerAccess = 0; - int numDevices = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - HIPCHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1)); - if (!peerAccess) { - printf("Skipped the test as there is no peer access\n"); - } else { - HIPCHECK(hipSetDevice(0)); - - unsigned int flags = 0; - HIPCHECK(hipGetDeviceFlags(&flags)); - - AllocateMemory(); - HIPCHECK(hipSetDevice(1)); - - // hipMemcpyAtoH will invoke cuda driver api cuMemcpyAtoH() which need - // the primary context for device 1. The primary context can be - // initialized at the first call of a runtime api through hipSetDeviceFlags(). - // Because of no runtime api called before cuMemcpyAtoH(), we have to - // explicitly call hipSetDeviceFlags(). 
- HIPCHECK(hipSetDeviceFlags(flags)); // Only cuda driver api need this - - HIPCHECK(hipMemcpyAtoH(B_h, A_d, 0, BYTE_COUNT*sizeof(T))); - TestPassed = ValidateResult(B_h, hData[0]); - DeAllocateMemory(); - } - } else { - printf("Testcase Skipped as no of devices < 2"); - } - - return TestPassed; -} -template -bool MemcpyAtoH::hipMemcpyAtoH_ByteCountZero() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - hipError_t err; - err = hipMemcpyAtoH(B_h, A_d, 0, 0); - if (err == hipSuccess) { - TestPassed = ValidateResult(B_h, 10); - } else { - printf("hipMemcpyAtoH failed when byteCount is 0 \n"); - TestPassed = false; - } - // Source Array is nullptr - err = hipMemcpyAtoH(B_h, nullptr, 0, BYTE_COUNT*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyAtoH failed when src array is nullptr\n"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} -template -bool MemcpyAtoH::hipMemcpyAtoH_NegativeTests() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - hipError_t err; - // Destination pointer is nullptr - err = hipMemcpyAtoH(nullptr, A_d, 0, BYTE_COUNT*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyAtoH failed when dest ptr is nullptr\n"); - TestPassed = false; - } - // Source offset is more than allocated size - err = hipMemcpyAtoH(B_h, A_d, 100, BYTE_COUNT*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyAtoH failed when source offset invalid\n"); - TestPassed = false; - } - // ByteCount is greater than allocated size - err = hipMemcpyAtoH(B_h, A_d, 0, 12*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyAtoH failed when byteCount > allocatedSize\n"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - - -int main(int argc, char **argv) { - bool TestPassed = true; - checkImageSupport(); - HipTest::parseStandardArguments(argc, argv, false); - MemcpyAtoH AtoH_obj; - if (p_tests == 1) { - TestPassed = AtoH_obj.hipMemcpyAtoH_simple(); - } else 
if (p_tests == 2) { - TestPassed = AtoH_obj.hipMemcpyAtoH_ByteCountZero(); - } else if (p_tests == 3) { -#ifndef _WIN64 - TestPassed = AtoH_obj.hipMemcpyAtoH_PeerDeviceContext(); -#endif - } else if (p_tests == 4) { - TestPassed = AtoH_obj.hipMemcpyAtoH_NegativeTests(); - } else if (p_tests == 5) { - TestPassed = AtoH_obj.hipMemcpyAtoH_PinnedHostMemory(); - } else { - printf("Provide a valid option \n"); - TestPassed = false; - } - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp b/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp deleted file mode 100644 index 15357a1c87..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t - * sizeBytes); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; - int *A_h, *B_h, *C_h; - - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - int canAccessPeer = 0; - hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMalloc(&X_d, Nbytes)); - HIPCHECK(hipMalloc(&Y_d, Nbytes)); - HIPCHECK(hipMalloc(&Z_d, Nbytes)); - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMemcpyDtoD((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes)); - HIPCHECK(hipMemcpyDtoD((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(X_d), static_cast(Y_d), Z_d, N); - HIPCHECK(hipMemcpyDtoH(C_h, (hipDeviceptr_t)Z_d, Nbytes)); - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipFree(X_d)); - HIPCHECK(hipFree(Y_d)); - HIPCHECK(hipFree(Z_d)); - } else { - std::cout<<"Machine does not seem to 
have P2P Capabilities, Empty Pass"< 1) { - - int canAccessPeer = 0; - hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMalloc(&X_d, Nbytes)); - HIPCHECK(hipMalloc(&Y_d, Nbytes)); - HIPCHECK(hipMalloc(&Z_d, Nbytes)); - - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipStreamCreate(&s)); - HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s)); - HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(X_d), static_cast(Y_d), Z_d, N); - HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s)); - HIPCHECK(hipStreamSynchronize(s)); - HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkVectorADD(A_h, B_h, C_h, N); - HIPCHECK(hipStreamDestroy(s)); - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipFree(X_d)); - HIPCHECK(hipFree(Y_d)); - HIPCHECK(hipFree(Z_d)); - } else { - std::cout<<"Machine does not seem to have P2P Capabilities, Empty Pass"< -class MemcpyHtoA { - hipArray *A_d; - T *hData, *B_h, *A_h, *D_h; - size_t width; - size_t height; - public: - void AllocateMemory(); - void DeAllocateMemory(); - bool hipMemcpyHtoA_NegativeTests(); - bool hipMemcpyHtoA_simple(); - bool hipMemcpyHtoA_PinnedHostMemory(); - bool 
hipMemcpyHtoA_ByteCountZero(); - bool hipMemcpyHtoA_PeerDeviceContext(); - bool ValidateResult(T* result, T compare); -}; -template -void MemcpyHtoA::AllocateMemory() { - width = NUM_W * sizeof(T); - height = NUM_H; - hData = reinterpret_cast(malloc(width)); - B_h = reinterpret_cast(malloc(width)); - A_h = reinterpret_cast(malloc(width)); - HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), width * NUM_H)); - for (int i = 0; i < NUM_W; i++) { - A_h[i] = 1; - B_h[i] = 10; - D_h[i] = 123; - hData[i] = INITIAL_VAL; - } - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HIPCHECK(hipMallocArray(&A_d, &desc, NUM_W, 1, hipArrayDefault)); - HIPCHECK(hipMemcpy2DToArray(A_d, 0, 0, hData, sizeof(T)*NUM_W, - sizeof(T)*NUM_W, 1, hipMemcpyHostToDevice)); -} -template -bool MemcpyHtoA::ValidateResult(T *result, T compare) { - bool TestPassed = true; - for (int i = 0; i < BYTECOUNT; i++) { - if (result[i] != compare) { - TestPassed = false; - break; - } - } - return TestPassed; -} -template -void MemcpyHtoA::DeAllocateMemory() { - hipFreeArray(A_d); - free(hData); - free(B_h); - free(A_h); -} -template -bool MemcpyHtoA::hipMemcpyHtoA_simple() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipMemcpyHtoA(A_d, 0, B_h, BYTECOUNT*sizeof(T))); - HIPCHECK(hipMemcpy2DFromArray(A_h, sizeof(T)*NUM_W, A_d, - 0, 0, sizeof(T)*NUM_W, 1, hipMemcpyDeviceToHost)); - TestPassed = ValidateResult(A_h, B_h[0]); - DeAllocateMemory(); - return TestPassed; -} -template -bool MemcpyHtoA::hipMemcpyHtoA_PinnedHostMemory() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - HIPCHECK(hipMemcpyHtoA(A_d, 0, D_h, BYTECOUNT*sizeof(T))); - HIPCHECK(hipMemcpy2DFromArray(A_h, sizeof(T)*NUM_W, A_d, - 0, 0, sizeof(T)*NUM_W, 1, hipMemcpyDeviceToHost)); - TestPassed = ValidateResult(A_h, D_h[0]); - DeAllocateMemory(); - HIPCHECK(hipHostFree(D_h)); - return TestPassed; -} - -template -bool MemcpyHtoA::hipMemcpyHtoA_PeerDeviceContext() { - bool 
TestPassed = true; - int peerAccess = 0; - int numDevices = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - HIPCHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1)); - if (!peerAccess) { - printf("Skipped the test as there is no peer access\n"); - } else { - HIPCHECK(hipSetDevice(0)); - - unsigned int flags = 0; - HIPCHECK(hipGetDeviceFlags(&flags)); - - AllocateMemory(); - HIPCHECK(hipSetDevice(1)); - - // hipMemcpyAtoH will invoke cuda driver api hipMemcpyHtoA() which need - // the primary context for device 1. The primary context can be - // initialized at the first call of a runtime api through hipSetDeviceFlags(). - // Because of no runtime api called before hipMemcpyHtoA(), we have to - // explicitly call hipSetDeviceFlags(). - HIPCHECK(hipSetDeviceFlags(flags)); // Only cuda driver api need this - - HIPCHECK(hipMemcpyHtoA(A_d, 0, B_h, BYTECOUNT*sizeof(T))); - HIPCHECK(hipMemcpy2DFromArray(A_h, sizeof(T)*NUM_W, A_d, - 0, 0, sizeof(T)*NUM_W, 1, hipMemcpyDeviceToHost)); - TestPassed = ValidateResult(A_h, B_h[0]); - DeAllocateMemory(); - } - } else { - printf("Testcase Skipped as no of devices < 2"); - } - return TestPassed; -} -template -bool MemcpyHtoA::hipMemcpyHtoA_ByteCountZero() { - bool TestPassed = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - hipError_t err; - err = hipMemcpyHtoA(A_d, 0, B_h, 0); - HIPCHECK(hipMemcpy2DFromArray(A_h, sizeof(T)*NUM_W, A_d, - 0, 0, sizeof(T)*NUM_W, 1, hipMemcpyDeviceToHost)); - if (err == hipSuccess) { - TestPassed = ValidateResult(A_h, INITIAL_VAL); - } else { - printf("hipMemcpyHtoA failed when byteCount is 0 \n"); - TestPassed = false; - } - // Destination Array is nullptr - err = hipMemcpyHtoA(nullptr, 0, B_h, BYTECOUNT*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyHtoA failed when dest ptr is nullptr\n"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - -template -bool MemcpyHtoA::hipMemcpyHtoA_NegativeTests() { - bool TestPassed = true; - 
HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - hipError_t err; - // Source pinter is nullptr - err = hipMemcpyHtoA(A_d, 0, nullptr, BYTECOUNT*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyHtoA failed when src array is nullptr\n"); - TestPassed = false; - } - // dst offset is more than allocated size - err = hipMemcpyHtoA(A_d, 100, B_h, BYTECOUNT*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyHtoA failed when source offset invalid\n"); - TestPassed = false; - } - // ByteCount is greater than allocated size - err = hipMemcpyHtoA(A_d, 0, B_h, 12*sizeof(T)); - if (err == hipSuccess) { - printf("hipMemcpyHtoA failed when byteCount > allocatedSize\n"); - TestPassed = false; - } - DeAllocateMemory(); - return TestPassed; -} - - -int main(int argc, char **argv) { - bool TestPassed = true; - checkImageSupport(); - HipTest::parseStandardArguments(argc, argv, false); - MemcpyHtoA HtoA_obj; - if (p_tests == 1) { - TestPassed = HtoA_obj.hipMemcpyHtoA_simple(); - } else if (p_tests == 2) { - TestPassed = HtoA_obj.hipMemcpyHtoA_ByteCountZero(); - } else if (p_tests == 3) { - TestPassed = HtoA_obj.hipMemcpyHtoA_PeerDeviceContext(); - } else if (p_tests == 4) { - TestPassed = HtoA_obj.hipMemcpyHtoA_NegativeTests(); - } else if (p_tests == 5) { - TestPassed = HtoA_obj.hipMemcpyHtoA_PinnedHostMemory(); - } else { - printf("Provide a valid option \n"); - TestPassed = false; - } - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyNegativeMThrdMSize.cpp b/tests/src/runtimeApi/memory/hipMemcpyNegativeMThrdMSize.cpp deleted file mode 100644 index 53dae66758..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyNegativeMThrdMSize.cpp +++ /dev/null @@ -1,1192 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// Testcase Description: This test case achieves two scenarios -// 1) Verifies the working of Memcpy apis for range of Memory sizes from -// smallest one unit transfer to maxmem available. -// 2) Launches NUM_THREADS threads. 
Each thread in turn tests the working -// of 8 hipmemcpy apis - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 LINK_OPTIONS -lpthread - * TEST_NAMED: %t hipMemcpyNegativeMThrdMSize_Negative_tests --tests 1 - * TEST_NAMED: %t hipMemcpyNegativeMThrdMSize_MultiThread_tests --tests 2 - * TEST_NAMED: %t hipMemcpyNegativeMThrdMSize_MultiSize_singleType --tests 3 --memcpyPeersOnly 0 --testAllTypes 0 - * HIT_END - */ - -#ifdef __linux__ -#include -#endif -#include -#include -#include "test_common.h" - -#define NUM_THREADS 10 -#define NUM_ELM 1024*1024 -#define HIPTEST_TRUE 1 - -int memcpyPeersOnly = 1; -int testAllTypes = 0; -int Available_Gpus = 0; -std::atomic failureCount{0}; - -enum apiToTest {TEST_MEMCPY, TEST_MEMCPYH2D, TEST_MEMCPYD2H, TEST_MEMCPYD2D, - TEST_MEMCPYASYNC, TEST_MEMCPYH2DASYNC, TEST_MEMCPYD2HASYNC, - TEST_MEMCPYD2DASYNC, TEST_MAX}; -std::vector apiNameToTest = { "hipMemcpy", "hipMemcpyH2D", - "hipMemcpyD2H", "hipMemcpyD2D", "hipMemcpyAsync", - "hipMemcpyH2DAsync", "hipMemcpyD2HAsync", "hipMemcpyD2DAsync" }; - -// If memcpyPeersOnly is true, then checks if given gpus are peers and returns -// true if they are peers, else false -// If memcpyPeersOnly is false, then returns true always -bool gpusIsPeer(int gpu0, int gpu1) { - bool bRet = true; - if (HIPTEST_TRUE == memcpyPeersOnly) { - int CanAccessPeer1 = 0, CanAccessPeer2 = 0; - HIPCHECK(hipDeviceCanAccessPeer(&CanAccessPeer1, gpu0, gpu1)); - HIPCHECK(hipDeviceCanAccessPeer(&CanAccessPeer2, gpu1, gpu0)); - if ((CanAccessPeer1 * CanAccessPeer2) == 0) { - bRet = false; - } - } - - return bRet; -} - -template -class memcpyTests { - public: - T *A_h, *B_h; - apiToTest api; - size_t NUM_ELMTS = 0; - memcpyTests(apiToTest val, size_t num_elmts); - bool Memcpy_And_verify(); - ~memcpyTests(); -}; - -class Memcpy_Negative_Tests { - float *A_h = nullptr, *B_h = nullptr, *A_d = nullptr, *A_d1 = nullptr, - *C_d = nullptr, *C_h = nullptr; - hipStream_t stream; - public: - void 
AllocateMemory(); - void DeAllocateMemory(); - // The following function will test negative scenarios with hipMemcpy() - bool Test_Memcpy(void); - bool Test_MemcpyAsync(void); - bool Test_MemcpyHtoD(void); - bool Test_MemcpyHtoDAsync(void); - bool Test_MemcpyDtoH(void); - bool Test_MemcpyDtoHAsync(void); - bool Test_MemcpyDtoD(void); - bool Test_MemcpyDtoDAsync(void); -}; - -void Memcpy_Negative_Tests::AllocateMemory() { - A_h = reinterpret_cast(malloc(NUM_ELM * sizeof(float))); - B_h = reinterpret_cast(malloc(NUM_ELM * sizeof(float))); - C_h = reinterpret_cast(malloc(NUM_ELM * sizeof(float))); - if ((A_h == nullptr) || (B_h == nullptr) || (C_h == nullptr)) { - failed("Malloc call failed!"); - } - - HIPCHECK(hipMalloc(&A_d, (NUM_ELM*sizeof(float)))); - HIPCHECK(hipMalloc(&A_d1, (NUM_ELM*sizeof(float)))); - HIPCHECK(hipMalloc(&C_d, (NUM_ELM*sizeof(float)))); - - for ( int i = 0; i < NUM_ELM; ++i ) { - A_h[i] = 123; - B_h[i] = 0; - C_h[i] = 1; - } - HIPCHECK(hipStreamCreate(&stream)); -} - -void Memcpy_Negative_Tests::DeAllocateMemory() { - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(A_d1)); - HIPCHECK(hipFree(C_d)); - free(A_h); - free(B_h); - free(C_h); - HIPCHECK(hipStreamDestroy(stream)); -} -bool Memcpy_Negative_Tests::Test_Memcpy(void) { - bool IfTestPassed = true; - hipError_t hipReturn = hipSuccess; - AllocateMemory(); - // Copying only half the memory on device side from host - HIPCHECK(hipMemcpy(A_d, A_h, (NUM_ELM/2) * sizeof(float), hipMemcpyDefault)); - // Copying device memory to host to verify if the content is consistent - HIPCHECK(hipMemcpy(B_h, A_d, NUM_ELM * sizeof(float), hipMemcpyDefault)); - // Verifying the host content copied in the above step for consistency. 
- int Data_mismatch = 0; - - for (int i = 0; i < (NUM_ELM/2); ++i) { - if (B_h[i] != 123) { - Data_mismatch++; - break; - } - } - - if (Data_mismatch != 0) { - printf("Data Mismatch for negative test\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is A_h 123 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpy(C_d, B_h, 0, hipMemcpyDefault); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpy with size 0.\n"); - IfTestPassed = false; - } else { - HIPCHECK(hipMemcpy(C_h, C_d, NUM_ELM * sizeof(float), - hipMemcpyDeviceToHost)); - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != A_h[0]) { - printf("Failed for size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - - hipReturn = hipMemcpy(nullptr, A_d, (NUM_ELM/2) * sizeof(float), - hipMemcpyDefault); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpy with nullptr for destination parameter.\n"); - IfTestPassed = false; - } - - hipReturn = hipMemcpy(A_h, nullptr, (NUM_ELM/2) * sizeof(float), - hipMemcpyDefault); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpy with nullptr for source\n"); - IfTestPassed = false; - } - - hipReturn = hipMemcpy(nullptr, nullptr, (NUM_ELM/2) * sizeof(float), - hipMemcpyDefault); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpy with nullptr for source and destination\n"); - IfTestPassed = false; - } - - // To check the behaviour if both the ptrs provided are same - HIPCHECK(hipMemcpy(A_d, A_d, (NUM_ELM/2) * sizeof(float), hipMemcpyDefault)); - HIPCHECK(hipMemcpy(A_h, A_h, (NUM_ELM/2) * sizeof(float), hipMemcpyDefault)); - - // To check the consistency of the data - HIPCHECK(hipMemcpy(B_h, A_d, (NUM_ELM/2) * sizeof(float), hipMemcpyDefault)); - Data_mismatch = 0; - - for (int i = 0; i < (NUM_ELM/2); ++i) { - if (B_h[i] != 123) { - Data_mismatch++; - break; - } - } - - if (Data_mismatch 
!= 0) { - printf("Data Mismatch after memcpy of same src and destination\n"); - IfTestPassed = false; - } - - DeAllocateMemory(); - return IfTestPassed; -} - -bool Memcpy_Negative_Tests::Test_MemcpyAsync(void) { - bool IfTestPassed = true; - hipError_t hipReturn = hipSuccess; - AllocateMemory(); - // Copying host data into the device. - HIPCHECK(hipMemcpyAsync(A_d1, A_h, NUM_ELM * sizeof(float), - hipMemcpyDefault, stream)); - - // Passing null pointer: seg fault observed with the following. - hipReturn = hipMemcpyAsync(nullptr, A_h, NUM_ELM * sizeof(float), - hipMemcpyDefault, stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyAsync with nullptr for destination\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is A_h 123 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpyAsync(C_d, B_h, 0, hipMemcpyDefault, stream); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpyAsync with size 0.\n"); - IfTestPassed = false; - } else { - HIPCHECK(hipMemcpy(C_h, C_d, NUM_ELM * sizeof(float), - hipMemcpyDeviceToHost)); - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != A_h[0]) { - printf("Failed for hipMemcpyAsync size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - hipReturn = hipMemcpyAsync(A_d, nullptr, NUM_ELM * sizeof(float), - hipMemcpyDefault, stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyAsync with nullptr for source\n"); - IfTestPassed = false; - } - - hipReturn = hipMemcpyAsync(nullptr, nullptr, - NUM_ELM * sizeof(float), - hipMemcpyDefault, stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyAsync nullptr for source and destination\n"); - IfTestPassed = false; - } - - // Passing default stream just for sanity kind of check - HIPCHECK(hipMemcpyAsync(A_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault, - 0)); - - // Passing stream 
object belong to destination gpu - // which is against the suggested usage. - HIPCHECK(hipMemcpyAsync(A_d, A_d1, NUM_ELM * sizeof(float), - hipMemcpyDefault, stream)); - - // Passing incorrect memcpy kind is not allowed hence those scenarios - // are not included - - // Copying only half the memory on device side from host - HIPCHECK(hipMemcpyAsync(A_d, A_h, (NUM_ELM/2) * sizeof(float), - hipMemcpyDefault, stream)); - // Copying device memory to host to verify the content is consistent. - HIPCHECK(hipMemcpy(B_h, A_d, (NUM_ELM/2) * sizeof(float), hipMemcpyDefault)); - - // Verifying the host content copied in the above step for consistency. - int Data_mismatch = 0; - for (int i = 0; i < (NUM_ELM/2); ++i) { - if (B_h[i] != 123) { - Data_mismatch++; - break; - } - } - - if (Data_mismatch != 0) { - printf("Data Mismatch after half the size memcpyAsync\n"); - IfTestPassed = false; - } - - // To check the behaviour if both the ptrs provided are same - HIPCHECK(hipMemcpyAsync(A_d, A_d, (NUM_ELM/2) * sizeof(float), - hipMemcpyDefault, stream)); - HIPCHECK(hipMemcpyAsync(A_h, A_h, (NUM_ELM/2) * sizeof(float), - hipMemcpyDefault, stream)); - // To check the consistency of the data - HIPCHECK(hipMemcpy(B_h, A_d, (NUM_ELM) * sizeof(float), hipMemcpyDefault)); - Data_mismatch = 0; - for (int i = 0; i < (NUM_ELM); ++i) { - if (B_h[i] != 123) { - Data_mismatch++; - break; - } - } - - if (Data_mismatch != 0) { - printf("Data Mismatch after memcpyAsync of same src and destination\n"); - IfTestPassed = false; - } - - HIPCHECK(hipStreamSynchronize(stream)); - DeAllocateMemory(); - return IfTestPassed; -} - -bool Memcpy_Negative_Tests::Test_MemcpyHtoD(void) { - bool IfTestPassed = true; - hipError_t hipReturn = hipSuccess; - AllocateMemory(); - // Passing null ptr to check the API behavior. - // Expectation: It should not crash and exit gracefully. 
- hipReturn = hipMemcpyHtoD(hipDeviceptr_t(nullptr), A_h, - NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyHtoD with nullptr for destination\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is A_h 123 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpyHtoD(hipDeviceptr_t(C_d), B_h, 0); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpyHtoD with size 0.\n"); - IfTestPassed = false; - } else { - HIPCHECK(hipMemcpy(C_h, C_d, NUM_ELM * sizeof(float), - hipMemcpyDeviceToHost)); - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != A_h[0]) { - printf("Failed for hipMemcpyHtoD size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - hipReturn = hipMemcpyHtoD(hipDeviceptr_t(A_d), nullptr, - NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyHtoD with nullptr for source\n"); - IfTestPassed = false; - } - hipReturn = hipMemcpyHtoD(hipDeviceptr_t(nullptr), nullptr, - NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyHtoD nullptr for source and destination\n"); - IfTestPassed = false; - } - // Copy half of the allocated memory - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(A_d), A_h, - NUM_ELM * sizeof(float) / 2)); - // copying back to host to verify - HIPCHECK(hipMemcpy(B_h, A_d, - NUM_ELM * sizeof(float), hipMemcpyDeviceToHost)); - - int Data_mismatch = 0; - for (int i = 0; i < (NUM_ELM / 2); ++i) - if (B_h[i] != 123) - Data_mismatch++; - - if (Data_mismatch != 0) { - printf("Data Mismatch after hipMemcpyHtoD with half size\n"); - IfTestPassed = false; - } - DeAllocateMemory(); - return IfTestPassed; -} - -bool Memcpy_Negative_Tests::Test_MemcpyHtoDAsync(void) { - bool IfTestPassed = true; - hipError_t hipReturn = hipSuccess; - AllocateMemory(); - // Passing null ptr to check the API behavior. 
- // Expectation: It should not crash and exit gracefully. - hipReturn = hipMemcpyHtoDAsync(hipDeviceptr_t(nullptr), A_h, - NUM_ELM * sizeof(float), - stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyHtoDAsync with nullptr for destination\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is A_h 123 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpyHtoDAsync(hipDeviceptr_t(C_d), B_h, 0, stream); - HIPCHECK(hipMemcpy(C_h, C_d, NUM_ELM * sizeof(float), hipMemcpyDeviceToHost)); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpyHtoDAsync with size 0.\n"); - IfTestPassed = false; - } else { - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != A_h[0]) { - printf("Failed for hipMemcpyH2DAsync size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - hipReturn = hipMemcpyHtoDAsync(hipDeviceptr_t(A_d), nullptr, - NUM_ELM * sizeof(float), - stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyHtoDAsync with nullptr for source\n"); - IfTestPassed = false; - } - hipReturn = hipMemcpyHtoDAsync(hipDeviceptr_t(nullptr), nullptr, - NUM_ELM * sizeof(float), - stream); - if (hipReturn == hipSuccess) { - printf("Failed MemcpyHtoDAsync nullptr for source and destination\n"); - IfTestPassed = false; - } - - // Copy half of the allocated memory - HIPCHECK(hipMemcpyHtoDAsync(hipDeviceptr_t(A_d), A_h, - NUM_ELM * sizeof(float)/2, stream)); - // copying back to host to verify - HIPCHECK(hipMemcpyDtoH(B_h, hipDeviceptr_t(A_d), - NUM_ELM * sizeof(float))); - int Data_mismatch = 0; - for (int i = 0; i < (NUM_ELM/2); ++i) - if (B_h[i] != 123) - Data_mismatch++; - if (Data_mismatch != 0) { - printf("Data Mismatch after hipMemcpyHtoDAsync with half size\n"); - IfTestPassed = false; - } - - DeAllocateMemory(); - return IfTestPassed; -} - -bool Memcpy_Negative_Tests::Test_MemcpyDtoH(void) { - 
bool IfTestPassed = true; - hipError_t hipReturn = hipSuccess; - AllocateMemory(); - // Copying data from host to device for further operations - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(A_d), A_h, NUM_ELM * sizeof(float))); - - // Passing null ptr to check the API behavior. - // Expectation: It should not crash and exit gracefully. - hipReturn = hipMemcpyDtoH(nullptr, hipDeviceptr_t(A_d), - NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoH with nullptr for destination\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is 1 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpyDtoH(C_h, hipDeviceptr_t(C_d), 0); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpyDtoH with size 0.\n"); - IfTestPassed = false; - } else { - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != 1) { - printf("Failed for hipMemcpyDtoH size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - hipReturn = hipMemcpyDtoH(A_h, hipDeviceptr_t(nullptr), - NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoH with nullptr for source\n"); - IfTestPassed = false; - } - hipReturn = hipMemcpyDtoH(nullptr, hipDeviceptr_t(nullptr), - NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoH nullptr for source and destination\n"); - IfTestPassed = false; - } - // Copy half of the allocated memory - HIPCHECK(hipMemcpyDtoH(B_h, hipDeviceptr_t(A_d), NUM_ELM * sizeof(float)/2)); - - int Data_mismatch = 0; - for (int i = 0; i < (NUM_ELM/2); ++i) - if (B_h[i] != 123) - Data_mismatch++; - - if (Data_mismatch != 0) { - printf("Data Mismatch after hipMemcpyDtoH with half size\n"); - IfTestPassed = false; - } - - DeAllocateMemory(); - return IfTestPassed; -} - -bool Memcpy_Negative_Tests::Test_MemcpyDtoHAsync(void) { - bool IfTestPassed = true; 
- hipError_t hipReturn = hipSuccess; - AllocateMemory(); - - // Copying data from host to device for further operations - HIPCHECK(hipMemcpyHtoDAsync(hipDeviceptr_t(A_d), A_h, - NUM_ELM * sizeof(float), stream)); - - // Passing null ptr to check the API behavior. - // Expectation: It should not crash and exit gracefully. - hipReturn = hipMemcpyDtoHAsync(nullptr, hipDeviceptr_t(A_d), - NUM_ELM * sizeof(float), - stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoHAsync with nullptr for destination\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is C_h initial value 1 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpyDtoHAsync(C_h, hipDeviceptr_t(C_d), 0, stream); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpyDtoHAsync with size 0.\n"); - IfTestPassed = false; - } else { - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != 1) { - printf("Failed for hipMemcpyD2HAsync size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - hipReturn = hipMemcpyDtoHAsync(A_h, hipDeviceptr_t(nullptr), - NUM_ELM * sizeof(float), - stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoHAsync with nullptr for source\n"); - IfTestPassed = false; - } - hipReturn = hipMemcpyDtoHAsync(nullptr, hipDeviceptr_t(nullptr), - NUM_ELM * sizeof(float), - stream); - if (hipReturn == hipSuccess) { - printf("Failed hipMemcpyDtoHAsync nullptr for source and destination\n"); - IfTestPassed = false; - } - - // Copy half of the allocated memory - HIPCHECK(hipMemcpyDtoHAsync(B_h, hipDeviceptr_t(A_d), - NUM_ELM * sizeof(float)/2, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - int Data_mismatch = 0; - for (int i = 0; i < (NUM_ELM/2); ++i) - if (B_h[i] != 123) - Data_mismatch++; - - if (Data_mismatch != 0) { - printf("Data Mismatch after hipMemcpyDtoHAsync with half size\n"); - IfTestPassed = 
false; - } - // Checking the api with default stream - HIPCHECK(hipMemcpyDtoHAsync(B_h, hipDeviceptr_t(A_d), - NUM_ELM * sizeof(float), 0)); - // Setting device memory to zero - - DeAllocateMemory(); - return IfTestPassed; -} - -bool Memcpy_Negative_Tests::Test_MemcpyDtoD(void) { - bool IfTestPassed = true; - hipError_t hipReturn = hipSuccess; - AllocateMemory(); - float *A_d2 = nullptr, *Ad1 = nullptr; - HIPCHECK(hipMalloc(&Ad1, (NUM_ELM * sizeof(float)))); - HIPCHECK(hipMemset(A_d1, 0, NUM_ELM * sizeof(float))); - if (Available_Gpus > 1) { - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMalloc(&A_d2, (NUM_ELM * sizeof(float)))); - HIPCHECK(hipMemset(A_d2, 1, NUM_ELM * sizeof(float))); - } - // Passing null pointers to check the behaviour:: - hipReturn = hipMemcpyDtoD(hipDeviceptr_t(&A_d1), - hipDeviceptr_t(nullptr), NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoD with nullptr for source\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is A_h 123 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpyDtoD(hipDeviceptr_t(&C_d), hipDeviceptr_t(&A_d2), 0); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpyDtoD with size 0.\n"); - IfTestPassed = false; - } else { - HIPCHECK(hipMemcpy(C_h, C_d, NUM_ELM * sizeof(float), - hipMemcpyDeviceToHost)); - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != A_h[0]) { - printf("Failed for hipMemcpyDtoD size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - hipReturn = hipMemcpyDtoD(hipDeviceptr_t(nullptr), - hipDeviceptr_t(&A_d2), NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoD with nullptr for destination\n"); - IfTestPassed = false; - } - hipReturn = hipMemcpyDtoD(hipDeviceptr_t(nullptr), - hipDeviceptr_t(nullptr), NUM_ELM * sizeof(float)); - if (hipReturn == hipSuccess) { - 
printf("Failed for hipMemcpyDtoD nullptr for source and destination\n"); - IfTestPassed = false; - } - - // Pass real but host ptr:: The below two scenarios gives seg fault. - // Behaviour is as expected - // HIPCHECK(hipMemcpyDtoD(&A_d1, &A_h, NUM_ELM * sizeof(float))); - // HIPCHECK(hipMemcpyDtoD(&A_h, &A_d1, NUM_ELM * sizeof(float))); - int Data_mismatch = 0; - // Copying half of actually allocated memory - HIPCHECK(hipSetDevice(0)); - if (Available_Gpus > 1) { - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(A_d1), A_h, NUM_ELM * sizeof(float))); - if (true == gpusIsPeer(0, 1)) { - HIPCHECK(hipMemcpyDtoD(hipDeviceptr_t(A_d2), hipDeviceptr_t(A_d1), - NUM_ELM * sizeof(float)/2)); - HIPCHECK(hipMemcpyDtoH(B_h, hipDeviceptr_t(A_d2), - NUM_ELM * sizeof(float))); - for (int i = 0; i < NUM_ELM/2; ++i) { - if (B_h[i] != 123) - Data_mismatch++; - } - if (Data_mismatch != 0) { - printf("Data mismatch hipMemcpyDtoD between devices\n"); - IfTestPassed = false; - } - } - } - - // Passing same pointers for source and destination - HIPCHECK(hipMemcpyDtoD(hipDeviceptr_t(A_d1), - hipDeviceptr_t(A_d1), - NUM_ELM * sizeof(float))); - if (Available_Gpus > 1) { - HIPCHECK(hipMemcpyDtoD(hipDeviceptr_t(A_d2), - hipDeviceptr_t(A_d2), - NUM_ELM * sizeof(float))); - } - - DeAllocateMemory(); - HIPCHECK(hipFree(Ad1)); - if (Available_Gpus > 1) - HIPCHECK(hipFree(A_d2)); - - return IfTestPassed; -} - -bool Memcpy_Negative_Tests::Test_MemcpyDtoDAsync(void) { - bool IfTestPassed = true; - hipError_t hipReturn = hipSuccess; - AllocateMemory(); - float *A_d2 = nullptr, *Ad1 = nullptr; - HIPCHECK(hipMalloc(&Ad1, (NUM_ELM * sizeof(float)))); - HIPCHECK(hipMemset(A_d1, 0, NUM_ELM * sizeof(float))); - if (Available_Gpus > 1) { - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMalloc(&A_d2, (NUM_ELM * sizeof(float)))); - HIPCHECK(hipMemset(A_d2, 1, NUM_ELM * sizeof(float))); - } - // Passing null pointers to check the behaviour:: - hipReturn = hipMemcpyDtoDAsync(hipDeviceptr_t(&A_d1), - hipDeviceptr_t(nullptr), - 
NUM_ELM * sizeof(float), stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoDAsync with nullptr for source\n"); - IfTestPassed = false; - } - // Passing 0 to size and it should return Success - // Validating it with the initial value which is A_h 123 - HIPCHECK(hipMemcpy(C_d, A_h, NUM_ELM * sizeof(float), hipMemcpyDefault)); - hipReturn = hipMemcpyDtoDAsync(hipDeviceptr_t(&C_d), - hipDeviceptr_t(&A_d2), 0, stream); - if (hipReturn != hipSuccess) { - printf("Failed for hipMemcpyDtoDAsync with size 0.\n"); - IfTestPassed = false; - } else { - HIPCHECK(hipMemcpy(C_h, C_d, NUM_ELM * sizeof(float), - hipMemcpyDeviceToHost)); - for (int i =0; i < NUM_ELM; i++) { - if (C_h[i] != A_h[0]) { - printf("Failed for hipMemcpyDtoDAsync size 0 and data modified \n"); - IfTestPassed = false; - break; - } - } - } - hipReturn = hipMemcpyDtoDAsync(hipDeviceptr_t(nullptr), - hipDeviceptr_t(&A_d2), - NUM_ELM * sizeof(float), stream); - if (hipReturn == hipSuccess) { - printf("Failed for hipMemcpyDtoDAsync with nullptr for destination\n"); - IfTestPassed = false; - } - hipReturn = hipMemcpyDtoDAsync(hipDeviceptr_t(nullptr), - hipDeviceptr_t(nullptr), - NUM_ELM * sizeof(float), stream); - if (hipReturn == hipSuccess) { - printf("Failed MemcpyDtoDAsync with nullptr for source and destination\n"); - IfTestPassed = false; - } - - int Data_mismatch = 0; - // Copying half of actually allocated memory - HIPCHECK(hipSetDevice(0)); - if (Available_Gpus > 1) { - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(A_d1), A_h, NUM_ELM * sizeof(float))); - if (true == gpusIsPeer(0, 1)) { - HIPCHECK(hipMemcpyDtoDAsync(hipDeviceptr_t(A_d2), - hipDeviceptr_t(A_d1), NUM_ELM * sizeof(float)/2, stream)); - HIPCHECK(hipMemcpyDtoH(B_h, hipDeviceptr_t(A_d2), - NUM_ELM * sizeof(float))); - for (int i = 0; i < NUM_ELM/2; ++i) { - if (B_h[i] != 123) - Data_mismatch++; - } - if (Data_mismatch != 0) { - printf("Data mismatch hipMemcpyDtoDAsync between devices\n"); - IfTestPassed = false; - } - } - } - - 
// Testing hipMemcpyDtoDAsync between two devices. - if (Available_Gpus > 1) { - if (true == gpusIsPeer(0, 1)) { - HIPCHECK(hipMemcpyDtoDAsync(hipDeviceptr_t(A_d2), - hipDeviceptr_t(A_d1), NUM_ELM * sizeof(float), 0)); - } - } - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipFree(Ad1)); - if (Available_Gpus > 1) - HIPCHECK(hipFree(A_d2)); - - return IfTestPassed; -} - -template -memcpyTests::memcpyTests(apiToTest val, size_t num_elmts) { - api = val; - NUM_ELMTS = num_elmts; - A_h = reinterpret_cast(malloc(NUM_ELMTS * sizeof(T))); - B_h = reinterpret_cast(malloc(NUM_ELMTS * sizeof(T))); - if ((A_h == nullptr) || (B_h == nullptr)) { - exit(1); - } - - for (size_t i = 0; i < NUM_ELMTS; ++i) { - A_h[i] = 123; - B_h[i] = 0; - } -} - -template -bool memcpyTests::Memcpy_And_verify() { - bool bFail = false; - std::atomic Data_mismatch{0}; - T *A_d[Available_Gpus]; - hipStream_t stream[Available_Gpus]; - for (int i = 0; i < Available_Gpus; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMalloc(&A_d[i], NUM_ELMTS * sizeof(T))); - if (api >= TEST_MEMCPYD2D) { - HIPCHECK(hipStreamCreate(&stream[i])); - } - } - HIPCHECK(hipSetDevice(0)); - - switch (api) { - case TEST_MEMCPY: // To test hipMemcpy() - // Copying data from host to individual devices followed by copying - // back to host and verifying the data consistency. 
- for (int i = 0; i < Available_Gpus; ++i) { - Data_mismatch = 0; - HIPCHECK(hipMemcpy(A_d[i], A_h, NUM_ELMTS * sizeof(T), - hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_h, A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDeviceToHost)); - for (int j = 0; j < NUM_ELMTS; ++j) { - if (A_h[j] != B_h[j]) { - Data_mismatch++; - } - } - - if (Data_mismatch.load() != 0) { - printf("hipMemcpy: Failed for GPU: %d\n", i); - bFail = true; - } - } - // Device to Device copying for all combinations - for (int i = 0; i < Available_Gpus; ++i) { - for (int j = i+1; j < Available_Gpus; ++j) { - if (true == gpusIsPeer(i, j)) { - Data_mismatch = 0; - HIPCHECK(hipMemcpy(A_d[j], A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDefault)); - // Copying in direction reverse of above to check if bidirectional - // access is happening without any error - HIPCHECK(hipMemcpy(A_d[i], A_d[j], NUM_ELMTS * sizeof(T), - hipMemcpyDefault)); - // Copying data to host to verify the content - HIPCHECK(hipMemcpy(B_h, A_d[j], NUM_ELMTS * sizeof(T), - hipMemcpyDefault)); - for (int k = 0; k < NUM_ELMTS; ++k) { - if (A_h[k] != B_h[k]) - Data_mismatch++; - } - - if (Data_mismatch.load() != 0) { - printf("hipMemcpy: Failed between GPU: %d and %d\n", i, j); - bFail = true; - } - } - } - } - break; - case TEST_MEMCPYH2D: // To test hipMemcpyHtoD() - for (int i = 0; i < Available_Gpus; ++i) { - Data_mismatch = 0; - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(A_d[i]), - A_h, NUM_ELMTS * sizeof(T))); - // Copying data from device to host to check data consistency - HIPCHECK(hipMemcpy(B_h, A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDeviceToHost)); - for (size_t j = 0; j < NUM_ELMTS; ++j) { - if (A_h[j] != B_h[j]) - Data_mismatch++; - } - if (Data_mismatch.load() != 0) { - printf("hipMemcpyHtoD: failed for GPU %d \n", i); - bFail = true; - } - } - break; - case TEST_MEMCPYD2H: // To test hipMemcpyDtoH()--done - for (int i = 0; i < Available_Gpus; ++i) { - Data_mismatch = 0; - HIPCHECK(hipMemcpy(A_d[i], A_h, NUM_ELMTS * sizeof(T), - 
hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpyDtoH(B_h, hipDeviceptr_t(A_d[i]), - NUM_ELMTS * sizeof(T))); - for (size_t j = 0; j < NUM_ELMTS; ++j) { - if (A_h[j] != B_h[j]) - Data_mismatch++; - } - if (Data_mismatch.load() != 0) { - printf("hipMemcpyDtoH: failed for GPU %d \n", i); - bFail = true; - } - } - break; - case TEST_MEMCPYD2D: // To test hipMemcpyDtoD() - if (Available_Gpus > 1) { - // First copy data from H to D and then from D to D followed by D to H - // HIPCHECK(hipMemcpyHtoD(A_d[0], A_h, NUM_ELMTS * sizeof(T))); - for (int i = 0; i < Available_Gpus; ++i) { - for (int j = i+1; j < Available_Gpus; ++j) { - if (true == gpusIsPeer(i, j)) { - Data_mismatch = 0; - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(A_d[i]), - A_h, NUM_ELMTS * sizeof(T))); - HIPCHECK(hipMemcpyDtoD(hipDeviceptr_t(A_d[j]), - hipDeviceptr_t(A_d[i]), NUM_ELMTS * sizeof(T))); - // Copying in direction reverse of above to check if bidirectional - // access is happening without any error - HIPCHECK(hipMemcpyDtoD(hipDeviceptr_t(A_d[i]), - hipDeviceptr_t(A_d[j]), NUM_ELMTS * sizeof(T))); - HIPCHECK(hipMemcpy(B_h, A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDeviceToHost)); - for (size_t k = 0; k < NUM_ELMTS; ++k) { - if (A_h[k] != B_h[k]) - Data_mismatch++; - } - if (Data_mismatch.load() != 0) { - printf("hipMemcpyDtoD: failed between GPU: %d and %d\n", i, j); - bFail = true; - } - } - } - } - } else { - // As DtoD is not possible we will transfer data from HtH(A_h to B_h) - // so as to get through verification step - HIPCHECK(hipMemcpy(B_h, A_h, NUM_ELMTS * sizeof(T), - hipMemcpyHostToHost)); - for (size_t i = 0; i < NUM_ELMTS; ++i) { - if (A_h[i] != B_h[i]) - Data_mismatch++; - } - if (Data_mismatch.load() != 0) { - printf("hipMemcpy (Host to Host): failed\n"); - bFail = true; - } - } - break; - case TEST_MEMCPYASYNC: // To test hipMemcpyAsync() - // Copying data from host to individual devices followed by copying - // back to host and verifying the data consistency. 
- for (int i = 0; i < Available_Gpus; ++i) { - Data_mismatch = 0; - HIPCHECK(hipMemcpyAsync(A_d[i], A_h, NUM_ELMTS * sizeof(T), - hipMemcpyHostToDevice, stream[i])); - HIPCHECK(hipMemcpyAsync(B_h, A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDeviceToHost, stream[i])); - HIPCHECK(hipStreamSynchronize(stream[i])); - for (size_t k = 0; k < NUM_ELMTS; ++k) { - if (A_h[k] != B_h[k]) - Data_mismatch++; - } - - if (Data_mismatch.load() != 0) { - printf("hipMemcpyAsync: failed for GPU %d\n", i); - bFail = true; - } - } - // Device to Device copying for all combinations - for (int i = 0; i < Available_Gpus; ++i) { - for (int j = i+1; j < Available_Gpus; ++j) { - if (true == gpusIsPeer(i, j)) { - Data_mismatch = 0; - HIPCHECK(hipMemcpyAsync(A_d[j], A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDefault, stream[i])); - // Copying in direction reverse of above to check if bidirectional - // access is happening without any error - HIPCHECK(hipMemcpyAsync(A_d[i], A_d[j], NUM_ELMTS * sizeof(T), - hipMemcpyDefault, stream[i])); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(B_h, A_d[j], NUM_ELMTS * sizeof(T), - hipMemcpyDefault)); - for (size_t k = 0; k < NUM_ELMTS; ++k) { - if (A_h[k] != B_h[k]) - Data_mismatch++; - } - - if (Data_mismatch.load() != 0) { - printf("hipMemcpyAsync: Failed between GPU: %d and %d\n", i, j); - bFail = true; - } - } - } - } - break; - case TEST_MEMCPYH2DASYNC: // To test hipMemcpyHtoDAsync() - for (int i = 0; i < Available_Gpus; ++i) { - Data_mismatch = 0; - HIPCHECK(hipMemcpyHtoDAsync(hipDeviceptr_t(A_d[i]), A_h, - NUM_ELMTS * sizeof(T), stream[i])); - HIPCHECK(hipStreamSynchronize(stream[i])); - // Copying data from device to host to check data consistency - HIPCHECK(hipMemcpy(B_h, A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDeviceToHost)); - for (size_t k = 0; k < NUM_ELMTS; ++k) { - if (A_h[k] != B_h[k]) - Data_mismatch++; - } - if (Data_mismatch.load() != 0) { - printf("hipMemcpyHtoDAsync: failed for GPU %d \n", i); - bFail = true; - } - 
} - break; - case TEST_MEMCPYD2HASYNC: // To test hipMemcpyDtoHAsync() - for (int i = 0; i < Available_Gpus; ++i) { - Data_mismatch = 0; - HIPCHECK(hipMemcpy(A_d[i], A_h, NUM_ELMTS * sizeof(T), - hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpyDtoHAsync(B_h, hipDeviceptr_t(A_d[i]), - NUM_ELMTS * sizeof(T), stream[i])); - HIPCHECK(hipStreamSynchronize(stream[i])); - for (size_t j = 0; j < NUM_ELMTS; ++j) { - if (A_h[j] != B_h[j]) - Data_mismatch++; - } - if (Data_mismatch.load() != 0) { - printf("hipMemcpyDtoHAsync: failed %d \n", i); - bFail = true; - } - } - break; - case TEST_MEMCPYD2DASYNC: // To test hipMemcpyDtoDAsync() - if (Available_Gpus > 1) { - // First copy data from H to D and then from D to D followed by D to H - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(A_d[0]), - A_h, NUM_ELMTS * sizeof(T))); - for (int i = 0; i < Available_Gpus; ++i) { - for (int j = i+1; j < Available_Gpus; ++j) { - Data_mismatch = 0; - if (true == gpusIsPeer(i, j)) { - HIPCHECK(hipSetDevice(j)); - HIPCHECK(hipMemcpyDtoDAsync(hipDeviceptr_t(A_d[j]), - hipDeviceptr_t(A_d[i]), NUM_ELMTS * sizeof(T), stream[i])); - // Copying in direction reverse of above to check if bidirectional - // access is happening without any error - HIPCHECK(hipMemcpyDtoDAsync(hipDeviceptr_t(A_d[i]), - hipDeviceptr_t(A_d[j]), NUM_ELMTS * sizeof(T), stream[i])); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(B_h, A_d[i], NUM_ELMTS * sizeof(T), - hipMemcpyDeviceToHost)); - for (size_t k = 0; k < NUM_ELMTS; ++k) { - if (A_h[k] != B_h[k]) - Data_mismatch++; - } - if (Data_mismatch.load() != 0) { - printf("hipMemcpyDtoDAsync: failed GPU: %d and %d\n", i, j); - bFail = true; - } - } - } - } - } else { - // As DtoD is not possible we will transfer data from HtH(A_h to B_h) - // so as to get through verification step - Data_mismatch = 0; - HIPCHECK(hipMemcpy(B_h, A_h, NUM_ELMTS * sizeof(T), - hipMemcpyHostToHost)); - for (size_t i = 0; i < NUM_ELMTS; ++i) { - if (A_h[i] != B_h[i]) - Data_mismatch++; - } - 
if (Data_mismatch.load() != 0) { - printf("hipMemcpy (Host to Host): failed\n"); - bFail = true; - } - } - break; - default: - printf("Did not receive valid option!\n"); - break; - } - - for (int i = 0; i < Available_Gpus; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipFree((A_d[i]))); - if (api >= TEST_MEMCPYD2D) { - HIPCHECK(hipStreamDestroy(stream[i])); - } - } - - // Return true if test is success - if (bFail == true) { - return false; - } else { - return true; - } -} - -template -memcpyTests::~memcpyTests() { - free(A_h); - free(B_h); -} - -void Thread_func(int Threadid) { - for (apiToTest api = TEST_MEMCPY; api < TEST_MAX; api = apiToTest(api + 1)) { - memcpyTests obj(api, 1024); - if (false == obj.Memcpy_And_verify()) { - failureCount++; - } - } -} - -int parseExtraArguments(int argc, char* argv[]) { - int i = 0; - for (i = 1; i < argc; i++) { - const char* arg = argv[i]; - if (!strcmp(arg, " ")) { - // skip nullptr args. - } else if (!strcmp(arg, "--memcpyPeersOnly")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &memcpyPeersOnly)) { - failed("Bad memcpyPeersOnly argument"); - } - } else if (!strcmp(arg, "--testAllTypes")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &testAllTypes)) { - failed("Bad testAllTypes argument"); - } - } else { - failed("Bad argument"); - } - } - return i; -} - - -int main(int argc, char* argv[]) { - bool TestPassed = true; - int extraArgs = 0; - HIPCHECK(hipGetDeviceCount(&Available_Gpus)); - extraArgs = HipTest::parseStandardArguments(argc, argv, false); - parseExtraArguments(extraArgs, argv); - - if (p_tests == 1) { - Memcpy_Negative_Tests test; - TestPassed = test.Test_Memcpy(); - TestPassed &= test.Test_MemcpyAsync(); - TestPassed &= test.Test_MemcpyHtoD(); - TestPassed &= test.Test_MemcpyHtoDAsync(); - TestPassed &= test.Test_MemcpyDtoD(); - TestPassed &= test.Test_MemcpyDtoDAsync(); - TestPassed &= test.Test_MemcpyDtoH(); - TestPassed &= test.Test_MemcpyDtoHAsync(); - if (TestPassed) { - passed(); - } else { - 
failed("Test Failed!"); - } - } else if (p_tests == 2) { - failureCount = 0; - std::thread Thrd[NUM_THREADS]; - for (int i = 0; i < NUM_THREADS; i++) - Thrd[i] = std::thread(Thread_func, i); - - // Thread join is being called separately so as to allow the - // threads run parallely - for (int i = 0; i < NUM_THREADS; i++) - Thrd[i].join(); - if (failureCount.load() != 0) { - failed("Failed"); - } else { - passed(); - } - } else if (p_tests == 3) { - size_t free = 0, total = 0; - HIPCHECK(hipMemGetInfo(&free, &total)); - failureCount = 0; - // Need to see if allocating all of available free memory will result in - // any issues in windows system before adding the same - std::vector NUM_ELMTS{1, 5, 10, 100, 1024, 10*1024, 100*1024, - 1024*1024, 10*1024*1024, 100*1024*1024, - 1024*1024*1024}; - - for (apiToTest api = TEST_MEMCPY; api < TEST_MAX; api = apiToTest(api+1)) { - for (size_t x : NUM_ELMTS) { - if ((x * sizeof(char)) <= free) { - memcpyTests obj(api, x); - TestPassed &= obj.Memcpy_And_verify(); - HIPCHECK(hipDeviceSynchronize()); - } - - if (HIPTEST_TRUE == testAllTypes) { - // Testing memcpy with various data types - if ((x * sizeof(int)) <= free) { - memcpyTests obj(api, x); - TestPassed &= obj.Memcpy_And_verify(); - HIPCHECK(hipDeviceSynchronize()); - } - if ((x * sizeof(size_t)) <= free) { - memcpyTests obj(api, x); - TestPassed &= obj.Memcpy_And_verify(); - HIPCHECK(hipDeviceSynchronize()); - } - if ((x * sizeof(long double)) <= free) { - memcpyTests obj(api, x); - TestPassed &= obj.Memcpy_And_verify(); - HIPCHECK(hipDeviceSynchronize()); - } - } - } - } - if (TestPassed) { - passed(); - } else { - failed("Test Failed!"); - } - } else { - failed("Didnt receive any valid option\n"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp deleted file mode 100644 index b503186385..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp +++ /dev/null @@ -1,53 +0,0 @@ 
-/* - * Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- * */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - - -#include "test_common.h" - -int main() { - int* A; - int* Ad; - int* Bd; - - // Allocation - HIPCHECK(hipMalloc((void**)&Ad, sizeof(int))); - HIPCHECK(hipMalloc((void**)&Bd, sizeof(int))); - HIPCHECK(hipHostMalloc((void**)&A,sizeof(int))); - - // Kind should be ignored and test should pass even for incorrect kind - HIPCHECK(hipMemcpy(Ad, A, sizeof(int), hipMemcpyDeviceToHost)); - HIPCHECK(hipMemcpy(A, Ad, sizeof(int), hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Ad, Bd, sizeof(int), hipMemcpyHostToHost)); - HIPCHECK(hipMemcpy(A, A, sizeof(int), hipMemcpyDeviceToDevice)); - - // nullptr passed as source or destination pointer - HIPASSERT(hipSuccess != hipMemcpy(nullptr, A, sizeof(int), hipMemcpyHostToDevice)); - HIPASSERT(hipSuccess != hipMemcpy(Ad, nullptr, sizeof(int), hipMemcpyHostToDevice)); - - HIPCHECK(hipFree(Ad)); - HIPCHECK(hipFree(Bd)); - HIPCHECK(hipHostFree(A)); - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp b/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp deleted file mode 100644 index bc8a5ebbea..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyPeer.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Conformance test for checking functionality of - * hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t - * sizeBytes); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -int main() { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; - int *A_h, *B_h, *C_h; - - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - int canAccessPeer = 0; - hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMalloc(&X_d, Nbytes)); - HIPCHECK(hipMalloc(&Y_d, Nbytes)); - HIPCHECK(hipMalloc(&Z_d, Nbytes)); - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HIPCHECK(hipSetDevice(1)); - hipMemcpyPeer( - X_d, 1, A_d, 0, - Nbytes); // this call is eqv to hipMemcpy(hipMemcpyD2D) which goes via stg bufs. 
- hipMemcpyPeer(Y_d, 1, B_d, 0, Nbytes); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(X_d), static_cast(Y_d), Z_d, N); - HIPCHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipFree(X_d)); - HIPCHECK(hipFree(Y_d)); - HIPCHECK(hipFree(Z_d)); - } else { - std::cout<<"Machine does not seem to have P2P Capabilities, Empty Pass"< 1) { - - int canAccessPeer = 0; - hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); - if (canAccessPeer) { - HIPCHECK(hipSetDevice(0)); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMalloc(&X_d, Nbytes)); - HIPCHECK(hipMalloc(&Y_d, Nbytes)); - HIPCHECK(hipMalloc(&Z_d, Nbytes)); - - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipStreamCreate(&s)); - HIPCHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, Nbytes, s)); - HIPCHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, Nbytes, s)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(X_d), static_cast(Y_d), Z_d, N); - HIPCHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipStreamSynchronize(s)); - HIPCHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HIPCHECK(hipStreamDestroy(s)); - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - 
HIPCHECK(hipFree(X_d)); - HIPCHECK(hipFree(Y_d)); - HIPCHECK(hipFree(Z_d)); - } else { - std::cout<<"Machine does not seem to have P2P Capabilities, Empty Pass"<(A_d), static_cast(B_d), C_d, N); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipStreamDestroy(stream)); -} - -void HipMemcpyWithStreamtests::TestwithTwoStream(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int noOfstreams = 2; - int *A_d[noOfstreams], *B_d[noOfstreams], *C_d[noOfstreams]; - int *A_h[noOfstreams], *B_h[noOfstreams], *C_h[noOfstreams]; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - for (int i=0; i < noOfstreams; ++i) { - HipTest::initArrays(&A_d[i], &B_d[i], &C_d[i], &A_h[i], &B_h[i], &C_h[i], N, false); - } - - hipStream_t stream[noOfstreams]; - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipStreamCreate(&stream[i])); - } - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipMemcpyWithStream(A_d[i], A_h[i], Nbytes, hipMemcpyHostToDevice, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_h[i], Nbytes, hipMemcpyHostToDevice, stream[i])); - } - - for (int i=0; i < noOfstreams; ++i) { - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], - static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[i], B_h[i], C_h[i], N); - } - - for (int i=0; i < noOfstreams; ++i) { - HipTest::freeArrays(A_d[i], B_d[i], C_d[i], A_h[i], B_h[i], C_h[i], false); - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamtests::TestDtoDonSameDevice(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int noOfstreams = 2; - 
int *A_d[noOfstreams], *B_d[noOfstreams], *C_d[noOfstreams]; - int *A_h[noOfstreams], *B_h[noOfstreams], *C_h[noOfstreams]; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - HipTest::initArrays(&A_d[0], &B_d[0], &C_d[0], &A_h[0], &B_h[0], &C_h[0], N, false); - - - hipStream_t stream[noOfstreams]; - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMalloc(&A_d[1], Nbytes)); - HIPCHECK(hipMalloc(&B_d[1], Nbytes)); - HIPCHECK(hipMalloc(&C_d[1], Nbytes)); - C_h[1] = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(C_h[1] != NULL); - - HIPCHECK(hipMemcpyWithStream(A_d[0], A_h[0], Nbytes, hipMemcpyHostToDevice, stream[0])); - HIPCHECK(hipMemcpyWithStream(B_d[0], B_h[0], Nbytes, hipMemcpyHostToDevice, stream[0])); - - HIPCHECK(hipMemcpyWithStream(A_d[1], A_d[0], Nbytes, hipMemcpyDeviceToDevice, stream[1])); - HIPCHECK(hipMemcpyWithStream(B_d[1], B_d[0], Nbytes, hipMemcpyDeviceToDevice, stream[1])); - - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipSetDevice(0)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], - static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[0], B_h[0], C_h[i], N); - } - - - HipTest::freeArrays(A_d[0], B_d[0], C_d[0], A_h[0], B_h[0], C_h[0], false); - - if (A_d[1]) { - HIPCHECK(hipFree(A_d[1])); - } - if (B_d[1]) { - HIPCHECK(hipFree(B_d[1])); - } - if (C_d[1]) { - HIPCHECK(hipFree(C_d[1])); - } - if (C_h[1]) { - free(C_h[1]); - } - - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamtests::TestOnMultiGPUwithOneStream(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - 
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK(hipGetDeviceCount(&numDevices)); - // If you have single GPU machine the return - if (numDevices <= 1) { - return; - } - int *A_d[numDevices], *B_d[numDevices], *C_d[numDevices]; - int *A_h[numDevices], *B_h[numDevices], *C_h[numDevices]; - - hipStream_t stream[numDevices]; - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HipTest::initArrays(&A_d[i], &B_d[i], &C_d[i], &A_h[i], &B_h[i], &C_h[i], N, false); - } - - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMemcpyWithStream(A_d[i], A_h[i], Nbytes, hipMemcpyHostToDevice, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_h[i], Nbytes, hipMemcpyHostToDevice, stream[i])); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], - static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[i], B_h[i], C_h[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HipTest::freeArrays(A_d[i], B_d[i], C_d[i], A_h[i], B_h[i], C_h[i], false); - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamtests::TestkindDtoH(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - HIPCHECK(hipMemcpyWithStream(A_d, A_h, Nbytes, hipMemcpyHostToDevice, 
stream)); - HIPCHECK(hipMemcpyWithStream(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, - static_cast(A_d), static_cast(B_d), C_d, N); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipStreamDestroy(stream)); -} - - -void HipMemcpyWithStreamtests::TestkindDtoD(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK(hipGetDeviceCount(&numDevices)); - // If you have single GPU machine the return - if (numDevices <= 1) { - return; - } - - int canAccessPeer = 0; - hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); - if (!canAccessPeer) { - std::cout<<"Machine does not seem to have P2P Capabilities"<(malloc(Nbytes)); - HIPASSERT(C_h[i] != NULL); - } - - - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMemcpyWithStream(A_d[0], A_h[0], Nbytes, hipMemcpyHostToDevice, stream[0])); - HIPCHECK(hipMemcpyWithStream(B_d[0], B_h[0], Nbytes, hipMemcpyHostToDevice, stream[0])); - - for (int i=1; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMemcpyWithStream(A_d[i], A_d[0], Nbytes, hipMemcpyDeviceToDevice, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_d[0], Nbytes, hipMemcpyDeviceToDevice, stream[i])); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], - static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[0], B_h[0], C_h[i], N); - } - - 
HipTest::freeArrays(A_d[0], B_d[0], C_d[0], A_h[0], B_h[0], C_h[0], false); - HIPCHECK(hipStreamDestroy(stream[0])); - - for (int i=1; i < numDevices; ++i) { - if (A_d[i]) { - HIPCHECK(hipFree(A_d[i])); - } - if (B_d[i]) { - HIPCHECK(hipFree(B_d[i])); - } - if (C_d[i]) { - HIPCHECK(hipFree(C_d[i])); - } - if (C_h[i]) { - free(C_h[i]); - } - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamtests::TestkindDefault(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - HIPCHECK(hipMemcpyWithStream(A_d, A_h, Nbytes, hipMemcpyDefault, stream)); - HIPCHECK(hipMemcpyWithStream(B_d, B_h, Nbytes, hipMemcpyDefault, stream)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, - static_cast(A_d), static_cast(B_d), C_d, N); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream)); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipStreamDestroy(stream)); -} - -void HipMemcpyWithStreamtests::TestkindDefaultForDtoD(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK(hipGetDeviceCount(&numDevices)); - // If you have single GPU machine the return - if (numDevices <= 1) { - return; - } - - int *A_d[numDevices], *B_d[numDevices], *C_d[numDevices]; - int *A_h[numDevices], *B_h[numDevices], *C_h[numDevices]; - - // Initialize and create the host and device elements for first device - HIPCHECK(hipSetDevice(0)); - HipTest::initArrays(&A_d[0], &B_d[0], &C_d[0], &A_h[0], &B_h[0], &C_h[0], N, false); - - for (int i=1; i < 
numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMalloc(&A_d[i], Nbytes)); - HIPCHECK(hipMalloc(&B_d[i], Nbytes)); - HIPCHECK(hipMalloc(&C_d[i], Nbytes)); - C_h[i] = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(C_h[i] != NULL); - } - - hipStream_t stream[numDevices]; - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - - HIPCHECK(hipMemcpyWithStream(A_d[0], A_h[0], Nbytes, hipMemcpyHostToDevice, stream[0])); - HIPCHECK(hipMemcpyWithStream(B_d[0], B_h[0], Nbytes, hipMemcpyHostToDevice, stream[0])); - - for (int i=1; i < numDevices; ++i) { - HIPCHECK(hipMemcpyWithStream(A_d[i], A_d[0], Nbytes, hipMemcpyDefault, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_d[0], Nbytes, hipMemcpyDefault, stream[i])); - } - - for (int i=0; i < numDevices; ++i) { - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], - static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); // hipMemcpy will be on this device - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[0], B_h[0], C_h[i], N); - } - - HipTest::freeArrays(A_d[0], B_d[0], C_d[0], A_h[0], B_h[0], C_h[0], false); - HIPCHECK(hipStreamDestroy(stream[0])); - - for (int i=1; i < numDevices; ++i) { - if (A_d[i]) { - HIPCHECK(hipFree(A_d[i])); - } - if (B_d[i]) { - HIPCHECK(hipFree(B_d[i])); - } - if (C_d[i]) { - HIPCHECK(hipFree(C_d[i])); - } - if (C_h[i]) { - free(C_h[i]); - } - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamtests::TestkindHtoH(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_h, *B_h; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - // Allocate memory to A_h and B_h - A_h = static_cast(malloc(Nbytes)); - HIPASSERT(A_h != NULL); - B_h = 
static_cast(malloc(Nbytes)); - HIPASSERT(B_h != NULL); - - for (size_t i = 0; i < N; ++i) { - if (A_h) (A_h)[i] = 3.146f + i; // Pi - } - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - HIPCHECK(hipMemcpyWithStream(B_h, A_h, Nbytes, hipMemcpyHostToHost, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - - for (size_t i = 0; i < N; i++) { - HIPASSERT(A_h[i] == B_h[i]); - } - - if (A_h) { - free(A_h); - } - if (B_h) { - free(B_h); - } - HIPCHECK(hipStreamDestroy(stream)); -} - -int main() { - HipMemcpyWithStreamtests tests; - tests.TestwithOnestream(); - test_passed(TestwithOnestream); - tests.TestwithTwoStream(); - test_passed(TestwithTwoStream); - tests.TestkindDtoH(); - test_passed(TestkindsDtoH); - tests.TestkindDefault(); - test_passed(TestkindDefault); - tests.TestDtoDonSameDevice(); - test_passed(TestDtoDonSameDevice); - tests.TestOnMultiGPUwithOneStream(); - test_passed(TestOnMultiGPUwithOneStream); - tests.TestkindDtoD(); - test_passed(TestkindDtoD); -#ifndef __HIP_PLATFORM_NVIDIA__ - tests.TestkindDefaultForDtoD(); - test_passed(TestkindDefaultForDtoD); -#endif - tests.TestkindHtoH(); - test_passed(TestkindsHtoH); -} diff --git a/tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp b/tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp deleted file mode 100644 index ddd9c853ea..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp +++ /dev/null @@ -1,668 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * Different test for checking functionality of - * hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, - * hipMemcpyKind kind, hipStream_t stream); - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -#define LEN 64 -#define SIZE LEN << 2 -#define THREADS 2 -#define MAX_THREADS 16 - - -#define test_passed(test_name) printf("%s %s PASSED!%s\n", \ - KGRN, #test_name, KNRM); -#define test_failed(test_name) printf("%s %s FAILED!%s\n", \ - KRED, #test_name, KNRM); - -enum class ops -{ TestwithOnestream, - TestwithTwoStream, - TestOnMultiGPUwithOneStream, - TestkindDtoH, - TestkindDtoD, - TestkindHtoH, - TestkindDefault, -#ifndef __HIP_PLATFORM_NVIDIA__ - TestkindDefaultForDtoD, -#endif - TestDtoDonSameDevice, - END_OF_LIST -}; - - -class HipMemcpyWithStreamMultiThreadtests { - // Test hipMemcpyWithStream with one streams and launch kernel in - // that stream, verify the data. - void TestwithOnestream(void); - // Test hipMemcpyWithStream with two streams and launch kernels in - // two streams, verify the data. - void TestwithTwoStream(void); - // Test hipMemcpyWithStream with one stream for each gpu and launch - // kernels in each, verify the data - void TestOnMultiGPUwithOneStream(void); - // Test hipMemcpyWithStream to copy data from - // device to host (hipMemcpyDeviceToHost). - void TestkindDtoH(void); - // Test hipMemcpyWithStream with hipMemcpyDeviceToDevice on MultiGPU. - void TestkindDtoD(void); - // Test hipMemcpyWithStream with hipMemcpyHostToHost. - void TestkindHtoH(void); - // Test hipMemcpyWithStream with hipMemcpyDefault. - void TestkindDefault(void); - // Test hipMemcpyWithStream with hipMemcpyDefault for - // device to device transfer case. - void TestkindDefaultForDtoD(void); - // Test hipMemcpyWithStream with hipMemcpyDeviceToDevice on same device. 
- void TestDtoDonSameDevice(void); - - public: - // run all the tests on multithreaded. - void TestwithMultiThreaded(ops op); -}; - -struct joinable_thread : std::thread { - template - explicit joinable_thread(Xs&&... xs) : std::thread(std::forward(xs)...) - {} // NOLINT - - joinable_thread& operator=(joinable_thread&& other) = default; - joinable_thread(joinable_thread&& other) = default; - - ~joinable_thread() { - if (this->joinable()) - this->join(); - } -}; - -void HipMemcpyWithStreamMultiThreadtests::TestwithOnestream(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - HIPCHECK(hipMemcpyWithStream(A_d, A_h, Nbytes, - hipMemcpyHostToDevice, stream)); - HIPCHECK(hipMemcpyWithStream(B_d, B_h, Nbytes, - hipMemcpyHostToDevice, stream)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream, static_cast(A_d), - static_cast(B_d), C_d, N); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipStreamDestroy(stream)); -} - -void HipMemcpyWithStreamMultiThreadtests::TestwithTwoStream(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int noOfstreams = 2; - int *A_d[noOfstreams], *B_d[noOfstreams], *C_d[noOfstreams]; - int *A_h[noOfstreams], *B_h[noOfstreams], *C_h[noOfstreams]; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - for (int i=0; i < noOfstreams; ++i) { - HipTest::initArrays(&A_d[i], &B_d[i], &C_d[i], - &A_h[i], &B_h[i], &C_h[i], N, false); - } - - hipStream_t stream[noOfstreams]; - for (int i=0; i < noOfstreams; ++i) { - 
HIPCHECK(hipStreamCreate(&stream[i])); - } - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipMemcpyWithStream(A_d[i], A_h[i], Nbytes, - hipMemcpyHostToDevice, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_h[i], Nbytes, - hipMemcpyHostToDevice, stream[i])); - } - - for (int i=0; i < noOfstreams; ++i) { - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream[i], static_cast(A_d[i]), - static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[i], B_h[i], C_h[i], N); - } - - for (int i=0; i < noOfstreams; ++i) { - HipTest::freeArrays(A_d[i], B_d[i], C_d[i], A_h[i], B_h[i], C_h[i], false); - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamMultiThreadtests::TestDtoDonSameDevice(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int noOfstreams = 2; - int *A_d[noOfstreams], *B_d[noOfstreams], *C_d[noOfstreams]; - int *A_h[noOfstreams], *B_h[noOfstreams], *C_h[noOfstreams]; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - HipTest::initArrays(&A_d[0], &B_d[0], &C_d[0], - &A_h[0], &B_h[0], &C_h[0], N, false); - - - hipStream_t stream[noOfstreams]; - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMalloc(&A_d[1], Nbytes)); - HIPCHECK(hipMalloc(&B_d[1], Nbytes)); - HIPCHECK(hipMalloc(&C_d[1], Nbytes)); - C_h[1] = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(C_h[1] != NULL); - - HIPCHECK(hipMemcpyWithStream(A_d[0], A_h[0], Nbytes, - hipMemcpyHostToDevice, stream[0])); - HIPCHECK(hipMemcpyWithStream(B_d[0], B_h[0], Nbytes, - hipMemcpyHostToDevice, stream[0])); - - HIPCHECK(hipMemcpyWithStream(A_d[1], A_d[0], Nbytes, - hipMemcpyDeviceToDevice, stream[1])); - 
HIPCHECK(hipMemcpyWithStream(B_d[1], B_d[0], Nbytes, - hipMemcpyDeviceToDevice, stream[1])); - - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipSetDevice(0)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream[i], static_cast(A_d[i]), - static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[0], B_h[0], C_h[i], N); - } - - - HipTest::freeArrays(A_d[0], B_d[0], C_d[0], A_h[0], B_h[0], C_h[0], false); - - if (A_d[1]) { - HIPCHECK(hipFree(A_d[1])); - } - if (B_d[1]) { - HIPCHECK(hipFree(B_d[1])); - } - if (C_d[1]) { - HIPCHECK(hipFree(C_d[1])); - } - if (C_h[1]) { - free(C_h[1]); - } - - - for (int i=0; i < noOfstreams; ++i) { - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamMultiThreadtests::TestOnMultiGPUwithOneStream(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK(hipGetDeviceCount(&numDevices)); - // If you have single GPU machine the return - if (numDevices <= 1) { - return; - } - int *A_d[numDevices], *B_d[numDevices], *C_d[numDevices]; - int *A_h[numDevices], *B_h[numDevices], *C_h[numDevices]; - - hipStream_t stream[numDevices]; - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HipTest::initArrays(&A_d[i], &B_d[i], &C_d[i], - &A_h[i], &B_h[i], &C_h[i], N, false); - } - - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMemcpyWithStream(A_d[i], A_h[i], Nbytes, - hipMemcpyHostToDevice, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_h[i], Nbytes, - hipMemcpyHostToDevice, stream[i])); - } - - for (int i=0; i < numDevices; 
++i) { - HIPCHECK(hipSetDevice(i)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream[i], static_cast(A_d[i]), - static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[i], B_h[i], C_h[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HipTest::freeArrays(A_d[i], B_d[i], C_d[i], A_h[i], B_h[i], C_h[i], false); - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamMultiThreadtests::TestkindDtoH(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - HIPCHECK(hipMemcpyWithStream(A_d, A_h, Nbytes, - hipMemcpyHostToDevice, stream)); - HIPCHECK(hipMemcpyWithStream(B_d, B_h, Nbytes, - hipMemcpyHostToDevice, stream)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream, static_cast(A_d), - static_cast(B_d), C_d, N); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, - hipMemcpyDeviceToHost, stream)); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipStreamDestroy(stream)); -} - - -void HipMemcpyWithStreamMultiThreadtests::TestkindDtoD(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK(hipGetDeviceCount(&numDevices)); - // If you have single GPU machine the return - if (numDevices <= 1) { - return; - } - - int *A_d[numDevices], *B_d[numDevices], *C_d[numDevices]; - int 
*A_h[numDevices], *B_h[numDevices], *C_h[numDevices]; - - hipStream_t stream[numDevices]; - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - - // Initialize and create the host and device elements for first device - HIPCHECK(hipSetDevice(0)); - HipTest::initArrays(&A_d[0], &B_d[0], &C_d[0], - &A_h[0], &B_h[0], &C_h[0], N, false); - - for (int i=1; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)) - HIPCHECK(hipMalloc(&A_d[i], Nbytes)); - HIPCHECK(hipMalloc(&B_d[i], Nbytes)); - HIPCHECK(hipMalloc(&C_d[i], Nbytes)); - C_h[i] = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(C_h[i] != NULL); - } - - - - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipMemcpyWithStream(A_d[0], A_h[0], Nbytes, - hipMemcpyHostToDevice, stream[0])); - HIPCHECK(hipMemcpyWithStream(B_d[0], B_h[0], Nbytes, - hipMemcpyHostToDevice, stream[0])); - - // Copying device data from 1st GPU to the rest of the the GPUs that is - // numDevices in the setup. 1st GPU start numbering from 0,1,2..n etc. - for (int i=1; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMemcpyWithStream(A_d[i], A_d[0], Nbytes, - hipMemcpyDeviceToDevice, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_d[0], Nbytes, - hipMemcpyDeviceToDevice, stream[i])); - } - - - // Launching the kernel including the 1st GPU to the no of GPUs present - // in the setup. 1st GPU start numbering from 0,1,2..n etc. 
- for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream[i], static_cast(A_d[i]), - static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - HipTest::checkVectorADD(A_h[0], B_h[0], C_h[i], N); - } - - HipTest::freeArrays(A_d[0], B_d[0], C_d[0], A_h[0], B_h[0], C_h[0], false); - HIPCHECK(hipStreamDestroy(stream[0])); - - for (int i=1; i < numDevices; ++i) { - if (A_d[i]) { - HIPCHECK(hipFree(A_d[i])); - } - if (B_d[i]) { - HIPCHECK(hipFree(B_d[i])); - } - if (C_d[i]) { - HIPCHECK(hipFree(C_d[i])); - } - if (C_h[i]) { - free(C_h[i]); - } - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamMultiThreadtests::TestkindDefault(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - HIPCHECK(hipMemcpyWithStream(A_d, A_h, Nbytes, hipMemcpyDefault, stream)); - HIPCHECK(hipMemcpyWithStream(B_d, B_h, Nbytes, hipMemcpyDefault, stream)); - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream, static_cast(A_d), - static_cast(B_d), C_d, N); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream)); - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipStreamDestroy(stream)); -} - -void HipMemcpyWithStreamMultiThreadtests::TestkindDefaultForDtoD(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, 
threadsPerBlock, N); - HIPCHECK(hipGetDeviceCount(&numDevices)); - // Test case will not run on single GPU setup. - if (numDevices <= 1) { - return; - } - - int *A_d[numDevices], *B_d[numDevices], *C_d[numDevices]; - int *A_h[numDevices], *B_h[numDevices], *C_h[numDevices]; - - // Initialize and create the host and device elements for first device - HIPCHECK(hipSetDevice(0)); - HipTest::initArrays(&A_d[0], &B_d[0], &C_d[0], - &A_h[0], &B_h[0], &C_h[0], N, false); - - for (int i=1; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMalloc(&A_d[i], Nbytes)); - HIPCHECK(hipMalloc(&B_d[i], Nbytes)); - HIPCHECK(hipMalloc(&C_d[i], Nbytes)); - C_h[i] = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(C_h[i] != NULL); - } - - hipStream_t stream[numDevices]; - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamCreate(&stream[i])); - } - - HIPCHECK(hipMemcpyWithStream(A_d[0], A_h[0], Nbytes, - hipMemcpyHostToDevice, stream[0])); - HIPCHECK(hipMemcpyWithStream(B_d[0], B_h[0], Nbytes, - hipMemcpyHostToDevice, stream[0])); - - // Copying device data from 1st GPU to the rest of the the GPUs - // using hipMemcpyDefault kind that is numDevices in the setup. - // 1st GPU start numbering from 0,1,2..n etc. - for (int i=1; i < numDevices; ++i) { - HIPCHECK(hipMemcpyWithStream(A_d[i], A_d[0], Nbytes, - hipMemcpyDefault, stream[i])); - HIPCHECK(hipMemcpyWithStream(B_d[i], B_d[0], Nbytes, - hipMemcpyDefault, stream[i])); - } - - for (int i=0; i < numDevices; ++i) { - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), - 0, stream[i], static_cast(A_d[i]), - static_cast(B_d[i]), C_d[i], N); - } - - for (int i=0; i < numDevices; ++i) { - HIPCHECK(hipSetDevice(i)); // hipMemcpy will be on this device - HIPCHECK(hipStreamSynchronize(stream[i])); - HIPCHECK(hipMemcpy(C_h[i], C_d[i], Nbytes, hipMemcpyDeviceToHost)); - // Output of each GPU is getting validated with input of 1st GPU. 
- HipTest::checkVectorADD(A_h[0], B_h[0], C_h[i], N); - } - - HipTest::freeArrays(A_d[0], B_d[0], C_d[0], A_h[0], B_h[0], C_h[0], false); - HIPCHECK(hipStreamDestroy(stream[0])); - - for (int i=1; i < numDevices; ++i) { - if (A_d[i]) { - HIPCHECK(hipFree(A_d[i])); - } - if (B_d[i]) { - HIPCHECK(hipFree(B_d[i])); - } - if (C_d[i]) { - HIPCHECK(hipFree(C_d[i])); - } - if (C_h[i]) { - free(C_h[i]); - } - HIPCHECK(hipStreamDestroy(stream[i])); - } -} - -void HipMemcpyWithStreamMultiThreadtests::TestkindHtoH(void) { - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int *A_h, *B_h; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - // Allocate memory to A_h and B_h - A_h = static_cast(malloc(Nbytes)); - HIPASSERT(A_h != NULL); - B_h = static_cast(malloc(Nbytes)); - HIPASSERT(B_h != NULL); - - for (size_t i = 0; i < N; ++i) { - if (A_h) { - (A_h)[i] = 3.146f + i; // Pi - } - } - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - HIPCHECK(hipMemcpyWithStream(B_h, A_h, Nbytes, hipMemcpyHostToHost, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - - for (size_t i = 0; i < N; i++) { - HIPASSERT(A_h[i] == B_h[i]); - } - - if (A_h) { - free(A_h); - } - if (B_h) { - free(B_h); - } - HIPCHECK(hipStreamDestroy(stream)); -} - - -void HipMemcpyWithStreamMultiThreadtests::TestwithMultiThreaded(ops op) { - size_t thread_count = getHostThreadCount(); - if (thread_count == 0) { - failed("Thread Count is 0"); - } - std::vector threads; - - for (uint32_t i = 0; i < thread_count; i++) { - threads.emplace_back(std::thread{[&] { - switch ( op ) { - case ops::TestwithOnestream: - TestwithOnestream(); - break; - case ops::TestwithTwoStream: - TestwithTwoStream(); - break; - case ops::TestkindDtoH: - TestkindDtoH(); - break; - case ops::TestkindHtoH: - TestkindHtoH(); - break; - case ops::TestkindDtoD: - TestkindDtoD(); - break; - case ops::TestOnMultiGPUwithOneStream: - TestOnMultiGPUwithOneStream(); - break; - case ops::TestkindDefault: 
- TestkindDefault(); - break; -#ifndef __HIP_PLATFORM_NVIDIA__ - case ops::TestkindDefaultForDtoD: - TestkindDefaultForDtoD(); - break; -#endif - case ops::TestDtoDonSameDevice: - TestDtoDonSameDevice(); - break; - default:{} - } - }}); - } -} - - -int main() { - HipMemcpyWithStreamMultiThreadtests tests; - for (int op = static_cast(ops::TestwithOnestream); - op < static_cast(ops::END_OF_LIST); ++op) { - tests.TestwithMultiThreaded(static_cast(op)); - switch ( static_cast(op) ) { - case ops::TestwithOnestream: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestwithOnestream); - break; - case ops::TestwithTwoStream: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestwithTwoStream); - break; - case ops::TestkindDtoH: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestkindDtoH); - break; - case ops::TestkindHtoH: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestkindHtoH); - break; - case ops::TestkindDtoD: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestkindDtoD); - break; - case ops::TestOnMultiGPUwithOneStream: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestOnMultiGPUwithOneStream); - break; - case ops::TestkindDefault: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestkindDefault); - break; -#ifndef __HIP_PLATFORM_NVIDIA__ - case ops::TestkindDefaultForDtoD: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestkindDefaultForDtoD); - break; -#endif - case ops::TestDtoDonSameDevice: - test_passed(HipMemcpyWithStreamMultiThreadtests - ::TestDtoDonSameDevice); - break; - default: { test_failed("No Operation to done with API"); } - } - } -} diff --git a/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp b/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp deleted file mode 100644 index 88f2fc5b3b..0000000000 --- a/tests/src/runtimeApi/memory/hipMemcpy_simple.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * TEST_NAMED: %t hipMemcpyAsync-simple --async - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -bool p_async = false; - -// **************************************************************************** -hipError_t memcopy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { - if (p_async) { - return hipMemcpyAsync(dst, src, sizeBytes, kind, NULL); - } else { - return hipMemcpy(dst, src, sizeBytes, kind); - } -} - - -//--- -// Test simple H2D copies and back. 
-// Designed to stress a small number of simple smoke tests -void simpleTest1() { - printf("test: %s\n", __func__); - size_t Nbytes = N * sizeof(int); - printf("N=%zu Nbytes=%6.2fMB\n", N, Nbytes / 1024.0 / 1024.0); - - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - - printf("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - HIPCHECK(memcopy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(memcopy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, N); - - HIPCHECK(memcopy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIPCHECK(hipDeviceReset()); - - printf(" %s success\n", __func__); -} - - -template -void simpleTest2(size_t numElements, bool usePinnedHost) { - size_t sizeElements = numElements * sizeof(T); - size_t alignment = 4096; - printf("test: %s<%s> numElements=%zu sizeElements=%zu bytes\n", __func__, TYPENAME(T), - numElements, sizeElements); - - T *A_d, *A_h1, *A_h2; - - if (usePinnedHost) { - HIPCHECK(hipHostMalloc((void**)&A_h1, sizeElements, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&A_h2, sizeElements, hipHostMallocDefault)); - } else { - A_h1 = (T*)aligned_alloc(alignment, sizeElements); - HIPASSERT(A_h1); - A_h2 = (T*)aligned_alloc(alignment, sizeElements); - HIPASSERT(A_h1); - } - - // Alloc device array: - HIPCHECK(hipMalloc(&A_d, sizeElements)); - - - for (size_t i = 0; i < numElements; i++) { - A_h1[i] = 3.14f + 1000 * i; - A_h2[i] = - 12345678.0 + i; // init output with something distincctive, to ensure we replace it. 
- } - - HIPCHECK(memcopy(A_d, A_h1, sizeElements, hipMemcpyHostToDevice)); - HIPCHECK(hipDeviceSynchronize()); - HIPCHECK(memcopy(A_h2, A_d, sizeElements, hipMemcpyDeviceToHost)); - HIPCHECK(hipDeviceSynchronize()); - - for (size_t i = 0; i < numElements; i++) { - HIPASSERT(A_h1[i] == A_h2[i]); - } - - HIPCHECK(hipFree(A_d)); - if (usePinnedHost) { - HIPCHECK(hipHostFree(A_h1)); - HIPCHECK(hipHostFree(A_h2)); - } else { - aligned_free(A_h1); - aligned_free(A_h2); - } -} - - -// Parse arguments specific to this test. -void parseMyArguments(int argc, char* argv[]) { - int more_argc = HipTest::parseStandardArguments(argc, argv, false); - - // parse args for this test: - for (int i = 1; i < more_argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, "--async")) { - p_async = true; - - } else { - failed("Bad argument '%s'", arg); - } - } -}; - - -int main(int argc, char* argv[]) { - parseMyArguments(argc, argv); - - printf("info: set device to %d, tests=%x\n", p_gpuDevice, p_tests); - HIPCHECK(hipSetDevice(p_gpuDevice)); - - - if (p_tests & 0x1) { - printf("\n\n=== tests&1\n"); - HIPCHECK(hipDeviceReset()); - simpleTest1(); - printf("===\n\n\n"); - } - - if (p_tests & 0x2) { - printf("\n\n=== tests&2 (copy ping-pong, pinned host)\n"); - simpleTest2(N, true /*usePinnedHost*/); - simpleTest2(N, true /*usePinnedHost*/); - } - - if (p_tests & 0x4) { - printf("\n\n=== tests&4 (copy ping-pong, unpinned host)\n"); - simpleTest2(N, false /*usePinnedHost*/); - simpleTest2(N, false /*usePinnedHost*/); - } - - hipDeviceSynchronize(); - hipDeviceReset(); - - int v; - hipDriverGetVersion(&v); - - passed(); -}; diff --git a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp deleted file mode 100644 index b1150c4531..0000000000 --- a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherent.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * HIT_END - */ - - -#include -#include "hip/hip_runtime.h" - -__global__ void Kernel(volatile float* hostRes) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - hostRes[tid] = tid + 1; - __threadfence_system(); - // expecting that the data is getting flushed to host here! 
- // time waster for-loop (sleep) - for (int timeWater = 0; timeWater < 100000000; timeWater++) - ; -} - -int main() { - size_t blocks = 2; - volatile float* hostRes; - hipHostMalloc((void**)&hostRes, blocks * sizeof(float), hipHostMallocMapped); - hostRes[0] = 0; - hostRes[1] = 0; - hipLaunchKernelGGL(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks), 0, 0, hostRes); - int eleCounter = 0; - while (eleCounter < blocks) { - // blocks until the value changes - while (hostRes[eleCounter] == 0) - ; - printf("%f\n", hostRes[eleCounter]); - ; - eleCounter++; - } - hipHostFree((void*)hostRes); - return 0; -} diff --git a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp b/tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp deleted file mode 100644 index 9f184ca911..0000000000 --- a/tests/src/runtimeApi/memory/hipMemoryAllocateCoherentDriver.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and -associated documentation files (the "Software"), to deal in the Software without restriction, -including without limitation the rights to use, copy, modify, merge, publish, distribute, -sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT -NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -using namespace std; - -const string coherent_test_dir = string(".") + PATH_SEPERATOR_STR + "directed_tests" - + PATH_SEPERATOR_STR + "runtimeApi" + PATH_SEPERATOR_STR - + "memory"; - -string getRes() { - FILE* in; - char buff[512], buff_2[512]; - string coherent_test_str = "hipMemoryAllocateCoherent"; - string str = coherent_test_dir + PATH_SEPERATOR_STR + coherent_test_str; - if (!(in = popen(str.c_str(), "r"))) { - exit(1); - } - fgets(buff, sizeof(buff), in); - fgets(buff_2, sizeof(buff_2), in); - string str_buff = buff; - str_buff += buff_2; - pclose(in); - return str_buff; -} - -int main() { - setenv("HIP_COHERENT_HOST_ALLOC", "1000,0,1", 1); - string output = getRes(); - istringstream buffer(output); - double res1, res2; - buffer >> res1; - buffer >> res2; - if ((res2 - res1 * 2) > 0.000001) exit(1); - std::cout << "PASSED" << std::endl; - return 0; -} diff --git a/tests/src/runtimeApi/memory/hipMemset.cpp b/tests/src/runtimeApi/memory/hipMemset.cpp deleted file mode 100644 index 6b958166f1..0000000000 --- a/tests/src/runtimeApi/memory/hipMemset.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for memset. -// Also serves as a template for other tests. 
- -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * //Small copy - * TEST: %t -N 10 --memsetval 0x42 --memsetD32val 0x101 --memsetD16val 0x10 --memsetD8val 0x1 - * // Oddball size - * TEST: %t -N 10013 --memsetval 0x5a --memsetD32val 0xDEADBEEF --memsetD16val 0xDEAD --memsetD8val 0xDE - * // Big copy - * TEST: %t -N 256M --memsetval 0xa6 --memsetD32val 0xCAFEBABE --memsetD16val 0xCAFE --memsetD8val 0xCA - * HIT_END - */ -#define MAX_OFFSET 3 -// To test memset on unaligned pointer -#define loop(offset, offsetMax) for (int offset = offsetMax; offset >= 0; offset --) - -#include -#include "hip/hip_runtime.h" -#include "test_common.h" -enum MemsetType { - hipMemsetTypeDefault, - hipMemsetTypeD8, - hipMemsetTypeD16, - hipMemsetTypeD32 -}; - -bool testhipMemsetSmallSize(int memsetval, int p_gpuDevice) -{ - char *A_d; - char *A_h; - bool testResult = true; - for ( size_t iSize = 1; iSize < 4; iSize++ ) { - size_t Nbytes = iSize * sizeof(char); - HIPCHECK(hipMalloc(&A_d, Nbytes)); - A_h = reinterpret_cast (malloc(Nbytes)); - printf("testhipMemsetSmallSize N=%zu memsetval=%2x device=%d\n", - iSize, memsetval, p_gpuDevice); - HIPCHECK(hipMemset(A_d, memsetval, Nbytes)); - HIPCHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - - for ( int i = 0; i < iSize; i++ ) { - if ( A_h[i] != memsetval ) { - testResult = false; - printf("mismatch at index:%d computed:%02x, memsetval:%02x\n", - i, static_cast (A_h[i]), static_cast (memsetval)); - break; - } - } - HIPCHECK(hipFree(A_d)); - free(A_h); - } - return testResult; -} - -template -bool testhipMemset(T*A_h, T*A_d, T memsetval, enum MemsetType type, - int p_gpuDevice) { - size_t Nbytes = N * sizeof(T); - bool testResult = true; - HIPCHECK(hipMalloc(&A_d, Nbytes)); - A_h = reinterpret_cast (malloc(Nbytes)); - loop(offset, MAX_OFFSET) { - if (type == hipMemsetTypeDefault) { - printf("testhipMemset N=%zu memsetval=%2x device=%d\n", - (N - offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemset(A_d + 
offset, memsetval, N - offset)); - } else if (type == hipMemsetTypeD8) { - printf("testhipMemsetD8 N=%zu memsetD8val=%4x device=%d\n", - (N - offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemsetD8((hipDeviceptr_t)(A_d + offset), memsetval, N - offset)); - } else if (type == hipMemsetTypeD16) { - printf("testhipMemsetD16 N=%zu memsetD16val=%4x device=%d\n", - (N - offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemsetD16((hipDeviceptr_t)(A_d + offset), memsetval, N - offset)); - } else if (type == hipMemsetTypeD32) { - printf("testhipMemsetD32 N=%zu memsetD32val=%8x device=%d\n", - (N - offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemsetD32((hipDeviceptr_t)(A_d + offset), memsetval, N - offset)); - } - HIPCHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost) ); - for ( int i = offset; i < N; i++ ) { - if (A_h[i] != memsetval) { - testResult = false; - printf("mismatch at index:%d computed:%02x, memsetval:%02x\n", - i, static_cast (A_h[i]), static_cast (memsetval)); - break; - } - } - } - HIPCHECK(hipFree(A_d)); - free(A_h); - return testResult; -} - -template -bool testhipMemsetAsync(T*A_h, T*A_d, T memsetval, enum MemsetType type, - int p_gpuDevice) { - size_t Nbytes = N * sizeof(T); - bool testResult = true; - HIPCHECK(hipMalloc(reinterpret_cast (&A_d), Nbytes)); - A_h = reinterpret_cast (malloc(Nbytes)); - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - loop(offset, MAX_OFFSET) { - if (type == hipMemsetTypeDefault) { - printf("testhipMemsetAsync N=%zu memsetval=%2x device=%d\n", - (N - offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemsetAsync(A_d+offset, memsetval, Nbytes-offset, stream)); - } else if (type == hipMemsetTypeD8) { - printf("testhipMemsetD8Async N=%zu memsetD8val=%2x device=%d\n", - (N - offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream)); - } else if (type == hipMemsetTypeD16) { - printf("testhipMemsetD16Async N=%zu memsetD16val=%8x device=%d\n", - (N - 
offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream)); - } else if (type == hipMemsetTypeD32) { - printf("testhipMemsetD32Async N=%zu memsetD32val=%8x device=%d\n", - (N - offset), memsetval, p_gpuDevice); - HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)(A_d + offset), memsetval, N - offset, stream)); - } - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy(A_h, reinterpret_cast (A_d), Nbytes, hipMemcpyDeviceToHost)); - - for ( int i = offset; i < N; i++ ) { - if (A_h[i] != memsetval) { - testResult = false; - printf("mismatch at index:%d computed:%02x\n", i, static_cast (A_h[i])); - break; - } - } - } - HIPCHECK(hipFree(reinterpret_cast (A_d)) ); - HIPCHECK(hipStreamDestroy(stream)); - free(A_h); - return testResult; -} - -bool testhipMemset2AsyncOps() { - printf("testhipMemset2AsyncOps 2 memset operations at the same time\n"); - std::vector v; - v.resize(2048); - float* p2, *p3; - hipMalloc(reinterpret_cast(&p2), 4096 + 4096*2); - p3 = p2+2048; - hipStream_t s; - hipStreamCreate(&s); - hipMemsetAsync(p2, 0, 32*32*4, s); - hipMemsetD32Async((hipDeviceptr_t)p3, 0x3fe00000, 32*32, s ); - hipStreamSynchronize(s); - for (int i = 0; i < 256; ++i) { - hipMemsetAsync(p2, 0, 32*32*4, s); - hipMemsetD32Async((hipDeviceptr_t)p3, 0x3fe00000, 32*32, s ); - } - hipStreamSynchronize(s); - hipDeviceSynchronize(); - hipMemcpy(&v[0], p2, 1024, hipMemcpyDeviceToHost); - hipMemcpy(&v[1024], p3, 1024, hipMemcpyDeviceToHost); - if ((v[0] != 0) || (v[1024] != 1.75f)) { - printf("mismatch (%f != 0) or (%f != 1.75f)\n", v[0], v[1024]); - return false; - } - return true; -} - -int main(int argc, char *argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool testResult = true; - char * cA_d = NULL; - char * cA_h = NULL; - int16_t * siA_d = NULL; - int16_t * siA_h = NULL; - int32_t * iA_d = NULL; - int32_t * iA_h = NULL; - HIPCHECK(hipSetDevice(p_gpuDevice)); - testResult &= 
testhipMemsetSmallSize(memsetval, p_gpuDevice); - - testResult &= testhipMemset(cA_h, cA_d, memsetval, hipMemsetTypeDefault, p_gpuDevice); - testResult &= testhipMemset(iA_h, iA_d, memsetD32val, hipMemsetTypeD32, p_gpuDevice); - testResult &= testhipMemset(siA_h, siA_d, memsetD16val, hipMemsetTypeD16, p_gpuDevice); - testResult &= testhipMemset(cA_h, cA_d, memsetD8val, hipMemsetTypeD8, p_gpuDevice); - - testResult &= testhipMemsetAsync(cA_h, cA_d, memsetval, hipMemsetTypeDefault, p_gpuDevice); - testResult &= testhipMemsetAsync(iA_h, iA_d, memsetD32val, hipMemsetTypeD32, p_gpuDevice); - testResult &= testhipMemsetAsync(siA_h, siA_d, memsetD16val, hipMemsetTypeD16, p_gpuDevice); - testResult &= testhipMemsetAsync(cA_h, cA_d, memsetD8val, hipMemsetTypeD8, p_gpuDevice); - testResult &= testhipMemset2AsyncOps(); - if (testResult) passed(); - failed("Output Mismatch\n"); -} diff --git a/tests/src/runtimeApi/memory/hipMemset2D.cpp b/tests/src/runtimeApi/memory/hipMemset2D.cpp deleted file mode 100644 index 15b762476f..0000000000 --- a/tests/src/runtimeApi/memory/hipMemset2D.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -// Test for hipMemset2D functionality for different width and height values - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST_NAMED: %t hipMemset2D-basic - * TEST_NAMED: %t hipMemset2D-dim1 --width2D 10 --height2D 10 --memsetWidth 4 --memsetHeight 4 - * TEST_NAMED: %t hipMemset2D-dim2 --width2D 100 --height2D 100 --memsetWidth 20 --memsetHeight 40 - * TEST_NAMED: %t hipMemset2D-dim3 --width2D 256 --height2D 256 --memsetWidth 39 --memsetHeight 19 - * TEST_NAMED: %t hipMemset2D-zeroH --width2D 100 --height2D 100 --memsetWidth 20 --memsetHeight 0 - * TEST_NAMED: %t hipMemset2D-zeroW --width2D 100 --height2D 100 --memsetWidth 0 --memsetHeight 20 - * TEST_NAMED: %t hipMemset2D-zeroW*H --width2D 100 --height2D 100 --memsetWidth 0 --memsetHeight 0 - * HIT_END - */ - -#include "test_common.h" - -// Check hipMemset2D functionality -bool testhipMemset2D(int memsetval, int p_gpuDevice) { - bool testResult = true; - size_t numH = 256; - size_t numW = 256; - size_t pitch_A; - size_t width = numW * sizeof(char); - size_t sizeElements = width * numH; - size_t elements = numW* numH; - printf("testhipMemset2D memsetval=%2x device=%d\n", memsetval, p_gpuDevice); - char *A_d; - char *A_h; - - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width , - numH)); - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != NULL); - - for (size_t i=0; i < elements; i++) { - A_h[i] = 1; - } - - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, 
numH)); - HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH, - hipMemcpyDeviceToHost)); - - for (int i=0; i < elements; i++) { - if (A_h[i] != memsetval) { - testResult = false; - printf("testhipMemset2D mismatch at index:%d computed:%02x, memsetval:" - "%02x\n", i, static_cast(A_h[i]), static_cast(memsetval)); - break; - } - } - - hipFree(A_d); - free(A_h); - return testResult; -} - -// Check hipMemset2DAsync functionality -bool testhipMemset2DAsync(int memsetval, int p_gpuDevice) { - size_t numH = 256; - size_t numW = 256; - size_t pitch_A; - size_t width = numW * sizeof(char); - size_t sizeElements = width * numH; - size_t elements = numW * numH; - printf("testhipMemset2DAsync memsetval=%2x device=%d\n", memsetval, - p_gpuDevice); - char *A_d; - char *A_h; - bool testResult = true; - - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, - width , numH)); - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != NULL); - - for (size_t i=0; i < elements; i++) { - A_h[i] = 1; - } - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH, - hipMemcpyDeviceToHost)); - - for (int i=0; i < elements; i++) { - if (A_h[i] != memsetval) { - testResult = false; - printf("testhipMemset2DAsync mismatch at index:%d computed:%02x, memsetval:" - "%02x\n", i, static_cast(A_h[i]), static_cast(memsetval)); - break; - } - } - - hipFree(A_d); - HIPCHECK(hipStreamDestroy(stream)); - free(A_h); - return testResult; -} - -int width2D = 20; -int height2D = 20; -int memsetWidth = 20; -int memsetHeight = 20; - -int parseExtraArguments(int argc, char* argv[]) { - int i = 0; - for (i = 1; i < argc; i++) { - const char* arg = argv[i]; - if (!strcmp(arg, " ")) { - // skip NULL args. 
- } else if (!strcmp(arg, "--width2D")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &width2D)) { - failed("Bad width2D argument"); - } - } else if (!strcmp(arg, "--height2D")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &height2D)) { - failed("Bad height2D argument"); - } - } else if (!strcmp(arg, "--memsetWidth")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &memsetWidth)) { - failed("Bad memsetWidth argument"); - } - } else if (!strcmp(arg, "--memsetHeight")) { - if (++i >= argc || !HipTest::parseInt(argv[i], &memsetHeight)) { - failed("Bad memsetHeight argument"); - } - } else { - failed("Bad argument"); - } - } - return i; -} - -// Memset random dimensions -bool testMemset2DPartial(int memsetval, int p_gpuDevice) { - bool testResult = true; - size_t NUM_H = height2D; - size_t NUM_W = width2D; - size_t Nbytes = N*sizeof(char); - size_t pitch_A; - size_t width = NUM_W * sizeof(char); - size_t sizeElements = width * NUM_H; - size_t elements = NUM_W * NUM_H; - char *A_d; - char *A_h; - printf("testhipMemset2DPartial memsetval=%2x device=%d\n", memsetval, - p_gpuDevice); - - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, - width, NUM_H)); - hipError_t e; - int index; - - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != NULL); - - for (index = 0; index < sizeElements; index++) { - A_h[0] = 'c'; - } - - printf("2D Dimension: %zuX%zu, MemsetWidth:%d, memsetHeight:%d\n", - NUM_W, NUM_H, memsetWidth, memsetHeight); - e = hipMemset2D(A_d, pitch_A, memsetval, memsetWidth, memsetHeight); - HIPASSERT(e == hipSuccess); - - HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, NUM_W, NUM_H, - hipMemcpyDeviceToHost)); - - for (int row = 0; row < memsetHeight; row++) { - for (int column = 0; column < memsetWidth; column++) { - if (A_h[(row * width) + column] != memsetval) { - printf("A_h[%d][%d] did not match %d", row, column, memsetval); - testResult = false; - } - } - } - hipFree(A_d); - free(A_h); - return testResult; -} - -int main(int 
argc, char *argv[]) { - int extraArgs = 0; - bool testResult = true; - - checkImageSupport(); - HIPCHECK(hipSetDevice(p_gpuDevice)); - extraArgs = HipTest::parseStandardArguments(argc, argv, false); - parseExtraArguments(extraArgs, argv); - - if (extraArgs == 1) { - testResult &= testhipMemset2D(memsetval, p_gpuDevice); - if (!(testResult)) { - printf("hipMemset2D failed\n"); - } - testResult &= testhipMemset2DAsync(memsetval, p_gpuDevice); - if (!(testResult)) { - printf("hipMemset2DAsync failed\n"); - } - } else if (extraArgs == 9) { - testResult &= testMemset2DPartial(memsetval, p_gpuDevice); - if (!(testResult)) { - printf("hipMemset2D at random dimensions failed\n"); - } - } else { - failed("Wrong Arguments for test\n"); - } - - if (testResult) { - passed(); - } else { - failed("one or more hipMemset2D tests failed"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp b/tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp deleted file mode 100644 index 09057677ac..0000000000 --- a/tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. -*/ - -// -// Test to verify -// a) Order of execution of device kernel and hipMemset2DAsync api -// b) hipMemSet2DAsync execution in multiple threads -// - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define NUM_THREADS 1000 -#define ITER 100 -#define NUM_H 256 -#define NUM_W 256 - -unsigned blocks = 0; -hipStream_t stream; - -bool testResult = true; -char *A_d, *A_h, *B_d, *B_h, *C_d; -int validateCount; - -size_t pitch_A, pitch_B, pitch_C; -size_t width = NUM_W * sizeof(char); -size_t sizeElements = width * NUM_H; -size_t elements = NUM_W * NUM_H; - -/* - * Square each element in the array B and write to array C. 
- */ - -__global__ void -vector_square(char* B_d, char* C_d, size_t elements) { - for (int i=0 ; i < elements ; i++) { - C_d[i] = B_d[i] * B_d[i]; - } -} - -void memAllocate() { - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, NUM_H)); - HIPCHECK(hipMallocPitch(reinterpret_cast(&B_d), &pitch_B, width, NUM_H)); - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != NULL); - B_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(B_h != NULL); - HIPCHECK(hipMallocPitch(reinterpret_cast(&C_d), &pitch_C, width, NUM_H)); - - for (int i = 0 ; i < elements ; i++) { - B_h[i] = i; - } - HIPCHECK(hipMemcpy2D(B_d, width, B_h, pitch_B, NUM_W, NUM_H, - hipMemcpyHostToDevice)); - HIPCHECK(hipStreamCreate(&stream)); -} - -void memDeallocate() { - HIPCHECK(hipFree(A_d)); HIPCHECK(hipFree(B_d)); HIPCHECK(hipFree(C_d)); - free(A_h); free(B_h); - HIPCHECK(hipStreamDestroy(stream)); -} - -void queueJobsForhipMemset2DAsync(char* A_d, char* A_h, size_t pitch, - size_t width) { - HIPCHECK(hipMemset2DAsync(A_d, pitch, memsetval, NUM_W, NUM_H, stream)); - HIPCHECK(hipMemcpy2DAsync(A_h, width, A_d, pitch, NUM_W, NUM_H, - hipMemcpyDeviceToHost, stream)); -} - -bool testhipMemset2DAsyncWithKernel() { - validateCount = 0; - memAllocate(); - printf("info: Launching vector_square kernel and hipMemset2DAsync " - "simultaneously\n"); - for (int k = 0 ; k < ITER ; k++) { - hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, - stream, B_d, C_d, elements); - - HIPCHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H, - hipMemcpyDeviceToHost)); - - for (int p = 0 ; p < elements ; p++) { - if (A_h[p] == memsetval) { - validateCount+= 1; - } - } - } - - testResult = (validateCount == (ITER * elements)) ? 
true : false; - memDeallocate(); - return testResult; -} - -bool testhipMemset2DAsyncMultiThread() { - validateCount = 0; - auto thread_count = getHostThreadCount(200, NUM_THREADS); - if (thread_count == 0) { - failed("Thread count is 0"); - } - std::thread *t = new std::thread[thread_count]; - - memAllocate(); - - printf("info: Queueing up hipMemset2DAsync jobs over multiple threads\n"); - for (int i = 0 ; i < ITER ; i++) { - for (int k = 0 ; k < thread_count; k++) { - if (k%2) { - t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, A_h, pitch_A, - width); - } else { - t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, B_h, pitch_A, - width); - } - } - for (int j = 0 ; j < thread_count; j++) { - t[j].join(); - } - - HIPCHECK(hipStreamSynchronize(stream)); - for (int k = 0 ; k < elements ; k++) { - if ((A_h[k] == memsetval) && (B_h[k] == memsetval)) { - validateCount+= 1; - } - } - } - memDeallocate(); - delete[] t; - testResult = (validateCount == (ITER * elements)) ? true : false; - return testResult; -} - -int main() { - bool testResult = true; - - blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - printf("blocks: %u\n", blocks); - - testResult &= testhipMemset2DAsyncWithKernel(); - if (testResult) { - printf("Kernel and hipMemset2DAsync executed in correct order!\n"); - } else { - printf("Kernel and hipMemset2DAsync order of execution failed\n"); - } - - testResult &= testhipMemset2DAsyncMultiThread(); - if (testResult) { - printf("hipMemset2DAsync jobs on all threads finished successfully!\n"); - passed(); - } else { - printf("hipMemset2DAsync failed in multi thread scenario\n"); - } - - if (testResult) { - passed(); - } else { - failed("One or more tests failed\n"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemset3D.cpp b/tests/src/runtimeApi/memory/hipMemset3D.cpp deleted file mode 100644 index e4662f4d7d..0000000000 --- a/tests/src/runtimeApi/memory/hipMemset3D.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* -Copyright (c) 2015 - 2021 
Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for memset. -// Also serves as a template for other tests. 
- -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -bool testhipMemset3D(int memsetval,int p_gpuDevice) -{ - size_t numH = 256; - size_t numW = 256; - size_t depth = 10; - size_t width = numW * sizeof(char); - size_t sizeElements = width * numH * depth; - size_t elements = numW* numH* depth; - - - printf ("testhipMemset3D memsetval=%2x device=%d\n", memsetval, p_gpuDevice); - char *A_h; - bool testResult = true; - hipExtent extent = make_hipExtent(width, numH, depth); - hipPitchedPtr devPitchedPtr; - - HIPCHECK(hipMalloc3D(&devPitchedPtr, extent)); - A_h = (char*)malloc(sizeElements); - HIPASSERT(A_h != NULL); - for (size_t i=0; i(malloc(sizeElements)); - HIPASSERT(A_h != NULL); - memset(A_h, 0, sizeElements); - - HIPCHECK(hipMalloc3D(&devPitchedPtr, extent)); - if (bAsync) { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - if ((ret = hipMemset3DAsync(devPitchedPtr, MEMSETVAL, extent, stream)) - != hipSuccess) { - printf("testMemsetWithExtent(%zu,%zu,%zu) Async: Expected to return" - " success but returned Error: '%s'(%d)\n", extent.width, - extent.height, extent.depth, hipGetErrorString(ret), ret); - testPassed &= false; - } - - if ((ret = hipMemset3DAsync(devPitchedPtr, TESTVAL, tstExtent, stream)) - != hipSuccess) { - printf("testMemsetWithExtent(%zu,%zu,%zu) Async: Expected to return" - " success but returned Error: '%s'(%d)\n", tstExtent.width, - tstExtent.height, tstExtent.depth, hipGetErrorString(ret), ret); - testPassed &= false; - } - - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipStreamDestroy(stream)); - } else { - if ((ret = hipMemset3D(devPitchedPtr, MEMSETVAL, extent)) - != hipSuccess) { - printf("testMemsetWithExtent(%zu,%zu,%zu) : Expected to return" - " success but returned Error: '%s'(%d)\n", extent.width, - extent.height, extent.depth, hipGetErrorString(ret), ret); - testPassed &= false; - } - - if ((ret = 
hipMemset3D(devPitchedPtr, TESTVAL, tstExtent)) - != hipSuccess) { - printf("testMemsetWithExtent(%zu,%zu,%zu) : Expected to return" - " success but returned Error: '%s'(%d)\n", tstExtent.width, - tstExtent.height, tstExtent.depth, hipGetErrorString(ret), ret); - testPassed &= false; - } - } - - if (testPassed) { - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); - myparms.srcPtr = devPitchedPtr; - myparms.extent = extent; - #ifdef __HIP_PLATFORM_NVCC__ - myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); - #else - myparms.kind = hipMemcpyDeviceToHost; - #endif - - HIPCHECK(hipMemcpy3D(&myparms)); - - for (int i = 0; i < elements; i++) { - if (A_h[i] != MEMSETVAL) { - testPassed = false; - printf("testMemsetWithExtent: mismatch at index:%d computed:%02x, " - "memsetval:%02x\n", i, static_cast(A_h[i]), - static_cast(MEMSETVAL)); - break; - } - } - } - - HIPCHECK(hipFree(devPitchedPtr.ptr)); - free(A_h); - return testPassed; -} - -/** - * Validates data after performing memory set operation with max memset value - */ -bool testMemsetMaxValue(bool bAsync) { - hipPitchedPtr devPitchedPtr; - bool testPassed = true; - unsigned char *A_h; - int memsetval = std::numeric_limits::max(); - size_t numH = NUMH_MAX, numW = NUMW_MAX, depth = DEPTH_MAX; - size_t width = numW * sizeof(unsigned char); - hipExtent extent = make_hipExtent(width, numH, depth); - size_t sizeElements = width * numH * depth; - size_t elements = numW* numH* depth; - - A_h = reinterpret_cast (malloc(sizeElements)); - HIPASSERT(A_h != NULL); - memset(A_h, 0, sizeElements); - - HIPCHECK(hipMalloc3D(&devPitchedPtr, extent)); - if (bAsync) { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipStreamDestroy(stream)); - } else { - 
HIPCHECK(hipMemset3D(devPitchedPtr, memsetval, extent)); - } - - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); - myparms.srcPtr = devPitchedPtr; - myparms.extent = extent; -#ifdef __HIP_PLATFORM_NVCC__ - myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); -#else - myparms.kind = hipMemcpyDeviceToHost; -#endif - - HIPCHECK(hipMemcpy3D(&myparms)); - - for (int i = 0; i < elements; i++) { - if (A_h[i] != memsetval) { - testPassed = false; - printf("testMemsetMaxValue: mismatch at index:%d computed:%02x, " - "memsetval:%02x\n", i, static_cast(A_h[i]), - static_cast(memsetval)); - break; - } - } - HIPCHECK(hipFree(devPitchedPtr.ptr)); - free(A_h); - return testPassed; -} - -/** - * Function seeks device ptr to random slice and performs Memset operation - * on the slice selected. - */ -bool seekAndSet3DArraySlice(bool bAsync) { - char array3D[ZSIZE_S][YSIZE_S][XSIZE_S] = {0}; - bool testPassed = true; - dim3 arr_dimensions = dim3(ZSIZE_S, YSIZE_S, XSIZE_S); - hipExtent extent = make_hipExtent(sizeof(char) * arr_dimensions.x, - arr_dimensions.y, arr_dimensions.z); - hipPitchedPtr devicePitchedPointer; - int memsetval = MEMSETVAL, memsetval4seeked = TESTVAL; - - HIPCHECK(hipMalloc3D(&devicePitchedPointer, extent)); - HIPCHECK(hipMemset3D(devicePitchedPointer, memsetval, extent)); - - // select random slice for memset - unsigned int seed = time(NULL); - int slice_index = rand_r(&seed) % ZSIZE_S; - - printf("memset3d for sliceindex %d\n", slice_index); - - // Get attributes from device pitched pointer - size_t pitch = devicePitchedPointer.pitch; - size_t slicePitch = pitch * extent.height; - - // Point devptr to selected slice - char *devPtrSlice = (reinterpret_cast(devicePitchedPointer.ptr)) - + slice_index * slicePitch; - hipExtent extentSlice = make_hipExtent(sizeof(char) * arr_dimensions.x, - arr_dimensions.y, 1); - 
hipPitchedPtr modDevPitchedPtr = make_hipPitchedPtr(devPtrSlice, pitch, - arr_dimensions.x, arr_dimensions.y); - - if (bAsync) { - // Memset selected slice (Async) - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipMemset3DAsync(modDevPitchedPtr, memsetval4seeked, - extentSlice, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipStreamDestroy(stream)); - } else { - // Memset selected slice - HIPCHECK(hipMemset3D(modDevPitchedPtr, memsetval4seeked, extentSlice)); - } - - // Copy result back to host buffer - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.dstPtr = make_hipPitchedPtr(array3D, sizeof(char) * arr_dimensions.x, - arr_dimensions.x, arr_dimensions.y); - myparms.srcPtr = devicePitchedPointer; - myparms.extent = extent; -#ifdef __HIP_PLATFORM_NVCC__ - myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); -#else - myparms.kind = hipMemcpyDeviceToHost; -#endif - - HIPCHECK(hipMemcpy3D(&myparms)); - - for (int z = 0; z < ZSIZE_S; z++) { - for (int y = 0; y < YSIZE_S; y++) { - for (int x = 0; x < XSIZE_S; x++) { - if (z == slice_index) { - if (array3D[z][y][x] != memsetval4seeked) { - testPassed = false; - printf("seekAndSet3DArray Slice: mismatch at index: Arr(%d,%d,%d)" - " computed:%02x, memsetval:%02x\n", z, y, x, - array3D[z][y][x], memsetval4seeked); - break; - } - } else { - if (array3D[z][y][x] != memsetval) { - testPassed = false; - printf("seekAndSet3DArray Slice: mismatch at index: Arr(%d,%d,%d)" - " computed:%02x, memsetval:%02x\n", z, y, x, - array3D[z][y][x], memsetval); - break; - } - } - } - } - } - - HIPCHECK(hipFree(devicePitchedPointer.ptr)); - return testPassed; -} - -/** - * Function seeks device ptr to selected portion of 3d array - * and performs Memset operation on the portion. 
- */ -bool seekAndSet3DArrayPortion(bool bAsync) { - char array3D[ZSIZE_P][YSIZE_P][XSIZE_P] = {0}; - bool testPassed = true; - dim3 arr_dimensions = dim3(ZSIZE_P, YSIZE_P, XSIZE_P); - hipExtent extent = make_hipExtent(sizeof(char) * arr_dimensions.x, - arr_dimensions.y, arr_dimensions.z); - hipPitchedPtr devicePitchedPointer; - int memsetval = MEMSETVAL, memsetval4seeked = TESTVAL; - - HIPCHECK(hipMalloc3D(&devicePitchedPointer, extent)); - HIPCHECK(hipMemset3D(devicePitchedPointer, memsetval, extent)); - - // For memsetting extent/size(10,10,10) in the mid portion of cube(30,30,30), - // seek device ptr to (10,10,10) and then memset 10 bytes across x,y,z axis. - size_t pitch = devicePitchedPointer.pitch; - size_t slicePitch = pitch * extent.height; - int slice_index = ZPOS_START, y = YPOS_START, x = XPOS_START; - - // Select 10th slice - char *devPtrSlice = (reinterpret_cast(devicePitchedPointer.ptr)) - + slice_index * slicePitch; - - // Now select row at height as 10 - char *current_row = reinterpret_cast(devPtrSlice + y * pitch); - - // Now select index of selected row as 10 - char *devPtrIndexed = ¤t_row[x]; - - // Make dev Pitchedptr, extent - hipPitchedPtr modDevPitchedPtr = make_hipPitchedPtr(devPtrIndexed, pitch, - arr_dimensions.x, arr_dimensions.y); - hipExtent setExtent = make_hipExtent(sizeof(char) * XSET_LEN, YSET_LEN, - ZSET_LEN); - - if (bAsync) { - // Memset selected portion (Async) - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipMemset3DAsync(modDevPitchedPtr, memsetval4seeked, - setExtent, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipStreamDestroy(stream)); - } else { - // Memset selected portion - HIPCHECK(hipMemset3D(modDevPitchedPtr, memsetval4seeked, setExtent)); - } - - // Copy result back to host buffer - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.dstPtr = make_hipPitchedPtr(array3D, sizeof(char) * 
arr_dimensions.x, - arr_dimensions.x, arr_dimensions.y); - myparms.srcPtr = devicePitchedPointer; - myparms.extent = extent; -#ifdef __HIP_PLATFORM_NVCC__ - myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); -#else - myparms.kind = hipMemcpyDeviceToHost; -#endif - - HIPCHECK(hipMemcpy3D(&myparms)); - - for (int z = 0; z < ZSIZE_P; z++) { - for (int y = 0; y < YSIZE_P; y++) { - for (int x = 0; x < XSIZE_P; x++) { - if ((z >= ZPOS_START && z <= ZPOS_END) && - (y >= YPOS_START && y <= YPOS_END) && - (x >= XPOS_START && x <= XPOS_END)) { - if (array3D[z][y][x] != memsetval4seeked) { - testPassed = false; - printf("seekAndSet3DArray Portion: mismatch at index: Arr(%d,%d,%d)" - " computed:%02x, memsetval:%02x\n", z, y, x, - array3D[z][y][x], memsetval4seeked); - break; - } - } else { - if (array3D[z][y][x] != memsetval) { - testPassed = false; - printf("seekAndSet3DArray Portion: mismatch at index: Arr(%d,%d,%d)" - " computed:%02x, memsetval:%02x\n", z, y, x, - array3D[z][y][x], memsetval); - break; - } - } - } - } - } - - HIPCHECK(hipFree(devicePitchedPointer.ptr)); - return testPassed; -} - - -int main(int argc, char *argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - - if (p_tests == 1) { - hipExtent testExtent; - size_t numH = NUMH_EXT, numW = NUMW_EXT, depth = DEPTH_EXT; - - // Memset with extent width(0) and verify data to be intact - testExtent = make_hipExtent(0, numH, depth); - TestPassed &= testMemsetWithExtent(0, testExtent); - TestPassed &= testMemsetWithExtent(1, testExtent); - - // Memset with extent height(0) and verify data to be intact - testExtent = make_hipExtent(numW, 0, depth); - TestPassed &= testMemsetWithExtent(0, testExtent); - TestPassed &= testMemsetWithExtent(1, testExtent); - - // Memset with extent depth(0) and verify data to be intact - testExtent = make_hipExtent(numW, numH, 0); - TestPassed &= testMemsetWithExtent(0, testExtent); - TestPassed &= testMemsetWithExtent(1, testExtent); 
- - // Memset with extent width,height,depth as 0 and verify data to be intact - testExtent = make_hipExtent(0, 0, 0); - TestPassed &= testMemsetWithExtent(0, testExtent); - TestPassed &= testMemsetWithExtent(1, testExtent); - } else if (p_tests == 2) { - // Memset with max unsigned char and verify memset is success - TestPassed &= testMemsetMaxValue(0); - TestPassed &= testMemsetMaxValue(1); - } else if (p_tests == 3) { - // Seek and set random slice of 3d array - TestPassed &= seekAndSet3DArraySlice(0); - TestPassed &= seekAndSet3DArraySlice(1); - } else if (p_tests == 4) { - // Memset selected portion of 3d array - TestPassed &= seekAndSet3DArrayPortion(0); - TestPassed &= seekAndSet3DArrayPortion(1); - } else { - printf("Didnt receive any valid option. Try options 1 to 4\n"); - TestPassed = false; - } - - if (TestPassed) { - passed(); - } else { - failed("hipMemset3DFunctional validation Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemset3DNegative.cpp b/tests/src/runtimeApi/memory/hipMemset3DNegative.cpp deleted file mode 100644 index 99caf73b7d..0000000000 --- a/tests/src/runtimeApi/memory/hipMemset3DNegative.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -Testcase Scenarios : - - (TestCase 1):: - 1) Test hipMemset3D() with uninitialized devPitchedPtr. - 2) Test hipMemset3DAsync() with uninitialized devPitchedPtr. - - (TestCase 2):: - 3) Reset devPitchedPtr to zero and check return value for hipMemset3D(). - 4) Reset devPitchedPtr to zero and check return value for hipMemset3DAsync(). - - (TestCase 3) - 5) Test hipMemset3D() with extent.width as max size_t and keeping height, - depth as valid values. - 6) Test hipMemset3DAsync() with extent.width as max size_t and keeping height, - depth as valid values. - 7) Test hipMemset3D() with extent.height as max size_t and keeping width, - depth as valid values. - 8) Test hipMemset3DAsync() with extent.height as max size_t and keeping width, - depth as valid values. - 9) Test hipMemset3D() with extent.depth as max size_t and keeping height, - width as valid values. -10) Test hipMemset3DAsync() with extent.depth as max size_t and keeping height, - width as valid values. - - (TestCase 4) -11) Device Ptr out bound and extent(0) passed for hipMemset3D(). -12) Device Ptr out bound and extent(0) passed for hipMemset3DAsync(). - - (TestCase 5) -13) Device Ptr out bound and valid extent passed for hipMemset3D(). -14) Device Ptr out bound and valid extent passed for hipMemset3DAsync(). 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t --tests 1 - * TEST: %t --tests 2 - * TEST: %t --tests 3 - * TEST: %t --tests 4 - * TEST: %t --tests 5 - * HIT_END - */ - -#include "test_common.h" - -int main(int argc, char *argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - hipStream_t stream; - hipError_t ret; - hipPitchedPtr devPitchedPtr; - bool TestPassed = true; - int memsetval = 1; - size_t numH = 256; - size_t numW = 256; - size_t depth = 10; - size_t width = numW * sizeof(char); - hipExtent extent = make_hipExtent(width, numH, depth); - - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipMalloc3D(&devPitchedPtr, extent)); - - if (p_tests == 1) { - // Use uninitialized devpitched ptr - hipPitchedPtr devPitchedUnPtr; - - if ((ret = hipMemset3D(devPitchedUnPtr, memsetval, extent)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "uninit devpitched ptr. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3DAsync(devPitchedUnPtr, memsetval, extent, stream)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "uninit devpitched ptr(Async). Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - } else if (p_tests == 2) { - // Reset devPitchedPtr to zero - hipPitchedPtr rdevPitchedPtr = {0}; - - if ((ret = hipMemset3D(rdevPitchedPtr, memsetval, extent)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "rdevPitchedPtr(0). Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3DAsync(rdevPitchedPtr, memsetval, extent, stream)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "rdevPitchedPtr(0). 
Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - } else if (p_tests == 3) { - // Pass extent fields as max size_t - hipExtent extMW = make_hipExtent(std::numeric_limits::max(), - numH, - depth); - hipExtent extMH = make_hipExtent(width, - std::numeric_limits::max(), - depth); - hipExtent extMD = make_hipExtent(width, - numH, - std::numeric_limits::max()); - - if ((ret = hipMemset3D(devPitchedPtr, memsetval, extMW)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "extent.width max(size_t). Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3DAsync(devPitchedPtr, memsetval, extMW, stream)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "extent.width max(size_t) Async. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3D(devPitchedPtr, memsetval, extMH)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "extent.height max(size_t). Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3DAsync(devPitchedPtr, memsetval, extMH, stream)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "extent.height max(size_t) Async. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } -#ifdef __HIP_PLATFORM_AMD__ - if ((ret = hipMemset3D(devPitchedPtr, memsetval, extMD)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "extent.depth max(size_t). Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3DAsync(devPitchedPtr, memsetval, extMD, stream)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for " - "extent.depth max(size_t) Async. 
Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } -#else - printf("Cuda doesn't check the maximum depth of extent field\n"); -#endif - } else if (p_tests == 4) { - // Device Ptr out bound and extent(0) passed for memset - - size_t pitch = devPitchedPtr.pitch; - size_t slicePitch = pitch * extent.height; - - // Point devptr to end of allocated memory - char *devPtrMod = (reinterpret_cast(devPitchedPtr.ptr)) - + depth * slicePitch; - - // Advance devptr further to go out of boundary - devPtrMod = devPtrMod + 10; - hipPitchedPtr modDevPitchedPtr = make_hipPitchedPtr(devPtrMod, pitch, - numW * sizeof(char), numH); - hipExtent extent0 = {0}; - if ((ret = hipMemset3D(modDevPitchedPtr, memsetval, extent0)) - != hipSuccess) { - printf("ArgValidation : Inappropriate error value returned when " - "deviceptr goes out of boundary. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3DAsync(modDevPitchedPtr, memsetval, extent0, stream)) - != hipSuccess) { - printf("ArgValidation : Inappropriate error value returned when " - "deviceptr goes out of boundary Async. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - } else if (p_tests == 5) { - // Device Ptr out bound and valid extent passed for memset - - size_t pitch = devPitchedPtr.pitch; - size_t slicePitch = pitch * extent.height; - - // Point devptr to end of allocated memory - char *devPtrMod = (reinterpret_cast(devPitchedPtr.ptr)) - + depth * slicePitch; - - // Advance devptr further to go out of boundary - devPtrMod = devPtrMod + 10; - hipPitchedPtr modDevPitchedPtr = make_hipPitchedPtr(devPtrMod, pitch, - numW * sizeof(char), numH); - if ((ret = hipMemset3D(modDevPitchedPtr, memsetval, extent)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned when " - "deviceptr goes out of boundary. 
Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipMemset3DAsync(modDevPitchedPtr, memsetval, extent, stream)) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned when " - "deviceptr goes out of boundary Async. Error: '%s'(%d)\n", - hipGetErrorString(ret), ret); - TestPassed &= false; - } - } else { - printf("Didnt receive any valid option. Try options 1 to 5\n"); - TestPassed = false; - } - - HIPCHECK(hipStreamDestroy(stream)); - HIPCHECK(hipFree(devPitchedPtr.ptr)); - - if (TestPassed) { - passed(); - } else { - failed("hipMemset3DNegative validation Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemset3DRegressMultiThread.cpp b/tests/src/runtimeApi/memory/hipMemset3DRegressMultiThread.cpp deleted file mode 100644 index 7ae81ad695..0000000000 --- a/tests/src/runtimeApi/memory/hipMemset3DRegressMultiThread.cpp +++ /dev/null @@ -1,329 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -Testcase Scenarios : - - (TestCase 1):: - 1) Validate Async behavior of hipMemset3DAsync with commands queued - concurrently from multiple threads. - 2) Validate hipMemset3DAsync behavior when api is queued along with kernel - function operating on same memory. - - (TestCase 2):: - 3) Perform regression of hipMemset3D api in loop with device memory allocated - on different gpus. - 4) Perform regression of hipMemset3DAsync api in loop with device memory - allocated on different gpus. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 LINK_OPTIONS -lpthread - * TEST: %t --tests 1 - * HIT_END - */ - -#include -#ifdef __linux__ -#include -#include -#endif -#include -#include "test_common.h" - - -/* - * Defines - */ -#define MAX_REGRESS_ITERS 20 - -/** - * kernel function sets device memory with value passed - */ -__global__ void func_set_value(hipPitchedPtr devicePitchedPointer, - hipExtent extent, - unsigned char val) { - // Index Calculation - int x = threadIdx.x + blockDim.x * blockIdx.x; - int y = threadIdx.y + blockDim.y * blockIdx.y; - int z = threadIdx.z + blockDim.z * blockIdx.z; - - // Get attributes from device pitched pointer - char *devicePointer = reinterpret_cast(devicePitchedPointer.ptr); - size_t pitch = devicePitchedPointer.pitch; - size_t slicePitch = pitch * extent.height; - - // Loop over the device buffer - if (z < extent.depth) { - char *current_slice_index = devicePointer + z * slicePitch; - if (y < extent.height) { - // Get data array containing all elements from the current row - char *current_row = reinterpret_cast(current_slice_index - + y * pitch); - if (x < extent.width) { - current_row[x] = val; - } - } - } -} - -/** - * Fetches Gpu 
device count - */ -void getDeviceCount(int *pdevCnt) { -#ifdef __linux__ - int fd[2], val = 0; - pid_t childpid; - - // create pipe descriptors - pipe(fd); - - // disable visible_devices env from shell - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - - childpid = fork(); - - if (childpid > 0) { // Parent - close(fd[1]); - // parent will wait to read the device cnt - read(fd[0], &val, sizeof(val)); - - // close the read-descriptor - close(fd[0]); - - // wait for child exit - wait(NULL); - - *pdevCnt = val; - } else if (!childpid) { // Child - int devCnt = 1; - // writing only, no need for read-descriptor - close(fd[0]); - - HIPCHECK(hipGetDeviceCount(&devCnt)); - // send the value on the write-descriptor: - write(fd[1], &devCnt, sizeof(devCnt)); - - // close the write descriptor: - close(fd[1]); - exit(0); - } else { // failure - *pdevCnt = 1; - return; - } - -#else - HIPCHECK(hipGetDeviceCount(pdevCnt)); -#endif -} - -/** - * Performs api regression in loop - */ -bool loopRegression(bool bAsync) { - bool testPassed = true; - char *A_h; - int memsetval = 1, numGpu = 0, hasPeerAccess = 0; - size_t numH = 256, numW = 100, depth = 10; - size_t width = numW * sizeof(char); - hipExtent extent = make_hipExtent(width, numH, depth); - size_t sizeElements = width * numH * depth; - size_t elements = numW* numH* depth; - std::vector devPitchedPtrlist; - hipPitchedPtr pitchedPtr, devpPtr; - - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != NULL); - memset(A_h, 0, sizeElements); - - // Populate hipMemcpy3D parameters - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); - myparms.extent = extent; -#ifdef __HIP_PLATFORM_NVCC__ - myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); -#else - myparms.kind = hipMemcpyDeviceToHost; -#endif - - getDeviceCount(&numGpu); - - // Alloc 3D arrays in all GPUs - 
for (int j = 0; j < numGpu; j++) { - HIPCHECK(hipSetDevice(j)); - HIPCHECK(hipMalloc3D(&pitchedPtr, extent)); - devPitchedPtrlist.push_back(pitchedPtr); - } - - for (int itern = 0; itern < MAX_REGRESS_ITERS; itern++) { - // Validate hipMemset3D data consistency in multiple iters - for (int i = 0; i < numGpu; i++) { - for (int j = 0; j < numGpu; j++) { - HIPCHECK(hipDeviceCanAccessPeer(&hasPeerAccess, i, j)); - if (!hasPeerAccess) { - // Skip and continue if no peer access - continue; - } - HIPCHECK(hipSetDevice(i)); - devpPtr = devPitchedPtrlist[j]; - HIPCHECK(hipMemset3D(devpPtr, 0, extent)); - - if (bAsync) { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipMemset3DAsync(devpPtr, memsetval, extent, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - HIPCHECK(hipStreamDestroy(stream)); - } else { - HIPCHECK(hipMemset3D(devpPtr, memsetval, extent)); - } - - myparms.srcPtr = devpPtr; - memset(A_h, 0, sizeElements); - HIPCHECK(hipMemcpy3D(&myparms)); - - for (int indx = 0; indx < elements; indx++) { - if (A_h[indx] != memsetval) { - testPassed = false; - printf("RegressIter : mismatch at index:%d computed:%02x, " - "memsetval:%02x\n", indx, static_cast(A_h[indx]), - static_cast(memsetval)); - break; - } - } - } - } - } - - for (int j = 0; j < numGpu; j++) { - HIPCHECK(hipFree(devPitchedPtrlist[j].ptr)); - } - - free(A_h); - return testPassed; -} - - -/** - * Thread function queues kernel function and memset cmds - */ -void threadFunc(hipStream_t stream, hipPitchedPtr devpPtr, int memsetval, - int testval, hipExtent extent, hipMemcpy3DParms myparms) { - // Kernel Launch Configuration - dim3 threadsPerBlock = dim3(8, 8, 8); - dim3 blocks; - blocks = dim3((extent.width + threadsPerBlock.x - 1) / threadsPerBlock.x, - (extent.height + threadsPerBlock.y - 1) / threadsPerBlock.y, - (extent.depth + threadsPerBlock.z - 1) / threadsPerBlock.z); - - hipLaunchKernelGGL(func_set_value, dim3(blocks), dim3(threadsPerBlock), 0, - stream, devpPtr, extent, 
memsetval); - HIPCHECK(hipMemset3DAsync(devpPtr, testval, extent, stream)); - HIPCHECK(hipMemcpy3DAsync(&myparms, stream)); -} - -/** - * Async commands queued concurrently and executed - */ -bool validateAsyncConcurrencyMthread() { - bool testPassed = true; - char *A_h; - int memsetval = 1, numGpu = 0, testval = 2; - size_t numH = 256, numW = 100, depth = 10; - size_t width = numW * sizeof(char); - hipExtent extent = make_hipExtent(width, numH, depth); - size_t sizeElements = width * numH * depth; - size_t elements = numW* numH* depth; - hipPitchedPtr devpPtr; - hipStream_t stream; - - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipMalloc3D(&devpPtr, extent)); - - A_h = reinterpret_cast(malloc(sizeElements)); - HIPASSERT(A_h != NULL); - memset(A_h, 0, sizeElements); - - // Populate hipMemcpy3D parameters - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.srcPtr = devpPtr; - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); - myparms.extent = extent; -#ifdef __HIP_PLATFORM_NVCC__ - myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); -#else - myparms.kind = hipMemcpyDeviceToHost; -#endif - - std::vector threadlist; - - // Queue cmds concurrently from multiple threads on same stream - for (int i = 0; i < MAX_THREADS; i++) { - threadlist.push_back(std::thread(threadFunc, stream, devpPtr, memsetval, - testval, extent, myparms)); - } - - for (auto &t : threadlist) { - t.join(); - } - - HIPCHECK(hipStreamSynchronize(stream)); - - for (int k = 0 ; k < elements ; k++) { - if (A_h[k] != testval) { - printf("validateAsyncConcurrencyMthread: Test failed\n"); - testPassed = false; - break; - } - } - - HIPCHECK(hipStreamDestroy(stream)); - free(A_h); - HIPCHECK(hipFree(devpPtr.ptr)); - return testPassed; -} - - -int main(int argc, char *argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - - if (p_tests == 1) { - TestPassed = 
validateAsyncConcurrencyMthread(); - } else if (p_tests == 2) { - /* TODO : Loop regression test auto execution in HIT is currently disabled. - To be enabled back after HIP API fix */ - TestPassed &= loopRegression(0); - TestPassed &= loopRegression(1); - } else { - printf("Didnt receive any valid option. Try options 1 to 2\n"); - TestPassed = false; - } - - if (TestPassed) { - passed(); - } else { - failed("hipMemset3DRegressMultiThread() validation Failed!"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemsetAsyncAndKernel.cpp b/tests/src/runtimeApi/memory/hipMemsetAsyncAndKernel.cpp deleted file mode 100644 index 043b5f66a0..0000000000 --- a/tests/src/runtimeApi/memory/hipMemsetAsyncAndKernel.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
-*/ - -/* - * Test for checking order of execution of device kernel and - * hipMemsetAsync apis on all gpus - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define ITER 10 -#define N 1024 * 1024 - -unsigned blocks = 0; - -template -__global__ void vector_square(T* B_d, T* C_d, size_t M) { - for (int i=0 ; i < M ; i++) { - C_d[i] = B_d[i] * B_d[i]; - } -} - -template -class MemSetTest { - public: - T *A_h, *B_d, *B_h, *C_d; - T memSetVal; - size_t Nbytes; - bool testResult = true; - int validateCount = 0; - hipStream_t stream; - - void memAllocate(T memSetValue) { - memSetVal = memSetValue; - Nbytes = N * sizeof(T); - - A_h = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(A_h != NULL); - HIPCHECK(hipMalloc(&B_d , Nbytes)); - B_h = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(B_h != NULL); - HIPCHECK(hipMalloc(&C_d , Nbytes)); - - for (int i = 0 ; i < N ; i++) { - B_h[i] = i; - } - HIPCHECK(hipMemcpy(B_d , B_h , Nbytes , hipMemcpyHostToDevice)); - HIPCHECK(hipStreamCreate(&stream)); - } - - void memDeallocate() { - HIPCHECK(hipFree(B_d)); HIPCHECK(hipFree(C_d)); - free(B_h); free(A_h); - HIPCHECK(hipStreamDestroy(stream)); - } - - void validateExecutionOrder() { - for (int p = 0 ; p < N ; p++) { - if (A_h[p] == memSetVal) { - validateCount+= 1; - } - } - } - - bool resultAfterAllIterations() { - testResult = (validateCount == (ITER * N)) ? 
true : false; - memDeallocate(); - return testResult; - } -}; - -bool testhipMemsetAsyncWithKernel() { - MemSetTest obj; - obj.memAllocate(memsetval); - for (int k = 0 ; k < ITER ; k++) { - hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, - obj.stream, obj.B_d, obj.C_d, N); - HIPCHECK(hipMemsetAsync(obj.C_d , obj.memSetVal , N , obj.stream)); - HIPCHECK(hipStreamSynchronize(obj.stream)); - HIPCHECK(hipMemcpy(obj.A_h , obj.C_d , obj.Nbytes , hipMemcpyDeviceToHost)); - - obj.validateExecutionOrder(); - } - return obj.resultAfterAllIterations(); -} - -bool testhipMemsetD32AsyncWithKernel() { - MemSetTest obj; - obj.memAllocate(memsetD32val); - for (int k = 0 ; k < ITER ; k++) { - hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, - obj.stream, obj.B_d, obj.C_d, N); - HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)obj.C_d , obj.memSetVal , N , obj.stream)); - HIPCHECK(hipStreamSynchronize(obj.stream)); - HIPCHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost)); - - obj.validateExecutionOrder(); - } - return obj.resultAfterAllIterations(); -} - -bool testhipMemsetD16AsyncWithKernel() { - MemSetTest obj; - obj.memAllocate(memsetD16val); - for (int k = 0 ; k < ITER ; k++) { - hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, - obj.stream, obj.B_d, obj.C_d, N); - HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)obj.C_d , obj.memSetVal , N , obj.stream)); - HIPCHECK(hipStreamSynchronize(obj.stream)); - HIPCHECK(hipMemcpy(obj.A_h , obj.C_d, obj.Nbytes , hipMemcpyDeviceToHost)); - - obj.validateExecutionOrder(); - } - return obj.resultAfterAllIterations(); -} - -bool testhipMemsetD8AsyncWithKernel() { - MemSetTest obj; - obj.memAllocate(memsetD8val); - for (int k = 0; k < ITER; k++) { - hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, - obj.stream, obj.B_d, obj.C_d, N); - HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)obj.C_d, obj.memSetVal, N, obj.stream)); - 
HIPCHECK(hipStreamSynchronize(obj.stream)); - HIPCHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost)); - - obj.validateExecutionOrder(); - } - return obj.resultAfterAllIterations(); -} - -int main() { - bool testResult = true; - int numDevices = 0; - blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - printf("blocks: %u\n", blocks); - - HIPCHECK(hipGetDeviceCount(&numDevices)); - printf("total number of gpus in the system: %d\n", numDevices); - - for (int i = 0; i < numDevices; i++) { - HIPCHECK(hipSetDevice(i)); - printf("test running on gpu %d\n", i); - - testResult &= testhipMemsetAsyncWithKernel(); - if (!(testResult)) { - printf("Mismatch in order of execution of hipMemsetAsync and kernel\n"); - } - - testResult &= testhipMemsetD32AsyncWithKernel(); - if (!(testResult)) { - printf("Mismatch in order of execution of hipMemsetD32Async and kernel\n"); - } - - testResult &= testhipMemsetD16AsyncWithKernel(); - if (!(testResult)) { - printf("Mismatch in order of execution of hipMemsetD16Async and kernel\n"); - } - - testResult &= testhipMemsetD8AsyncWithKernel(); - if (!(testResult)) { - printf("Mismatch in order of execution of hipMemsetD8Async and kernel\n"); - } - } - - if (testResult) { - printf("Execution order of Kernel and hipMemsetAsync apis on " - "all gpus is correct!\n"); - passed(); - } else { - failed("One or more hipMemsetAsync tests failed\n"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemsetAsyncMultiThread.cpp b/tests/src/runtimeApi/memory/hipMemsetAsyncMultiThread.cpp deleted file mode 100644 index 7f6d379dc2..0000000000 --- a/tests/src/runtimeApi/memory/hipMemsetAsyncMultiThread.cpp +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
-*/ - -/* - * Test that validates functionality of hipmemsetAsync apis over multi threads - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define NUM_THREADS 50 -#define ITER 50 - -template -class MemSetTest { - public: - T *A_h, *A_d, *B_h; - T memSetVal; - size_t Nbytes; - bool testResult = true; - int validateCount = 0; - hipStream_t stream; - - void memAllocate(T memSetValue) { - memSetVal = memSetValue; - Nbytes = N * sizeof(T); - - A_h = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(A_h != NULL); - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - B_h = reinterpret_cast(malloc(Nbytes)); - HIPASSERT(B_h != NULL); - - HIPCHECK(hipStreamCreate(&stream)); - } - - void threadCompleteStatus() { - for (int k = 0 ; k < N ; k++) { - if ((A_h[k] == memSetVal) && (B_h[k] == memSetVal)) { - validateCount+= 1; - } - } - } - - bool resultAfterAllIterations() { - memDeallocate(); - testResult = (validateCount == (ITER * N)) ? 
true: false; - return testResult; - } - - void memDeallocate() { - HIPCHECK(hipFree(A_d)); - free(A_h); - free(B_h); - HIPCHECK(hipStreamDestroy(stream)); - } -}; - -template -void queueJobsForhipMemsetAsync(T* A_d, T* A_h, T memSetVal, size_t Nbytes, - hipStream_t stream) { - HIPCHECK(hipMemsetAsync(A_d, memSetVal, N, stream)); - HIPCHECK(hipMemcpyAsync(A_h, A_d, Nbytes, hipMemcpyDeviceToHost, stream)); -} - -template -void queueJobsForhipMemsetD32Async(T* A_d, T* A_h, T memSetVal, size_t Nbytes, - hipStream_t stream) { - HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memSetVal, N, stream)); - HIPCHECK(hipMemcpyAsync(A_h, A_d, Nbytes, hipMemcpyDeviceToHost, stream)); -} - -template -void queueJobsForhipMemsetD16Async(T* A_d, T* A_h, T memSetVal, size_t Nbytes, - hipStream_t stream) { - HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memSetVal, N, stream)); - HIPCHECK(hipMemcpyAsync(A_h, A_d, Nbytes, hipMemcpyDeviceToHost, stream)); -} - -template -void queueJobsForhipMemsetD8Async(T* A_d, T* A_h, T memSetVal, size_t Nbytes, - hipStream_t stream) { - HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memSetVal, N, stream)); - HIPCHECK(hipMemcpyAsync(A_h, A_d, Nbytes, hipMemcpyDeviceToHost, stream)); -} - -/* Queue hipMemsetAsync jobs on multiple threads and verify they all - * finished on all threads successfully - */ - -bool testhipMemsetAsyncWithMultiThread() { - MemSetTest obj; - obj.memAllocate(memsetval); - std::thread t[NUM_THREADS]; - - for (int i = 0 ; i < ITER ; i++) { - for (int k = 0 ; k < NUM_THREADS ; k++) { - if (k%2) { - t[k] = std::thread(queueJobsForhipMemsetAsync, obj.A_d, obj.A_h, - obj.memSetVal, obj.Nbytes, obj.stream); - } else { - t[k] = std::thread(queueJobsForhipMemsetAsync, obj.A_d, obj.B_h, - obj.memSetVal, obj.Nbytes, obj.stream); - } - } - - for (int j = 0 ; j < NUM_THREADS ; j++) { - t[j].join(); - } - - HIPCHECK(hipStreamSynchronize(obj.stream)); - obj.threadCompleteStatus(); - } - return obj.resultAfterAllIterations(); -} - -bool 
testhipMemsetD32AsyncWithMultiThread() { - MemSetTest obj; - obj.memAllocate(memsetD32val); - std::thread t[NUM_THREADS]; - - for (int i = 0 ; i < ITER ; i++) { - for (int k = 0 ; k < NUM_THREADS ; k++) { - if (k%2) { - t[k] = std::thread(queueJobsForhipMemsetD32Async, obj.A_d, - obj.A_h, obj.memSetVal, obj.Nbytes, obj.stream); - } else { - t[k] = std::thread(queueJobsForhipMemsetD32Async, obj.A_d, - obj.B_h, obj.memSetVal, obj.Nbytes, obj.stream); - } - } - - for (int j = 0 ; j < NUM_THREADS ; j++) { - t[j].join(); - } - - HIPCHECK(hipStreamSynchronize(obj.stream)); - obj.threadCompleteStatus(); - } - return obj.resultAfterAllIterations(); -} - -bool testhipMemsetD16AsyncWithMultiThread() { - MemSetTest obj; - obj.memAllocate(memsetD16val); - std::thread t[NUM_THREADS]; - - for (int i = 0 ; i < ITER ; i++) { - for (int k = 0 ; k < NUM_THREADS ; k++) { - if (k%2) { - t[k] = std::thread(queueJobsForhipMemsetD16Async, obj.A_d, - obj.A_h, obj.memSetVal, obj.Nbytes, obj.stream); - } else { - t[k] = std::thread(queueJobsForhipMemsetD16Async, obj.A_d, - obj.B_h, obj.memSetVal, obj.Nbytes, obj.stream); - } - } - - for (int j = 0 ; j < NUM_THREADS ; j++) { - t[j].join(); - } - - HIPCHECK(hipStreamSynchronize(obj.stream)); - obj.threadCompleteStatus(); - } - return obj.resultAfterAllIterations(); -} - -bool testhipMemsetD8AsyncWithMultiThread() { - MemSetTest obj; - obj.memAllocate(memsetD8val); - std::thread t[NUM_THREADS]; - - for (int i = 0 ; i < ITER ; i++) { - for (int k = 0 ; k < NUM_THREADS ; k++) { - if (k%2) { - t[k] = std::thread(queueJobsForhipMemsetD8Async, obj.A_d, - obj.A_h, obj.memSetVal, obj.Nbytes, obj.stream); - } else { - t[k] = std::thread(queueJobsForhipMemsetD8Async, obj.A_d, - obj.B_h, obj.memSetVal, obj.Nbytes, obj.stream); - } - } - for (int j = 0 ; j < NUM_THREADS ; j++) { - t[j].join(); - } - - HIPCHECK(hipStreamSynchronize(obj.stream)); - obj.threadCompleteStatus(); - } - return obj.resultAfterAllIterations(); -} - -int main() { - bool testResult 
= true; - printf("Queueing up hipMemSetAsync jobs on multiple threads" - "and checking results\n"); - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - printf("blocks: %u\n", blocks); - - testResult &= testhipMemsetAsyncWithMultiThread(); - if (!(testResult)) { - printf("Thread execution did not complete for hipMemsetAsync\n"); - } - - testResult &= testhipMemsetD32AsyncWithMultiThread(); - if (!(testResult)) { - printf("Thread execution did not complete for hipMemsetD32Async\n"); - } - - testResult &= testhipMemsetD16AsyncWithMultiThread(); - if (!(testResult)) { - printf("Thread execution did not complete for hipMemsetD16Async\n"); - } - testResult &= testhipMemsetD8AsyncWithMultiThread(); - if (!(testResult)) { - printf("Thread execution did not complete for hipMemsetD8Async\n"); - } - - if (testResult) { - printf("All threads ran successfully for all hipMemsetAsync apis\n"); - passed(); - } else { - failed("One or more tests failed\n"); - } -} diff --git a/tests/src/runtimeApi/memory/hipMemsetInvalidPtr.cpp b/tests/src/runtimeApi/memory/hipMemsetInvalidPtr.cpp deleted file mode 100644 index f3f0d5f478..0000000000 --- a/tests/src/runtimeApi/memory/hipMemsetInvalidPtr.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. -*/ - -// * To test invalid pointer to hipMemset* apis - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#define N 50 -#define MEMSETVAL 0x42 -#define NUM_H 256 -#define NUM_W 256 - -int main() { - size_t Nbytes = N*sizeof(char); - size_t pitch_A; - size_t width = NUM_W * sizeof(char); - size_t sizeElements = width * NUM_H; - size_t elements = NUM_W * NUM_H; - char *A_d; - - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width , NUM_H)); - - hipError_t e; - - e = hipMemset(NULL , MEMSETVAL , Nbytes); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemsetD32(NULL , MEMSETVAL , Nbytes); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemsetD16(NULL , MEMSETVAL , Nbytes); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemsetD8(NULL , MEMSETVAL , Nbytes); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemsetAsync(NULL , MEMSETVAL , Nbytes , 0); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemsetD32Async(NULL , MEMSETVAL , Nbytes, 0); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemsetD16Async(NULL , MEMSETVAL , Nbytes, 0); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemsetD8Async(NULL , MEMSETVAL , Nbytes, 0); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemset2D(NULL, pitch_A, MEMSETVAL, NUM_W, NUM_H); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemset2DAsync(NULL, pitch_A, MEMSETVAL, NUM_W, NUM_H, 0); - HIPASSERT(e == hipErrorInvalidValue); - - /* Passing host 
pointer to hipMemset.Ticket SWDEV-243206 is open for this. - * Disabling this test until the ticket is closed - * - char *A_h; - A_h = (char*)malloc(Nbytes); - e = hipMemset(A_h, MEMSETVAL , Nbytes); - HIPASSERT(e == hipErrorInvalidValue); - */ - - /* Passing invalid pitch to hipMemset2D.Ticket SWDEV-243104 is open for this. - * Disabling this test until the ticket is closed - * - e = hipMemset2D(A_d, 0, MEMSETVAL, NUM_W, NUM_H); - HIPASSERT(e == hipErrorInvalidValue); - - e = hipMemset2DAsync(A_d, 0, MEMSETVAL, NUM_W, NUM_H,0); - HIPASSERT(e == hipErrorInvalidValue); - */ - - hipFree(A_d); - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMultiMemcpyMultiThrdMultiStrm.cpp b/tests/src/runtimeApi/memory/hipMultiMemcpyMultiThrdMultiStrm.cpp deleted file mode 100644 index a1eebf20b1..0000000000 --- a/tests/src/runtimeApi/memory/hipMultiMemcpyMultiThrdMultiStrm.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ - -// Testcase Description: This test launches multiple threads which creates a stream to deploy kernel -// and also launch hipMemcpyAsync() api on the same stream. This test case is simulate the scenario -// reported in SWDEV-181598. -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define NUM_THREADS 16 - -size_t N_ELMTS = 1024; -size_t Nbytes = N_ELMTS * sizeof(float); -std::atomic Thread_count { 0 }; - -const unsigned ThreadsPerBlock = 256; -const unsigned blocks = (N_ELMTS + 255) / ThreadsPerBlock; - -__global__ void vector_square(float* C_d, float* A_d, size_t N_ELMTS) { - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - for (size_t i = gputhread; i < N_ELMTS; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } -} - -void Thread_func() { - int Data_mismatch = 0; - float *A_h, *C_h, *A_d, *C_d, *B_d; - A_h = (float*) malloc(Nbytes); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h = (float*) malloc(Nbytes); - HIPCHECK(C_h == 0 ? 
hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < N_ELMTS; i++) { - A_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - HIPCHECK(hipMalloc(&B_d, Nbytes)); - hipStream_t mystream; - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); - hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(ThreadsPerBlock), 0, - mystream, C_d, A_d, N_ELMTS); - HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - // The following hipMemcpyAsync() is called only to load stream with multiple Async calls - HIPCHECK(hipMemcpyAsync(B_d, A_d, Nbytes, hipMemcpyDeviceToDevice, mystream)); - Thread_count++; - - HIPCHECK(hipStreamSynchronize(mystream)); - HIPCHECK(hipStreamDestroy(mystream)); - // Verifying result of the kernel computation - for (size_t i = 0; i < N_ELMTS; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - Data_mismatch++; - } - } - // Releasing resources - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - HIPCHECK(hipFree(B_d)); - free(A_h); - free(C_h); - - if (Data_mismatch != 0) { - failed("Mismatch found in the result of the computation!"); - } -} - -int main(int argc, char* argv[]) { - - std::thread T[NUM_THREADS]; - for (int i = 0; i < NUM_THREADS; i++) { - T[i] = std::thread(Thread_func); - } - - // Wait until all the threads finish their execution - for (int i = 0; i < NUM_THREADS; i++) { - T[i].join(); - } - - if (Thread_count.load() != NUM_THREADS) { - failed( - "Seems like all the launched threads didnot complete the execution!"); - } - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipMultiMemcpyMultiThread.cpp b/tests/src/runtimeApi/memory/hipMultiMemcpyMultiThread.cpp deleted file mode 100644 index ea8f32c965..0000000000 --- a/tests/src/runtimeApi/memory/hipMultiMemcpyMultiThread.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - Copyright (c) 2020 - 2021 Advanced 
Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ - -// Testcase Description: This test launches multiple threads which uses same stream to deploy kernel -// and also launch hipMemcpyAsync() api. This test case is simulate the scenario -// reported in SWDEV-181598. 
-/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define NUM_THREADS 16 - -size_t N_ELMTS = 32 * 1024; -size_t Nbytes = N_ELMTS * sizeof(float); -std::atomic Thread_count { 0 }; -hipStream_t mystream; -float *A_h, *C_h, *A_d, *C_d, *B_d; - -const unsigned ThreadsPerBlock = 256; -const unsigned blocks = (N_ELMTS + 255) / ThreadsPerBlock; - -__global__ void vector_square(float* C_d, float* A_d, size_t N_ELMTS) { - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - for (size_t i = gputhread; i < N_ELMTS; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } -} - -void Thread_func() { - hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(ThreadsPerBlock), 0, - mystream, C_d, A_d, N_ELMTS); - HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - // The following two MemcpyAsync calls are for sole purpose of loading stream with multiple async calls - HIPCHECK(hipMemcpyAsync(B_d, A_d, Nbytes, hipMemcpyDeviceToDevice, mystream)); - HIPCHECK(hipMemcpyAsync(B_d, A_d, Nbytes, hipMemcpyDeviceToDevice, mystream)); - Thread_count++; -} - -int main(int argc, char* argv[]) { - int Data_mismatch = 0; - A_h = (float*) malloc(Nbytes); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h = (float*) malloc(Nbytes); - HIPCHECK(C_h == 0 ? 
hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < N_ELMTS; i++) { - A_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - HIPCHECK(hipMalloc(&B_d, Nbytes)); - - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); - - std::thread T[NUM_THREADS]; - for (int i = 0; i < NUM_THREADS; i++) { - T[i] = std::thread(Thread_func); - } - - // Wait until all the threads finish their execution - for (int i = 0; i < NUM_THREADS; i++) { - T[i].join(); - } - - HIPCHECK(hipStreamSynchronize(mystream)); - HIPCHECK(hipStreamDestroy(mystream)); - - // Verifying the result of the kernel computation - for (size_t i = 0; i < N_ELMTS; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - Data_mismatch++; - } - } - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - HIPCHECK(hipFree(B_d)); - free(A_h); - free(C_h); - - if (Thread_count.load() != NUM_THREADS) { - failed( - "Seems like all the launched threads didnot complete the execution!"); - } else if (Data_mismatch != 0) { - failed("Mismatch found in the result of the computation!"); - } - - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipPointerAttributes.cpp b/tests/src/runtimeApi/memory/hipPointerAttributes.cpp deleted file mode 100644 index a85dc156da..0000000000 --- a/tests/src/runtimeApi/memory/hipPointerAttributes.cpp +++ /dev/null @@ -1,339 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -// Test pointer tracking logic: allocate memory and retrieve stats with hipPointerGetAttributes - -#include "hip/hip_runtime.h" -#include "test_common.h" -#include - -size_t Nbytes = 0; - -//================================================================================================= -// Utility Functions: -//================================================================================================= - -bool operator==(const hipPointerAttribute_t& lhs, const hipPointerAttribute_t& rhs) { - return ((lhs.hostPointer == rhs.hostPointer) && (lhs.devicePointer == rhs.devicePointer) && - (lhs.type == rhs.type) && (lhs.device == rhs.device) && - (lhs.allocationFlags == rhs.allocationFlags)); -}; - - -bool operator!=(const hipPointerAttribute_t& lhs, const hipPointerAttribute_t& rhs) { - return !(lhs == rhs); -} - - -const char* memoryTypeToString(hipMemoryType memoryType) { - switch (memoryType) { - case hipMemoryTypeHost: - return "[Host]"; - case hipMemoryTypeDevice: - return "[Device]"; - default: - return "[Unknown]"; - }; -} - - -void resetAttribs(hipPointerAttribute_t* attribs) { - attribs->hostPointer = (void*)(-1); - attribs->devicePointer = (void*)(-1); - attribs->type = hipMemoryTypeHost; - attribs->device = -2; - attribs->isManaged = -1; - attribs->allocationFlags = 0xffff; -}; - - -void printAttribs(const hipPointerAttribute_t* attribs) { - printf( - "hostPointer:%p devicePointer:%p type:%s deviceId:%d isManaged:%d " - "allocationFlags:%u\n", - attribs->hostPointer, attribs->devicePointer, memoryTypeToString(attribs->type), - attribs->device, attribs->isManaged, attribs->allocationFlags); -}; - - -inline int zrand(int max) { return rand() % max; } - - -//================================================================================================= -// Functions to run tests 
-//================================================================================================= -//-- -// Run through a couple simple cases to test lookups and host pointer arithmetic: -void testSimple() { - printf("\n"); - printf("===========================================================================\n"); - printf("Simple Tests\n"); - printf("===========================================================================\n"); - - char* A_d; - char* A_Pinned_h; - char* A_OSAlloc_h; - hipError_t e; - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipHostMalloc((void**)&A_Pinned_h, Nbytes, hipHostMallocDefault)); - A_OSAlloc_h = (char*)malloc(Nbytes); - - size_t free, total; - HIPCHECK(hipMemGetInfo(&free, &total)); - printf("hipMemGetInfo: free=%zu (%4.2f) Nbytes=%lu total=%zu (%4.2f)\n", free, - (float)(free / 1024.0 / 1024.0), Nbytes, total, (float)(total / 1024.0 / 1024.0)); - HIPASSERT(free + Nbytes <= total); - - - hipPointerAttribute_t attribs; - hipPointerAttribute_t attribs2; - - // Device memory - printf("\nDevice memory (hipMalloc)\n"); - HIPCHECK(hipPointerGetAttributes(&attribs, A_d)); - - // Check pointer arithmetic cases: - resetAttribs(&attribs2); - HIPCHECK(hipPointerGetAttributes(&attribs2, A_d + 100)); - HIPASSERT((char*)attribs.devicePointer + 100 == (char*)attribs2.devicePointer); - - // Corner case at end of array: - resetAttribs(&attribs2); - HIPCHECK(hipPointerGetAttributes(&attribs2, A_d + Nbytes - 1)); - HIPASSERT((char*)attribs.devicePointer + Nbytes - 1 == (char*)attribs2.devicePointer); - - // Pointer just beyond array - must be invalid or at least a different pointer - resetAttribs(&attribs2); - e = hipPointerGetAttributes(&attribs2, A_d + Nbytes + 1); - if (e != hipErrorInvalidValue) { - // We might have strayed into another pointer area. 
- HIPASSERT((char*)attribs.devicePointer != (char*)attribs2.devicePointer); - } - - - resetAttribs(&attribs2); - e = hipPointerGetAttributes(&attribs2, A_d + Nbytes); - if (e != hipErrorInvalidValue) { - HIPASSERT(attribs.devicePointer != attribs2.devicePointer); - } - - hipFree(A_d); - e = hipPointerGetAttributes(&attribs, A_d); - HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error. - - - // Device-visible host memory - printf("\nDevice-visible host memory (hipHostMalloc)\n"); - HIPCHECK(hipPointerGetAttributes(&attribs, A_Pinned_h)); - - resetAttribs(&attribs2); - HIPCHECK(hipPointerGetAttributes(&attribs2, A_Pinned_h + Nbytes / 2)); - HIPASSERT((char*)attribs.hostPointer + Nbytes / 2 == (char*)attribs2.hostPointer); - - - hipHostFree(A_Pinned_h); - e = hipPointerGetAttributes(&attribs, A_Pinned_h); - HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error. - - // OS memory - printf("\nOS-allocated memory (malloc)\n"); - e = hipPointerGetAttributes(&attribs, A_OSAlloc_h); - HIPASSERT(e == hipErrorInvalidValue); // Just freed the pointer, this should return an error. 
-} - -// Store the hipPointer attrib and some extra info so can later compare the looked-up info against -// the reference expectation -struct SuperPointerAttribute { - void* _pointer; - size_t _sizeBytes; - hipPointerAttribute_t _attrib; -}; - - -//--- -// Support function to check result against a reference: -void checkPointer(SuperPointerAttribute& ref, int major, int minor, void* pointer) { - hipPointerAttribute_t attribs; - resetAttribs(&attribs); - - hipError_t e = hipPointerGetAttributes(&attribs, pointer); - if ((e != hipSuccess) || (attribs != ref._attrib)) { - HIPCHECK(e); - HIPASSERT(attribs != ref._attrib); - } else { - if (p_verbose & 0x1) { - printf("#%4d.%d GOOD:%p getattr :: ", major, minor, pointer); - printAttribs(&attribs); - } - } -} - - -//--- -// Test that allocates memory across all devices withing the specified size range -// (minSize...maxSize). Then does lookups to make sure the info reported by the tracker matches -// expecations Then deallocates it all. -// -// Multiple threads can call this function and in fact we do this in the testMultiThreaded_1 test. -void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) { - printf(" clusterAllocs numAllocs=%d size=%lu..%lu\n", numAllocs, minSize, maxSize); - std::vector reference(numAllocs); - - HIPASSERT(minSize > 0); - HIPASSERT(maxSize >= minSize); - - int numDevices; - HIPCHECK(hipGetDeviceCount(&numDevices)); - - //--- - // Populate with device and host allocations. 
- size_t totalDeviceAllocated[numDevices]; - for (int i = 0; i < numDevices; i++) { - totalDeviceAllocated[i] = 0; - } - for (int i = 0; i < numAllocs; i++) { - bool isDevice = rand() & 0x1; - reference[i]._sizeBytes = zrand(maxSize - minSize) + minSize; - - reference[i]._attrib.device = zrand(numDevices); - HIPCHECK(hipSetDevice(reference[i]._attrib.device)); - reference[i]._attrib.isManaged = 0; - - void* ptr; - if (isDevice) { - totalDeviceAllocated[reference[i]._attrib.device] += reference[i]._sizeBytes; - HIPCHECK(hipMalloc((void**)&ptr, reference[i]._sizeBytes)); - reference[i]._attrib.type = hipMemoryTypeDevice; - reference[i]._attrib.devicePointer = ptr; - reference[i]._attrib.hostPointer = NULL; - reference[i]._attrib.allocationFlags = 0; // TODO-randomize these. - } else { - HIPCHECK(hipHostMalloc((void**)&ptr, reference[i]._sizeBytes, hipHostMallocDefault)); - reference[i]._attrib.type = hipMemoryTypeHost; - reference[i]._attrib.devicePointer = ptr; - reference[i]._attrib.hostPointer = ptr; - reference[i]._attrib.allocationFlags = 0; // TODO-randomize these. 
- } - reference[i]._pointer = ptr; - } - - for (int i = 0; i < numDevices; i++) { - size_t free, total; - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipMemGetInfo(&free, &total)); - printf( - " device#%d: hipMemGetInfo: free=%zu (%4.2fMB) totalDevice=%lu (%4.2fMB) total=%zu " - "(%4.2fMB)\n", - i, free, (float)(free / 1024.0 / 1024.0), totalDeviceAllocated[i], - (float)(totalDeviceAllocated[i]) / 1024.0 / 1024.0, total, - (float)(total / 1024.0 / 1024.0)); - HIPASSERT(free + totalDeviceAllocated[i] <= total); - } - - // Now look up each pointer we inserted and verify we can find it: - for (int i = 0; i < numAllocs; i++) { - SuperPointerAttribute& ref = reference[i]; - checkPointer(ref, i, 0, ref._pointer); - checkPointer(ref, i, 1, (char*)ref._pointer + ref._sizeBytes / 2); - if (ref._sizeBytes > 1) { - checkPointer(ref, i, 2, (char*)ref._pointer + ref._sizeBytes - 1); - } - - if (ref._attrib.type == hipMemoryTypeDevice) { - hipFree(ref._pointer); - } else { - hipHostFree(ref._pointer); - } - } -} - -//--- -// Multi-threaded test with many simul allocs. -// IN : serialize will force the test to run in serial fashion. 
-void testMultiThreaded_1(bool serialize = false) { - printf("\n===========================================================================\n"); - printf("MultiThreaded_1\n"); - if (serialize) printf("[SERIALIZE]\n"); - printf("===========================================================================\n"); - std::thread t1(clusterAllocs, 1000, 101, 1000); - if (serialize) t1.join(); - - std::thread t2(clusterAllocs, 1000, 11, 100); - if (serialize) t2.join(); - - std::thread t3(clusterAllocs, 1000, 5, 10); - if (serialize) t3.join(); - - std::thread t4(clusterAllocs, 1000, 1, 4); - if (serialize) t4.join(); - - if (!serialize) { - t1.join(); - t2.join(); - t3.join(); - t4.join(); - } - -} - -int main(int argc, char* argv[]) { - N = 1000000; - HipTest::parseStandardArguments(argc, argv, true); - - Nbytes = N * sizeof(char); - - printf("N=%zu (%6.2f MB) device=%d\n", N, Nbytes / (1024.0 * 1024.0), p_gpuDevice); - - if (p_tests & 0x01) { - printf("info: set device to %d\n", p_gpuDevice); - HIPCHECK(hipSetDevice(p_gpuDevice)); - testSimple(); - } - - if (p_tests & 0x02) { - srand(0x100); - printf("\n===========================================================================\n"); - clusterAllocs(100, 1024 * 1, 1024 * 1024); - } - - if (p_tests & 0x04) { - srand(0x200); - printf("\n===========================================================================\n"); - clusterAllocs(1000, 1, 10); // Many tiny allocations; - } - - if (p_tests & 0x08) { - srand(0x300); - testMultiThreaded_1(true); - testMultiThreaded_1(false); - } - - printf("\n"); - passed(); -} diff --git a/tests/src/runtimeApi/memory/hipRandomMemcpyAsync.cpp b/tests/src/runtimeApi/memory/hipRandomMemcpyAsync.cpp deleted file mode 100644 index 6a26971ee9..0000000000 --- a/tests/src/runtimeApi/memory/hipRandomMemcpyAsync.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define WIDTH 1024 -#define HEIGHT 1024 - -#define NUM (WIDTH * HEIGHT) - -#define THREADS_PER_BLOCK_X 16 -#define THREADS_PER_BLOCK_Y 16 -#define THREADS_PER_BLOCK_Z 1 - -int main() { - int* hostA; - int* hostB; - - int* deviceA; - int* deviceB; - - int i; - int errors; - - hostA = (int*)malloc(NUM * sizeof(int)); - hostB = (int*)malloc(NUM * sizeof(int)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - hostB[i] = i; - } - - HIPCHECK(hipMalloc((void**)&deviceA, NUM * sizeof(int))); - HIPCHECK(hipMalloc((void**)&deviceB, NUM * sizeof(int))); - - hipStream_t s; - HIPCHECK(hipStreamCreate(&s)); - - - // hostB -> deviceB -> hostA -#define ASYNC 1 -#if ASYNC - HIPCHECK(hipMemcpyAsync(deviceB, hostB, NUM * sizeof(int), hipMemcpyHostToDevice, s)); - HIPCHECK(hipMemcpyAsync(hostA, deviceB, NUM * sizeof(int), hipMemcpyDeviceToHost, s)); -#else - HIPCHECK(hipMemcpy(deviceB, hostB, NUM * sizeof(int), hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(hostA, deviceB, NUM * sizeof(int), hipMemcpyDeviceToHost)); -#endif - - HIPCHECK(hipStreamSynchronize(s)); - HIPCHECK(hipDeviceSynchronize()); - - // verify the results - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostA[i] != (hostB[i])) { - errors++; - } - } - - HIPCHECK(hipStreamDestroy(s)); - - HIPCHECK(hipFree(deviceA)); - HIPCHECK(hipFree(deviceB)); - - free(hostA); - free(hostB); - - // hipResetDefaultAccelerator(); - - if (errors != 0) { - HIPASSERT(1 == 2); - } else { - passed(); - } - - return errors; -} diff --git a/tests/src/runtimeApi/memory/hipTestMemcpyPin.cpp b/tests/src/runtimeApi/memory/hipTestMemcpyPin.cpp deleted file mode 100644 index baf1fa4424..0000000000 --- a/tests/src/runtimeApi/memory/hipTestMemcpyPin.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define len 1024 * 1024 -#define size len * sizeof(float) - -int main() { - float *Ad, *A; - hipHostMalloc((void**)&A, size); - hipMalloc((void**)&Ad, size); - assert(hipSuccess == hipMemcpy(Ad, A, size, hipMemcpyHostToDevice)); - assert(hipSuccess == hipMemcpy(A, Ad, size, hipMemcpyDeviceToHost)); - passed(); -} diff --git a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp deleted file mode 100644 index 2050bd38a6..0000000000 --- a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for memset. -// Also serves as a template for other tests. 
- -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -int elementSizes[] = {1, 16, 1024, 524288, 16 * 1000 * 1000}; -int nSizes = sizeof(elementSizes) / sizeof(int); - -int enablePeers(int dev0, int dev1) { - int canAccessPeer01, canAccessPeer10; - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer01, dev0, dev1)); - HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer10, dev1, dev0)); - if (!canAccessPeer01 || !canAccessPeer10) { - return -1; - } - - HIPCHECK(hipSetDevice(dev0)); - HIPCHECK(hipDeviceEnablePeerAccess(dev1, 0 /*flags*/)); - HIPCHECK(hipSetDevice(dev1)); - HIPCHECK(hipDeviceEnablePeerAccess(dev0, 0 /*flags*/)); - - return 0; -}; - -// Set value of array to specified 32-bit integer: -__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x; - for (size_t i = gid; i < numElements; i += stride) { - ptr[i] = val; - } -}; - -__global__ void memcpyIntKernel(const int* src, int* dst, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x; - for (size_t i = gid; i < numElements; i += stride) { - dst[i] = src[i]; - } -}; - - -// CHeck arrays in reverse order, to more easily detect cases where -// the copy is "partially" done. 
-void checkReverse(const int* ptr, int numElements, int expected) { - for (int i = numElements - 1; i >= 0; i--) { - if (ptr[i] != expected) { - printf("i=%d, ptr[](%d) != expected (%d)\n", i, ptr[i], expected); - assert(ptr[i] == expected); - } - } - - printf("test: OK\n"); -} - - -void runTestImpl(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_t gpu1Stream, - int numElements, int* dataGpu0_0, int* dataGpu0_1, int* dataGpu1, int* dataHost, - int expected) { - hipEvent_t e; - if (!hostSync) { - HIPCHECK(hipEventCreateWithFlags(&e, 0)); - } - const size_t sizeElements = numElements * sizeof(int); - printf("test: runTestImpl with %zu bytes %s with hostSync %s\n", sizeElements, - stepAIsCopy ? "copy" : "kernel", hostSync ? "enabled" : "disabled"); - - hipStream_t stepAStream = gpu0Stream; - - if (stepAIsCopy) { - HIPCHECK(hipMemcpyAsync(dataGpu1, dataGpu0_0, sizeElements, hipMemcpyDeviceToDevice, - stepAStream)); - } else { - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, gpu0Stream, - dataGpu0_0, dataGpu1, numElements); - } - - if (!hostSync) { - HIPCHECK(hipEventRecord(e, stepAStream)); - HIPCHECK(hipStreamWaitEvent(gpu1Stream, e, 0)); - } else { - HIPCHECK(hipStreamSynchronize(stepAStream)); - } - - HIPCHECK( - hipMemcpyAsync(dataGpu0_1, dataGpu1, sizeElements, hipMemcpyDeviceToDevice, gpu1Stream)); - - if (!hostSync) { - HIPCHECK(hipEventRecord(e, gpu1Stream)); - } else { - HIPCHECK(hipStreamSynchronize(gpu1Stream)); - } - - HIPCHECK(hipMemcpyAsync(dataHost, dataGpu0_1, sizeElements, hipMemcpyDeviceToHost, gpu0Stream)); - HIPCHECK(hipStreamSynchronize(gpu0Stream)); - - checkReverse(dataHost, numElements, expected); - if (!hostSync) { - HIPCHECK(hipEventDestroy(e)); - } -} - -void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync) { - const size_t sizeElements = numElements * sizeof(int); - - int *dataGpu0_0, 
*dataGpu0_1, *dataGpu1, *dataHost; - hipStream_t gpu0Stream, gpu1Stream; - const int expected = 42; - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - - HIPCHECK(hipSetDevice(dev0)); - - HIPCHECK(hipMalloc(&dataGpu0_0, sizeElements)); - HIPCHECK(hipMalloc(&dataGpu0_1, sizeElements)); - HIPCHECK(hipStreamCreate(&gpu0Stream)); - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, gpu0Stream, - dataGpu0_0, expected, numElements); - HIPCHECK(hipDeviceSynchronize()); - - - HIPCHECK(hipSetDevice(dev1)); - HIPCHECK(hipMalloc(&dataGpu1, sizeElements)); - HIPCHECK(hipStreamCreate(&gpu1Stream)); - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, gpu0Stream, - dataGpu1, 0x34, numElements); - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipHostMalloc(&dataHost, sizeElements)); - memset(dataHost, 13, sizeElements); - - printf(" test: init complete\n"); - runTestImpl(true, hostSync, gpu0Stream, gpu1Stream, numElements, dataGpu0_0, dataGpu0_1, - dataGpu1, dataHost, expected); - - HIPCHECK(hipFree(dataGpu0_0)); - HIPCHECK(hipFree(dataGpu0_1)); - HIPCHECK(hipFree(dataGpu1)); - HIPCHECK(hipHostFree(dataHost)); - - HIPCHECK(hipStreamDestroy(gpu0Stream)); - HIPCHECK(hipStreamDestroy(gpu1Stream)); -}; - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - - int dev0 = 0; - int dev1 = 1; - - int numDevices; - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices == 1) { - printf("warning : test requires atleast two gpus\n"); - passed(); - } - - if (enablePeers(dev0, dev1) == -1) { - printf("warning : could not find peer gpus\n"); - return -1; - }; - - for (int index = 0; index < nSizes; index++) { - //ToDo: Enable when verified on all platforms - //testMultiGpu(dev0, dev1, elementSizes[index], false /*GPU Synchronization*/); - testMultiGpu(dev0, dev1, elementSizes[index], true /*Host Synchronization*/); - } - - - passed(); -}; diff --git 
a/tests/src/runtimeApi/module/empty_kernel.cpp b/tests/src/runtimeApi/module/empty_kernel.cpp deleted file mode 100644 index 34c385aee6..0000000000 --- a/tests/src/runtimeApi/module/empty_kernel.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "hip/hip_runtime.h" - -extern "C" __global__ void EmptyKernel() { -} - diff --git a/tests/src/runtimeApi/module/global_kernel.cpp b/tests/src/runtimeApi/module/global_kernel.cpp deleted file mode 100644 index 8493cbda37..0000000000 --- a/tests/src/runtimeApi/module/global_kernel.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "hip/hip_runtime.h" - -#define ARRAY_SIZE (16) - -__device__ float myDeviceGlobal; -__device__ float myDeviceGlobalArray[16]; - - -extern "C" __global__ void hello_world(const float* a, float* b) { - int tx = threadIdx.x; - b[tx] = a[tx]; -} - -extern "C" __global__ void test_globals(const float* a, float* b) { - int tx = threadIdx.x; - b[tx] = a[tx] + myDeviceGlobal + myDeviceGlobalArray[tx % ARRAY_SIZE]; -} diff --git a/tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp b/tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp deleted file mode 100644 index d0ddb04cb6..0000000000 --- a/tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* - Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. 
- Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* - * Test Scenarios - * 1. Verify hipExtLaunchKernelGGL API with concurrency flag - Verify hipExtLaunchKernelGGL API by disabling concurrency flag - 2. Verify kernel execution time of the particular kernel - 3. Verify hipExtLaunchKernelGGL API by disabling time flag in event creation - Testcase 1 is not included now as the firmware does not support concurrency - in the same stream. 
- */ -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST_NAMED: %t hipExtLaunchKernelGGL_KernelExeTime --tests 2 EXCLUDE_HIP_PLATFORM nvidia - * TEST_NAMED: %t hipExtLaunchKernelGGL_TimeFlagDisabled --tests 3 EXCLUDE_HIP_PLATFORM nvidia - * HIT_END - */ - -#include "test_common.h" -#include "hip/hip_ext.h" -#define FIVESEC_KERNEL 4999 -#define THREESEC_KERNEL 2999 - -__device__ int globalvar = 1; -__global__ void TwoSecKernel(int clockrate) { - if (globalvar == 0x2222) { - globalvar = 0x3333; - } - uint64_t wait_t = 2000, - start = clock64()/clockrate, cur; - do { cur = (clock64()/clockrate)-start;}while (cur < wait_t); - if (globalvar != 0x3333) { - globalvar = 0x5555; - } -} - -__global__ void FourSecKernel(int clockrate) { - if (globalvar == 1) { - globalvar = 0x2222; - } - uint64_t wait_t = 4000, - start = clock64()/clockrate, cur; - do { cur = (clock64()/clockrate)-start;}while (cur < wait_t); - if (globalvar == 0x2222) { - globalvar = 0x4444; - } -} - -__global__ void TwoSecKernel_gfx11(int clockrate) { -#ifdef __HIP_PLATFORM_AMD__ - if (globalvar == 0x2222) { - globalvar = 0x3333; - } - uint64_t wait_t = 2000, - start = wall_clock64()/clockrate, cur; - do { cur = (wall_clock64()/clockrate)-start;}while (cur < wait_t); - if (globalvar != 0x3333) { - globalvar = 0x5555; - } -#endif -} - -__global__ void FourSecKernel_gfx11(int clockrate) { -#ifdef __HIP_PLATFORM_AMD__ - if (globalvar == 1) { - globalvar = 0x2222; - } - uint64_t wait_t = 4000, - start = wall_clock64()/clockrate, cur; - do { cur = (wall_clock64()/clockrate)-start;}while (cur < wait_t); - if (globalvar == 0x2222) { - globalvar = 0x4444; - } -#endif -} - -/* - * In this Scenario, we create events by disabling the timing flag - * We then Launch the kernel using hipExtModuleLaunchKernel by passing - * disabled events and try to fetch kernel execution time using - * hipEventElapsedTime API which would fail as the flag is disabled. 
- */ -bool DisableTimeFlag() { - bool testStatus = true; - hipStream_t stream1; - HIPCHECK(hipSetDevice(0)); - hipError_t e; - float time_4sec, time_2sec; - hipEvent_t start_event1, end_event1; - int clkRate = 0; - - if (IsGfx11()) { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeWallClockRate, 0)); - } else { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); - } - - HIPCHECK(hipEventCreateWithFlags(&start_event1, - hipEventDisableTiming)); - HIPCHECK(hipEventCreateWithFlags(&end_event1, - hipEventDisableTiming)); - HIPCHECK(hipStreamCreate(&stream1)); - auto TwoSecKernel_used = IsGfx11() ? TwoSecKernel_gfx11 : TwoSecKernel; - hipExtLaunchKernelGGL((TwoSecKernel_used), dim3(1), dim3(1), 0, - stream1, start_event1, end_event1, 0, clkRate); - HIPCHECK(hipStreamSynchronize(stream1)); - e = hipEventElapsedTime(&time_2sec, start_event1, end_event1); - if (e == hipErrorInvalidHandle) { - testStatus = true; - } else { - printf("Event elapsed time is success when time flag is disabled \n"); - testStatus = false; - } - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipEventDestroy(start_event1)); - HIPCHECK(hipEventDestroy(end_event1)); - return testStatus; -} -/* - In this scenario , we initially create a global device variable - * with initial value as 1. We then launch the four sec and two sec kernels and - * try to modify the variable. 
- * In case of concurrency,the variable gets updated in four sec kernel to 0x2222 - * and then the two sec kernel would be launched parallely which would again - * modify the global variable to 0x3333 - * In case of non concurrency,the variale gets updated in four sec kernel - * and then in two sec kernel and the value of global variable would be 0x5555 - */ -bool ConcurencyCheck_GlobalVar(int conc_flag) { - bool testStatus = true; - hipStream_t stream1; - int deviceGlobal_h = 0; - HIPCHECK(hipSetDevice(0)); - int clkRate = 0; - - if (IsGfx11()) { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeWallClockRate, 0)); - } else { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); - } - - HIPCHECK(hipStreamCreate(&stream1)); - auto TwoSecKernel_used = IsGfx11() ? TwoSecKernel_gfx11 : TwoSecKernel; - auto FourSecKernel_used = IsGfx11() ? FourSecKernel_gfx11 : FourSecKernel; - - hipExtLaunchKernelGGL((FourSecKernel_used), dim3(1), dim3(1), 0, - stream1, nullptr, nullptr, conc_flag, clkRate); - hipExtLaunchKernelGGL((TwoSecKernel_used), dim3(1), dim3(1), 0, - stream1, nullptr, nullptr, conc_flag, clkRate); - HIPCHECK(hipStreamSynchronize(stream1)); - HIPCHECK(hipMemcpyFromSymbol(&deviceGlobal_h, globalvar, - sizeof(int))); - - if (conc_flag && deviceGlobal_h != 0x5555) { - testStatus = true; - } else if (!conc_flag && deviceGlobal_h == 0x5555) { - testStatus = true; - } else { - printf("Concurrency check failed when conc_flag is %d ", conc_flag); - testStatus = false; - } - HIPCHECK(hipStreamDestroy(stream1)); - return testStatus; -} -/* - * Launching FourSecKernel and TwoSecKernel and then we try to - * get the event elapsed time of each kernel using the start and - * end events.The event elapsed time should return us the kernel - * execution time for that particular kernel -*/ -bool KernelTimeExecution() { - bool testStatus = true; - hipStream_t stream1; - hipError_t e; - HIPCHECK(hipSetDevice(0)); - hipEvent_t start_event1, 
end_event1, start_event2, end_event2; - float time_4sec, time_2sec; - int clkRate = 0; - - if (IsGfx11()) { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeWallClockRate, 0)); - } else { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); - } - - auto TwoSecKernel_used = IsGfx11() ? TwoSecKernel_gfx11 : TwoSecKernel; - auto FourSecKernel_used = IsGfx11() ? FourSecKernel_gfx11 : FourSecKernel; - - HIPCHECK(hipEventCreate(&start_event1)); - HIPCHECK(hipEventCreate(&end_event1)); - HIPCHECK(hipEventCreate(&start_event2)); - HIPCHECK(hipEventCreate(&end_event2)); - HIPCHECK(hipStreamCreate(&stream1)); - hipExtLaunchKernelGGL((FourSecKernel_used), dim3(1), dim3(1), 0, - stream1, start_event1, end_event1, 0, clkRate); - hipExtLaunchKernelGGL((TwoSecKernel_used), dim3(1), dim3(1), 0, - stream1, start_event2, end_event2, 0, clkRate); - HIPCHECK(hipStreamSynchronize(stream1)); - e = hipEventElapsedTime(&time_4sec, start_event1, end_event1); - e = hipEventElapsedTime(&time_2sec, start_event2, end_event2); - - if ( (time_4sec < static_cast(FIVESEC_KERNEL)) && - (time_2sec < static_cast(THREESEC_KERNEL))) { - testStatus = true; - } else { - printf("Expected Vs Actual: Kernel1-<%d Vs %f Kernel2-<%d Vs %f\n", - FIVESEC_KERNEL, time_4sec, THREESEC_KERNEL, time_2sec); - testStatus = false; - } - - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipEventDestroy(start_event1)); - HIPCHECK(hipEventDestroy(end_event1)); - HIPCHECK(hipEventDestroy(start_event2)); - HIPCHECK(hipEventDestroy(end_event2)); - - return testStatus; -} - -int main(int argc, char* argv[]) { - bool testStatus = true; - HipTest::parseStandardArguments(argc, argv, false); - if (p_tests == 1) { - testStatus &= ConcurencyCheck_GlobalVar(0); - } else if (p_tests == 2) { - testStatus &= KernelTimeExecution(); - } else if (p_tests == 3) { - testStatus &= DisableTimeFlag(); - } else { - failed("Didnt receive any valid option.\n"); - } - if (testStatus) { - passed(); - } else { - 
failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/module/hipExtLaunchMultiKernelMultiDevice.cpp b/tests/src/runtimeApi/module/hipExtLaunchMultiKernelMultiDevice.cpp deleted file mode 100644 index c47c472766..0000000000 --- a/tests/src/runtimeApi/module/hipExtLaunchMultiKernelMultiDevice.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for hipExtLaunchMultiKernelMultiDevice API. It can be tested on -// single GPU or multi GPUs. - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" -#include - -#define MAX_GPUS 8 -/* - * Square each element in the array A and write to array C. 
- */ -#define NUM_KERNEL_ARGS 3 -__global__ void -vector_square(float *C_d, float *A_d, size_t N) -{ - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; - - for (size_t i = offset; i < N; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } -} - -int main(int argc, char *argv[]) -{ - float *A_d[MAX_GPUS], *C_d[MAX_GPUS]; - float *A_h, *C_h; - size_t N = 1000000; - size_t Nbytes = N * sizeof(float); - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - if (nGpu < 1) { - printf ("info: didn't find any GPU!\n"); - return 0; - } - if (nGpu > MAX_GPUS) { - nGpu = MAX_GPUS; - } - - printf ("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - A_h = (float*)malloc(Nbytes); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - C_h = (float*)malloc(Nbytes); - HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - // Fill with Phi + i - for (size_t i = 0; i < N; i++) - { - A_h[i] = 1.618f + i; - } - - const unsigned blocks = 512; - const unsigned threadsPerBlock = 256; - - hipStream_t stream[MAX_GPUS]; - for (int i = 0; i < nGpu; i++) { - HIPCHECK(hipSetDevice(i)); - HIPCHECK(hipStreamCreateWithFlags(&stream[i], hipStreamNonBlocking)); - - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, i/*deviceID*/)); - printf ("info: running on bus 0x%2x %s\n", props.pciBusID, props.name); - - printf ("info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - HIPCHECK(hipMalloc(&A_d[i], Nbytes)); - HIPCHECK(hipMalloc(&C_d[i], Nbytes)); - - - printf ("info: copy Host2Device\n"); - HIPCHECK ( hipMemcpy(A_d[i], A_h, Nbytes, hipMemcpyHostToDevice)); - } - - hipLaunchParams *launchParamsList = reinterpret_cast( - malloc(sizeof(hipLaunchParams)*nGpu)); - - void *args[MAX_GPUS * NUM_KERNEL_ARGS]; - - for (int i = 0; i < nGpu; i++) { - args[i * NUM_KERNEL_ARGS] = &C_d[i]; - args[i * NUM_KERNEL_ARGS + 1] = &A_d[i]; - args[i * NUM_KERNEL_ARGS + 2] = &N; - launchParamsList[i].func = - 
reinterpret_cast(vector_square); - launchParamsList[i].gridDim = dim3(blocks); - launchParamsList[i].blockDim = dim3(threadsPerBlock); - launchParamsList[i].sharedMem = 0; - launchParamsList[i].stream = stream[i]; - launchParamsList[i].args = args + i * NUM_KERNEL_ARGS; - } - - printf ("info: launch vector_square kernel with hipExtLaunchMultiKernelMultiDevice API\n"); - hipExtLaunchMultiKernelMultiDevice(launchParamsList, nGpu, 0); - - for (int j = 0; j < nGpu; j++) { - hipStreamSynchronize(stream[j]); - - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, j/*deviceID*/)); - printf ("info: checking result on bus 0x%2x %s\n", props.pciBusID, props.name); - - printf ("info: copy Device2Host\n"); - HIPCHECK(hipSetDevice(j)); - HIPCHECK( hipMemcpy(C_h, C_d[j], Nbytes, hipMemcpyDeviceToHost)); - - printf ("info: check result\n"); - for (size_t i = 0; i < N; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - HIPCHECK(hipErrorUnknown); - } - } - } - - printf ("PASSED!\n"); -} diff --git a/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp b/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp deleted file mode 100644 index 65be909591..0000000000 --- a/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp +++ /dev/null @@ -1,627 +0,0 @@ -/* - Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* Test Scenarios - 1. hipExtModuleLaunchKernel Negative Scenarios - 2. hipExtModuleLaunchKernel concurrency verification using global variable - 3. hipExtModuleLaunchKernel concurrency verification by launching multiple kernels with and - without concurrency flag and verify the time difference between them - 4. hipExtModuleLaunchKernel API verifying the kernel execution time of a particular kernel. - 5. hipExtModuleLaunchKernel API verifying the kernel execution time by disabling the time flag - 6. hipExtModuleLaunchKernel API verifying Corner Scenarios for Grid and Block dimensions - 7. hipModuleLaunchKernel Work Group tests => - - (block.x * block.y * block.z) <= Work Group Size - where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - - (block.x * block.y * block.z) > Work Group Size - where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - - Scenarios 2 and 3 concurrency verification scenarios are not included in HIT command - as firmware currently does not support the concurrency in the same stream based on the flag. 
- - */ -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM nvidia - * TEST_NAMED: %t hipExtModuleLaunchKernel_NegativeTests --tests 1 EXCLUDE_HIP_PLATFORM nvidia - * TEST_NAMED: %t hipExtModuleLaunchKernel_KernelExecutionTime --tests 4 EXCLUDE_HIP_PLATFORM nvidia - * TEST_NAMED: %t hipExtModuleLaunchKernel_DisabledEventTimeFlag --tests 5 EXCLUDE_HIP_PLATFORM nvidia - * TEST_NAMED: %t hipExtModuleLaunchKernel_CornerScenarios --tests 6 EXCLUDE_HIP_PLATFORM nvidia - * TEST_NAMED: %t hipExtModuleLaunchKernel_WorkGroup --tests 7 EXCLUDE_HIP_PLATFORM nvidia - * HIT_END - */ -#include -#include "test_common.h" -#include "hip/hip_ext.h" - -#define fileName "matmul.code" -#define matmulK "matmulK" -#define KernelandExtra "KernelandExtraParams" -#define globalDevVar "deviceGlobal" -#define dummyKernel "dummyKernel" -#define FOURSEC_KERNEL 4999 -#define TWOSEC_KERNEL 2999 - -struct gridblockDim { - unsigned int gridX; - unsigned int gridY; - unsigned int gridZ; - unsigned int blockX; - unsigned int blockY; - unsigned int blockZ; -}; -class ModuleLaunchKernel { - int N = 64; - int SIZE = N*N; - int *A, *B, *C; - hipDeviceptr_t *Ad, *Bd; - hipStream_t stream1, stream2; - hipEvent_t start_event1, end_event1, start_event2, end_event2, - start_timingDisabled, end_timingDisabled; - hipModule_t Module; - hipDeviceptr_t deviceGlobal; - hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel, - TwoSecKernel, KernelandExtraParamKernel, DummyKernel; - struct { - int clockRate; - void* _Ad; - void* _Bd; - void* _Cd; - int _n; - } args1, args2; - struct { - } args3; - size_t size1; - size_t size2; - size_t size3; - size_t deviceGlobalSize; - public : - void AllocateMemory(); - void DeAllocateMemory(); - void ModuleLoad(); - bool Module_Negative_tests(); - bool ExtModule_Negative_tests(); - bool ExtModule_Corner_tests(); - bool Module_WorkGroup_Test(); - bool ExtModule_KernelExecutionTime(); - bool ExtModule_ConcurencyCheck_GlobalVar(int 
conc_flag); - bool ExtModule_ConcurrencyCheck_TimeVer(); - bool ExtModule_Disabled_Timingflag(); -}; - -void ModuleLaunchKernel::AllocateMemory() { - A = new int[N*N*sizeof(int)]; - B = new int[N*N*sizeof(int)]; - for (int i=0; i < N; i++) { - for (int j=0; j < N; j++) { - A[i*N +j] = 1; - B[i*N +j] = 1; - } - } - HIPCHECK(hipStreamCreate(&stream1)); - HIPCHECK(hipStreamCreate(&stream2)); - HIPCHECK(hipMalloc(reinterpret_cast(&Ad), - SIZE*sizeof(int))); - HIPCHECK(hipMalloc(reinterpret_cast(&Bd), - SIZE*sizeof(int))); - HIPCHECK(hipHostMalloc(reinterpret_cast(&C), SIZE*sizeof(int))); - HIPCHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice)); - int clkRate = 0; - if (IsGfx11()) { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeWallClockRate, 0)); - } else { - HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); - } - - args1._Ad = Ad; - args1._Bd = Bd; - args1._Cd = C; - args1._n = N; - args1.clockRate = clkRate; - args2._Ad = NULL; - args2._Bd = NULL; - args2._Cd = NULL; - args2._n = 0; - args2.clockRate = clkRate; - size1 = sizeof(args1); - size2 = sizeof(args2); - size3 = sizeof(args3); - HIPCHECK(hipEventCreate(&start_event1)); - HIPCHECK(hipEventCreate(&end_event1)); - HIPCHECK(hipEventCreate(&start_event2)); - HIPCHECK(hipEventCreate(&end_event2)); - HIPCHECK(hipEventCreateWithFlags(&start_timingDisabled, - hipEventDisableTiming)); - HIPCHECK(hipEventCreateWithFlags(&end_timingDisabled, - hipEventDisableTiming)); -} - -void ModuleLaunchKernel::ModuleLoad() { - - std::string TwoSecStr = IsGfx11() ? std::string("TwoSecKernel_gfx11") - : std::string("TwoSecKernel"); - std::string FourSecStr = IsGfx11() ? std::string("FourSecKernel_gfx11") - : std::string("FourSecKernel"); - std::string SixteenSecStr = IsGfx11() ? 
std::string("SixteenSecKernel_gfx11") - : std::string("SixteenSecKernel"); - - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&MultKernel, Module, matmulK)); - HIPCHECK(hipModuleGetFunction(&SixteenSecKernel, Module, SixteenSecStr.c_str())); - HIPCHECK(hipModuleGetFunction(&KernelandExtraParamKernel, - Module, KernelandExtra)); - HIPCHECK(hipModuleGetFunction(&FourSecKernel, Module, FourSecStr.c_str())); - HIPCHECK(hipModuleGetFunction(&TwoSecKernel, Module, TwoSecStr.c_str())); - HIPCHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); - HIPCHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, - Module, globalDevVar)); -} - -void ModuleLaunchKernel::DeAllocateMemory() { - HIPCHECK(hipEventDestroy(start_event1)); - HIPCHECK(hipEventDestroy(end_event1)); - HIPCHECK(hipEventDestroy(start_event2)); - HIPCHECK(hipEventDestroy(end_event2)); - HIPCHECK(hipEventDestroy(start_timingDisabled)); - HIPCHECK(hipEventDestroy(end_timingDisabled)); - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipStreamDestroy(stream2)); - delete[] A; - delete[] B; - HIPCHECK(hipFree(Ad)); - HIPCHECK(hipFree(Bd)); - HIPCHECK(hipHostFree(C)); - HIPCHECK(hipModuleUnload(Module)); -} -/* - * In this scenario,We launch the 4 sec kernel and 2 sec kernel - * and we fetch the event execution time of each kernel and it - * should not exceed the execution time of that particular kernel - */ -bool ModuleLaunchKernel::ExtModule_KernelExecutionTime() { - bool testStatus = true; - HIPCHECK(hipSetDevice(0)); - AllocateMemory(); - ModuleLoad(); - hipError_t e; - float time_4sec, time_2sec; - - void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, - NULL, reinterpret_cast(&config2), - start_event1, end_event1, 0)); - HIPCHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, - NULL, 
reinterpret_cast(&config2), - start_event2, end_event2, 0)); - HIPCHECK(hipStreamSynchronize(stream1)); - e = hipEventElapsedTime(&time_4sec, start_event1, end_event1); - e = hipEventElapsedTime(&time_2sec, start_event2, end_event2); - if (time_4sec < FOURSEC_KERNEL && time_2sec < TWOSEC_KERNEL) { - testStatus = true; - } else { - printf("Expected Vs Actual: Kernel1-<%d Vs %f Kernel2-<%d Vs %f\n", - FOURSEC_KERNEL, time_4sec, TWOSEC_KERNEL, time_2sec); - testStatus = false; - } - DeAllocateMemory(); - return testStatus; -} -/* - * In this Scenario, we create events by disabling the timing flag - * We then Launch the kernel using hipExtModuleLaunchKernel by passing - * disabled events and try to fetch kernel execution time using - * hipEventElapsedTime API which would fail as the flag is disabled. - */ -bool ModuleLaunchKernel::ExtModule_Disabled_Timingflag() { - bool testStatus = true; - AllocateMemory(); - ModuleLoad(); - hipError_t e; - float time_2sec; - void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, - NULL, reinterpret_cast(&config2), - start_timingDisabled, end_timingDisabled, 0)); - HIPCHECK(hipStreamSynchronize(stream1)); - e = hipEventElapsedTime(&time_2sec, start_timingDisabled, end_timingDisabled); - if (e == hipErrorInvalidHandle) { - testStatus = true; - } else { - printf("Event elapsed time is success when time flag is disabled \n"); - testStatus = false; - } - DeAllocateMemory(); - return testStatus; -} -/* - * In this scenario , we initially create a global device variable in matmul.cpp - * with initial value as 1 We then launch the four sec and two sec kernels and - * try to modify the variable. 
- * In case of concurrency,the variable gets updated in four sec kernel to 0x2222 - * and then the two sec kernel would be launched parallely which would again - * modify the global variable to 0x3333 - * In case of non concurrency,the variale gets updated in four sec kernel - * and then in two sec kernel and the value of global variable would be 0x5555 - */ -bool ModuleLaunchKernel::ExtModule_ConcurencyCheck_GlobalVar(int conc_flag) { - bool testStatus = true; - int deviceGlobal_h = 0; - AllocateMemory(); - ModuleLoad(); - void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, - NULL, reinterpret_cast(&config2), - start_event1, end_event1, conc_flag)); - HIPCHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1, - NULL, reinterpret_cast(&config2), - start_event2, end_event2, conc_flag)); - HIPCHECK(hipStreamSynchronize(stream1)); - HIPCHECK(hipMemcpyDtoH(&deviceGlobal_h, hipDeviceptr_t(deviceGlobal), - deviceGlobalSize)); - if (conc_flag && deviceGlobal_h != 0x5555) { - testStatus = true; - } else if (!conc_flag && deviceGlobal_h == 0x5555) { - testStatus = true; - } else { - printf("concurrency failed when concurrency flag is %d and global is %x", - conc_flag, deviceGlobal_h); - testStatus = false; - } - DeAllocateMemory(); - return testStatus; -} -/* In this scenario,we initially launch 2 kernels,one is sixteen sec kernel - * and other is matrix multiplication with non-concurrency (flag 0) - * and we launch the same 2 kernels with concurrency flag 1. We then compare - * the time difference between the concurrency and non currency kernels. 
- * The concurrency kernel duration should be less than the non concurrency - * duration kernels - */ -bool ModuleLaunchKernel::ExtModule_ConcurrencyCheck_TimeVer() { - bool testStatus = true; - AllocateMemory(); - ModuleLoad(); - int mismatch = 0; - void* config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, - HIP_LAUNCH_PARAM_END}; - void* config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, - HIP_LAUNCH_PARAM_END}; - auto start = std::chrono::high_resolution_clock::now(); - HIPCHECK(hipExtModuleLaunchKernel(SixteenSecKernel, 1, 1, 1, 1, 1, 1, 0, - stream1, NULL, - reinterpret_cast(&config2), - NULL, NULL, 0)); - HIPCHECK(hipExtModuleLaunchKernel(MultKernel, N, N, 1, 32, 32 , 1, 0, - stream1, NULL, - reinterpret_cast(&config1), - NULL, NULL, 0)); - HIPCHECK(hipStreamSynchronize(stream1)); - auto stop = std::chrono::high_resolution_clock::now(); - auto duration1 = std::chrono::duration_cast - (stop-start); - start = std::chrono::high_resolution_clock::now(); - HIPCHECK(hipExtModuleLaunchKernel(SixteenSecKernel, 1, 1, 1, 1, 1, 1, 0, - stream1, NULL, - reinterpret_cast(&config2), - NULL, NULL, 1)); - HIPCHECK(hipExtModuleLaunchKernel(MultKernel, N, N, 1, 32, 32, 1, 0, - stream1, NULL, - reinterpret_cast(&config1), - NULL, NULL, 1)); - HIPCHECK(hipStreamSynchronize(stream1)); - stop = std::chrono::high_resolution_clock::now(); - auto duration2 = std::chrono::duration_cast - (stop-start); - if (!(duration2.count() < duration1.count())) { - std::cout << "Test failed as there was no time gain observed when" - << " two kernels were launched using hipExtModuleLaunchKernel()" - << " with flag 1." 
<(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed nullptr to kernel function"); - testStatus = false; - } - // Passing Max int value to block dimensions - err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, - std::numeric_limits::max(), - std::numeric_limits::max(), - std::numeric_limits::max(), 0, - stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for max values to block dimension"); - testStatus = false; - } - // Passing 0 as value for all dimensions - err = hipExtModuleLaunchKernel(MultKernel, 0, 0, 0, - 0, - 0, - 0, 0, - stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for 0 as value for all dimensions"); - testStatus = false; - } - // Passing 0 as value for x dimension - err = hipExtModuleLaunchKernel(MultKernel, 0, 1, 1, - 0, - 1, - 1, 0, - stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for 0 as value for x dimension"); - testStatus = false; - } - // Passing 0 as value for y dimension - err = hipExtModuleLaunchKernel(MultKernel, 1, 0, 1, - 1, - 0, - 1, 0, - stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for 0 as value for y dimension"); - testStatus = false; - } - // Passing 0 as value for z dimension - err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 0, - 1, - 1, - 0, 0, - stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for 0 as value for z dimension"); - testStatus = false; - } - // Passing both kernel and extra params - err = hipExtModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1, 1, 1, 0, - stream1, reinterpret_cast(¶ms), - reinterpret_cast(&config1), 
- nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel fail when we pass both kernel,extra args"); - testStatus = false; - } - // Passing more than maxthreadsperblock to block dimensions - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, - deviceProp.maxThreadsPerBlock+1, - deviceProp.maxThreadsPerBlock+1, - deviceProp.maxThreadsPerBlock+1, 0, - stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for max group size"); - testStatus = false; - } - // Block dimension X = Max Allowed + 1 - err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, - deviceProp.maxThreadsDim[0]+1, - 1, - 1, 0, stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for (MaxBlockDimX + 1)"); - testStatus = false; - } - // Block dimension Y = Max Allowed + 1 - err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, - 1, - deviceProp.maxThreadsDim[1]+1, - 1, 0, stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for (MaxBlockDimY + 1)"); - testStatus = false; - } - // Block dimension Z = Max Allowed + 1 - err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, - 1, - 1, - deviceProp.maxThreadsDim[2]+1, 0, stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for (MaxBlockDimZ + 1)"); - testStatus = false; - } - - // Passing invalid config data in extra params - void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, - HIP_LAUNCH_PARAM_END}; - err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0, stream1, NULL, - reinterpret_cast(&config3), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel 
failed for invalid conf \n"); - testStatus = false; - } - DeAllocateMemory(); - return testStatus; -} - -bool ModuleLaunchKernel::ExtModule_Corner_tests() { - bool testStatus = true; - HIPCHECK(hipSetDevice(0)); - hipError_t err; - AllocateMemory(); - ModuleLoad(); - void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3, - HIP_LAUNCH_PARAM_END}; - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - unsigned int maxblockX = deviceProp.maxThreadsDim[0]; - unsigned int maxblockY = deviceProp.maxThreadsDim[1]; - unsigned int maxblockZ = deviceProp.maxThreadsDim[2]; - struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1}, - {1, 1, 1, 1, maxblockY, 1}, - {1, 1, 1, 1, 1, maxblockZ}, - {UINT32_MAX, 1, 1, 1, 1, 1}, - {1, UINT32_MAX, 1, 1, 1, 1}, - {1, 1, UINT32_MAX, 1, 1, 1}}; - - for (int i = 0; i < 6; i++) { - err = hipExtModuleLaunchKernel(DummyKernel, - test[i].gridX, - test[i].gridY, - test[i].gridZ, - test[i].blockX, - test[i].blockY, - test[i].blockZ, - 0, - stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err != hipSuccess) { - printf("hipExtModuleLaunchKernel failed (%u, %u, %u) and (%u, %u, %u)", - test[i].gridX, test[i].gridY, test[i].gridZ, - test[i].blockX, test[i].blockY, test[i].blockZ); - testStatus = false; - } - } - DeAllocateMemory(); - return testStatus; -} - -bool ModuleLaunchKernel::Module_WorkGroup_Test() { - bool testStatus = true; - HIPCHECK(hipSetDevice(0)); - hipError_t err; - AllocateMemory(); - ModuleLoad(); - void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3, - HIP_LAUNCH_PARAM_END}; - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - double cuberootVal = - cbrt(static_cast(deviceProp.maxThreadsPerBlock)); - uint32_t cuberoot_floor = floor(cuberootVal); - uint32_t cuberoot_ceil = ceil(cuberootVal); - // Scenario: (block.x * block.y * block.z) <= Work Group Size where - // block.x < 
MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - err = hipExtModuleLaunchKernel(DummyKernel, - 1, 1, 1, - cuberoot_floor, cuberoot_floor, cuberoot_floor, - 0, stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err != hipSuccess) { - printf("hipExtModuleLaunchKernel failed block dimensions (%u, %u, %u)", - cuberoot_floor, cuberoot_floor, cuberoot_floor); - testStatus = false; - } - // Scenario: (block.x * block.y * block.z) > Work Group Size where - // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - err = hipExtModuleLaunchKernel(DummyKernel, - 1, 1, 1, - cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1, - 0, stream1, NULL, - reinterpret_cast(&config1), - nullptr, nullptr, 0); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed block dimensions (%u, %u, %u)", - cuberoot_ceil, cuberoot_ceil, cuberoot_ceil); - testStatus = false; - } - DeAllocateMemory(); - return testStatus; -} - -int main(int argc, char* argv[]) { - bool testStatus = true; - HipTest::parseStandardArguments(argc, argv, false); - ModuleLaunchKernel kernelLaunch; - if (p_tests == 1) { - testStatus &= kernelLaunch.ExtModule_Negative_tests(); - } else if (p_tests == 2) { - testStatus &= kernelLaunch.ExtModule_ConcurencyCheck_GlobalVar(1); - testStatus &= kernelLaunch.ExtModule_ConcurencyCheck_GlobalVar(0); - } else if (p_tests == 3) { - testStatus &= kernelLaunch.ExtModule_ConcurrencyCheck_TimeVer(); - } else if (p_tests == 4) { - testStatus &= kernelLaunch.ExtModule_KernelExecutionTime(); - } else if (p_tests == 5) { - testStatus &= kernelLaunch.ExtModule_Disabled_Timingflag(); - } else if (p_tests == 6) { - testStatus &= kernelLaunch.ExtModule_Corner_tests(); - } else if (p_tests == 7) { - testStatus &= kernelLaunch.Module_WorkGroup_Test(); - } else { - failed("Didnt receive any valid option.\n"); - } - if (testStatus) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git 
a/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp deleted file mode 100644 index 1e563a27b2..0000000000 --- a/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -__global__ -void fn(float* px, float* py) -{ - bool a[42]; - __shared__ double b[69]; - - for (auto&& x : b) x = *py++; - for (auto&& x : a) x = *px++ > 0.0; - for (auto&& x : a) if (x) *--py = *--px; -} - -int main() { - - hipFuncAttributes attr{}; - - auto r = hipFuncGetAttributes(&attr, reinterpret_cast(&fn)); - - if (r != hipSuccess || attr.maxThreadsPerBlock == 0) { - failed("Failed to read attributes."); - } - - passed(); -} diff --git a/tests/src/runtimeApi/module/hipFuncSetAttribute.cpp b/tests/src/runtimeApi/module/hipFuncSetAttribute.cpp deleted file mode 100644 index 7599a842c2..0000000000 --- a/tests/src/runtimeApi/module/hipFuncSetAttribute.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - - -#include "test_common.h" - -__global__ void fn(float* px, float* py) { - bool a[42]; - __shared__ double b[69]; - - for (auto&& x : b) x = *py++; - for (auto&& x : a) x = *px++ > 0.0; - for (auto&& x : a) if (x) *--py = *--px; -} - -int main() { - HIPCHECK(hipFuncSetAttribute(reinterpret_cast(&fn), - hipFuncAttributeMaxDynamicSharedMemorySize, - 0)); - HIPCHECK(hipFuncSetAttribute(reinterpret_cast(&fn), - hipFuncAttributePreferredSharedMemoryCarveout, - 0)); - passed(); -} diff --git a/tests/src/runtimeApi/module/hipFuncSetCacheConfig.cpp b/tests/src/runtimeApi/module/hipFuncSetCacheConfig.cpp deleted file mode 100644 index bf8621d021..0000000000 --- a/tests/src/runtimeApi/module/hipFuncSetCacheConfig.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -int main() { - hipFuncCache_t cacheConfig; - void* func; - hipFuncSetCacheConfig(func, cacheConfig); - passed(); -} diff --git a/tests/src/runtimeApi/module/hipFuncSetSharedMemConfig.cpp b/tests/src/runtimeApi/module/hipFuncSetSharedMemConfig.cpp deleted file mode 100644 index 7fa56fb8a1..0000000000 --- a/tests/src/runtimeApi/module/hipFuncSetSharedMemConfig.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - - - -// Test Description: -// This test case verifies the working of hipFuncSetSharedMemConfig() api and -// the flag parameter - -#include "test_common.h" - - -__global__ void ReverseSeq(int *A, int *B, int N) { - extern __shared__ int SMem[]; - int offset = threadIdx.x; - int MirrorVal = N - offset - 1; - SMem[offset] = A[offset]; - __syncthreads(); - B[offset] = SMem[MirrorVal]; -} - -int main() { - bool IfTestPassed = true; - int *Ah = NULL, *RAh = NULL, NELMTS = 128; - int *Ad = NULL, *RAd = NULL; - Ah = reinterpret_cast(malloc(NELMTS * sizeof(int))); - RAh = reinterpret_cast(malloc(NELMTS * sizeof(int))); - HIPCHECK(hipMalloc(&Ad, NELMTS * sizeof(int))); - HIPCHECK(hipMalloc(&RAd, NELMTS * sizeof(int))); - for (int i = 0; i < NELMTS; ++i) { - Ah[i] = i; - RAh[i] = NELMTS - i - 1; - } - HIPCHECK(hipMemcpy(Ad, Ah, NELMTS * sizeof(int), hipMemcpyHostToDevice)); - HIPCHECK(hipMemset(RAd, 0, NELMTS * sizeof(int))); - // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeDefault flag - HIPCHECK(hipFuncSetSharedMemConfig(reinterpret_cast(&ReverseSeq), - hipSharedMemBankSizeDefault)); - // Kernel Launch with shared mem size of = NELMTS * sizeof(int) - ReverseSeq<<<1, NELMTS, NELMTS * sizeof(int)>>>(Ad, RAd, NELMTS); - memset(Ah, 0, NELMTS * sizeof(int)); - // Verifying the results - HIPCHECK(hipMemcpy(Ah, RAd, NELMTS * sizeof(int), hipMemcpyDeviceToHost)); - for (int i = 0; i < NELMTS; ++i) { - if (Ah[i] != RAh[i]) { - printf("Mismatch found at %d value of array\n", i); - printf(" after setting the flag hipSharedMemBankSizeDefault\n"); - IfTestPassed = false; - } - } - // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeFourBytes flg - HIPCHECK(hipFuncSetSharedMemConfig(reinterpret_cast(&ReverseSeq), - hipSharedMemBankSizeFourByte)); - HIPCHECK(hipMemset(RAd, 0, NELMTS * sizeof(int))); - // Kernel Launch with shared mem size of = 
NELMTS * sizeof(int) - ReverseSeq<<<1, NELMTS, NELMTS * sizeof(int)>>>(Ad, RAd, NELMTS); - memset(Ah, 0, NELMTS * sizeof(int)); - // Verifying the results - HIPCHECK(hipMemcpy(Ah, RAd, NELMTS * sizeof(int), hipMemcpyDeviceToHost)); - for (int i = 0; i < NELMTS; ++i) { - if (Ah[i] != RAh[i]) { - printf("Mismatch found at %d value of array\n", i); - printf(" after setting the flag hipSharedMemBankSizeFourByte\n"); - IfTestPassed = false; - } - } - // Testing hipFuncSetSharedMemConfig() with hipSharedMemBankSizeEightBytes flg - HIPCHECK(hipFuncSetSharedMemConfig(reinterpret_cast(&ReverseSeq), - hipSharedMemBankSizeEightByte)); - HIPCHECK(hipMemset(RAd, 0, NELMTS * sizeof(int))); - // Kernel Launch with shared mem size of = NELMTS * sizeof(int) - ReverseSeq<<<1, NELMTS, NELMTS * sizeof(int)>>>(Ad, RAd, NELMTS); - memset(Ah, 0, NELMTS * sizeof(int)); - // Verifying the results - HIPCHECK(hipMemcpy(Ah, RAd, NELMTS * sizeof(int), hipMemcpyDeviceToHost)); - for (int i = 0; i < NELMTS; ++i) { - if (Ah[i] != RAh[i]) { - printf("Mismatch found at %d value of array\n", i); - printf(" after setting the flag hipSharedMemBankSizeEightByte\n"); - IfTestPassed = false; - } - } - - free(Ah); - free(RAh); - HIPCHECK(hipFree(Ad)); - HIPCHECK(hipFree(RAd)); - - if (IfTestPassed) { - passed(); - } else { - failed("\n"); - } -} diff --git a/tests/src/runtimeApi/module/hipManagedKeyword.cpp b/tests/src/runtimeApi/module/hipManagedKeyword.cpp deleted file mode 100644 index 6e378e14e7..0000000000 --- a/tests/src/runtimeApi/module/hipManagedKeyword.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD_CMD: managed_kernel.code %hc --genco %S/managed_kernel.cpp -o managed_kernel.code - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include -#include "test_common.h" - -#define MANAGED_VAR_INIT_VALUE 10 -#define fileName "managed_kernel.code" - -bool managedMultiGPUTest() { - int numDevices = 0; - hipDeviceptr_t x; - size_t xSize; - int data; - hipGetDeviceCount(&numDevices); - for (int i = 0; i < numDevices; i++) { - hipSetDevice(i); - hipDevice_t device; - hipCtx_t context; - hipDeviceGet(&device, i); - hipCtxCreate(&context, 0, device); - hipModule_t Module; - HIPCHECK(hipModuleLoad(&Module, fileName)); - hipFunction_t Function; - HIPCHECK(hipModuleGetFunction(&Function, Module, "GPU_func")); - HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, 1, 1, 1, 0, 0, NULL, NULL)); - hipDeviceSynchronize(); - HIPCHECK(hipModuleGetGlobal((hipDeviceptr_t*)&x, &xSize, Module, "x")); - HIPCHECK(hipMemcpyDtoH(&data, hipDeviceptr_t(x), xSize)); - if (data != (1 + MANAGED_VAR_INIT_VALUE)) { - HIPCHECK(hipModuleUnload(Module)); - hipCtxDestroy(context); - return false; - } - HIPCHECK(hipModuleUnload(Module)); - hipCtxDestroy(context); - } - return true; -} - -int main(int argc, char** argv) { - hipInit(0); - bool testStatus = managedMultiGPUTest(); - if (!testStatus) { - failed("Managed keyword module test failed!"); - } - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModule.cpp b/tests/src/runtimeApi/module/hipModule.cpp deleted file mode 100644 index ed7ff62d79..0000000000 --- a/tests/src/runtimeApi/module/hipModule.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD_CMD: vcpy_kernel.code %hc --genco %S/vcpy_kernel.cpp -o vcpy_kernel.code - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x1 - * TEST: %t --tests 0x2 - * HIT_END - */ - -#include -#include -#ifdef __linux__ -#include -#endif -#include "test_common.h" - -#define LEN 64 -#define SIZE (LEN << 2) -#define COMMAND_LEN 256 -#define CODE_OBJ_SINGLEARCH "vcpy_kernel.code" -#define kernel_name "hello_world" -#define CODE_OBJ_MULTIARCH "vcpy_kernel_multarch.code" - -bool testCodeObjFile(const char *codeObjFile) { - float *A, *B; - hipDeviceptr_t Ad, Bd; - A = new float[LEN]; - B = new float[LEN]; - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = i * 1.0f; - B[i] = 0.0f; - } - - HIPCHECK(hipMalloc(reinterpret_cast(&Ad), SIZE)); - HIPCHECK(hipMalloc(reinterpret_cast(&Bd), SIZE)); - HIPCHECK(hipMemcpyHtoD(Ad, A, SIZE)); - HIPCHECK(hipMemcpyHtoD(Bd, B, SIZE)); - - hipModule_t Module; - hipFunction_t Function; - HIPCHECK(hipModuleLoad(&Module, codeObjFile)); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - struct { - void* _Ad; - void* _Bd; - } args; - args._Ad = reinterpret_cast(Ad); - args._Bd = reinterpret_cast(Bd); - size_t size = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, - stream, NULL, - reinterpret_cast(&config))); - - HIPCHECK(hipStreamDestroy(stream)); - - HIPCHECK(hipMemcpyDtoH(B, Bd, SIZE)); - - bool btestPassed = true; - for (uint32_t i = 0; i < LEN; i++) { - if (A[i] != B[i]) { - btestPassed = false; - break; - } - } - HIPCHECK(hipFree(reinterpret_cast(Bd))); - HIPCHECK(hipFree(reinterpret_cast(Ad))); - delete[] B; - delete[] A; - HIPCHECK(hipModuleUnload(Module)); - return btestPassed; -} - -#ifdef __linux__ -/** - * Check if environment variable $ROCM_PATH 
is defined - * - */ -bool isRocmPathSet() { - FILE *fpipe; - char const *command = "echo $ROCM_PATH"; - fpipe = popen(command, "r"); - - if (fpipe == nullptr) { - printf("Unable to create command\n"); - return false; - } - char command_op[COMMAND_LEN]; - if (fgets(command_op, COMMAND_LEN, fpipe)) { - size_t len = strlen(command_op); - if (len > 1) { // This is because fgets always adds newline character - pclose(fpipe); - return true; - } - } - pclose(fpipe); - return false; -} -#endif - -bool testMultiTargArchCodeObj() { - bool btestPassed = true; -#if defined(__linux__) && defined(__HIP_PLATFORM_AMD__) - char command[COMMAND_LEN]; - hipDeviceProp_t props; - hipGetDeviceProperties(&props, 0); - // Extract the base GPU arch name excluding any feature - std::string arch = std::string(props.gcnArchName); - auto pos = arch.find(":"); - if (pos != std::string::npos) - arch = arch.substr(0, pos); - - // Hardcoding the codeobject lines in multiple string to avoid cpplint warning - std::string CodeObjL1 = "#include \"hip/hip_runtime.h\"\n"; - std::string CodeObjL2 = - "extern \"C\" __global__ void hello_world(float* a, float* b) {\n"; - std::string CodeObjL3 = " int tx = threadIdx.x;\n"; - std::string CodeObjL4 = " b[tx] = a[tx];\n"; - std::string CodeObjL5 = "}"; - // Creating the full code object string - static std::string CodeObj = CodeObjL1 + CodeObjL2 + CodeObjL3 + - CodeObjL4 + CodeObjL5; - std::ofstream ofs("/tmp/vcpy_kernel.cpp", std::ofstream::out); - ofs << CodeObj; - ofs.close(); - // Copy the file into current working location if not available - if (access("/tmp/vcpy_kernel.cpp", F_OK) == -1) { - printf("Code Object File: /tmp/vcpy_kernel.cpp not found \n"); - return true; - } - // Generate the command to generate multi architecture code object file - const char* hipcc_path = nullptr; - if (isRocmPathSet()) { - hipcc_path = "$ROCM_PATH/bin/hipcc"; - } else { - hipcc_path = "/opt/rocm/bin/hipcc"; - } - /* Putting these command parameters into a variable to 
shorten the string - literal length in order to avoid multiline string literal cpplint warning - */ - const char* genco_option = "--offload-arch"; - const char* input_codeobj = "/tmp/vcpy_kernel.cpp"; - snprintf(command, COMMAND_LEN, - "unset HIP_PATH;%s --genco %s=gfx801,gfx802,gfx803,gfx900,gfx908,%s %s -o %s", - hipcc_path, genco_option, arch.c_str(), input_codeobj, - CODE_OBJ_MULTIARCH); - - printf("command = %s\n", command); - system((const char*)command); - // Check if the code object file is created - snprintf(command, COMMAND_LEN, "./%s", - CODE_OBJ_MULTIARCH); - - if (access(command, F_OK) == -1) { - printf("Code Object File not found \n"); - return false; - } - btestPassed = testCodeObjFile(CODE_OBJ_MULTIARCH); -#else - printf("This test is skipped due to non linux or non AMD environment.\n"); -#endif - return btestPassed; -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - if (p_tests == 0x1) { - /* In this test scenario a code object file for the current - GPU architecture is generated, loaded and executed. */ - TestPassed = testCodeObjFile(CODE_OBJ_SINGLEARCH); - } else if (p_tests == 0x2) { - /* In this test scenario a code object file for the multiple - GPU architectures (including the current) is generated, loaded - and executed. */ - TestPassed = testMultiTargArchCodeObj(); - } else { - printf("Invalid Test Case \n"); - exit(1); - } - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/runtimeApi/module/hipModuleGetGlobal.cpp b/tests/src/runtimeApi/module/hipModuleGetGlobal.cpp deleted file mode 100644 index 6d64377689..0000000000 --- a/tests/src/runtimeApi/module/hipModuleGetGlobal.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/* -Copyright (c) 2017 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD_CMD: global_kernel.code %hc --genco %S/global_kernel.cpp -o global_kernel.code - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include -#include -#include - -#define LEN 64 -#define SIZE LEN * sizeof(float) - -#define fileName "global_kernel.code" -#define HIP_CHECK(cmd) \ - { \ - hipError_t status = cmd; \ - if (status != hipSuccess) { \ - std::cout << "error: #" << status << " (" << hipGetErrorString(status) \ - << ") at line:" << __LINE__ << ": " << #cmd << std::endl; \ - abort(); \ - } \ - } - -int main() { - float *A, *B; - float *Ad, *Bd; - A = new float[LEN]; - B = new float[LEN]; - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = i * 1.0f; - B[i] = 0.0f; - } - - hipInit(0); - hipDevice_t device; - hipCtx_t context; - hipDeviceGet(&device, 0); - hipCtxCreate(&context, 0, device); - - hipMalloc((void**)&Ad, SIZE); - hipMalloc((void**)&Bd, SIZE); - - hipMemcpyHtoD(hipDeviceptr_t(Ad), A, SIZE); - hipMemcpyHtoD((hipDeviceptr_t)(Bd), B, SIZE); - hipModule_t Module; - HIP_CHECK(hipModuleLoad(&Module, fileName)); - - float myDeviceGlobal_h = 42.0; - hipDeviceptr_t deviceGlobal; - size_t deviceGlobalSize; - HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, Module, "myDeviceGlobal")); - HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), &myDeviceGlobal_h, deviceGlobalSize)); -#define ARRAY_SIZE 16 - float myDeviceGlobalArray_h[ARRAY_SIZE]; - hipDeviceptr_t myDeviceGlobalArray; - size_t myDeviceGlobalArraySize; - - HIP_CHECK(hipModuleGetGlobal((hipDeviceptr_t*)&myDeviceGlobalArray, &myDeviceGlobalArraySize, Module, "myDeviceGlobalArray")); - - for (int i = 0; i < ARRAY_SIZE; i++) { - myDeviceGlobalArray_h[i] = i * 1000.0f; - HIP_CHECK(hipMemcpyHtoD(hipDeviceptr_t(myDeviceGlobalArray), &myDeviceGlobalArray_h, myDeviceGlobalArraySize)); - } - - struct { - void* _Ad; - void* _Bd; - } args; - - args._Ad = (void*) Ad; 
- args._Bd = (void*) Bd; - - size_t size = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - - { - hipFunction_t Function; - HIP_CHECK(hipModuleGetFunction(&Function, Module, "hello_world")); - HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); - - hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE); - - int mismatchCount = 0; - for (uint32_t i = 0; i < LEN; i++) { - if (A[i] != B[i]) { - mismatchCount++; - std::cout << "error: mismatch " << A[i] << " != " << B[i] << std::endl; - if (mismatchCount >= 10) { - break; - } - } - } - - if (mismatchCount == 0) { - std::cout << "PASSED!\n"; - } else { - std::cout << "FAILED!\n"; - }; - } - - { - hipFunction_t Function; - HIP_CHECK(hipModuleGetFunction(&Function, Module, "test_globals")); - HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config)); - - hipMemcpyDtoH(B, hipDeviceptr_t(Bd), SIZE); - - int mismatchCount = 0; - for (uint32_t i = 0; i < LEN; i++) { - float expected = A[i] + myDeviceGlobal_h + + myDeviceGlobalArray_h[i % 16]; - if (expected != B[i]) { - mismatchCount++; - std::cout << "error: mismatch " << expected << " != " << B[i] << std::endl; - if (mismatchCount >= 10) { - break; - } - } - } - - if (mismatchCount == 0) { - std::cout << "PASSED!\n"; - } else { - std::cout << "FAILED!\n"; - }; - } - - HIP_CHECK(hipModuleUnload(Module)); - hipCtxDestroy(context); - return 0; -} diff --git a/tests/src/runtimeApi/module/hipModuleLaunchKernel.cpp b/tests/src/runtimeApi/module/hipModuleLaunchKernel.cpp deleted file mode 100644 index 324ebd4fdf..0000000000 --- a/tests/src/runtimeApi/module/hipModuleLaunchKernel.cpp +++ /dev/null @@ -1,356 +0,0 @@ -/* - Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ -/* Test Scenarios - 1. hipModuleLaunchKernel Negative Scenarios - 2. hipModuleLaunchKernel Corner Scenarios for Grid and Block dimensions - 3. 
hipModuleLaunchKernel Work Group tests => - - (block.x * block.y * block.z) <= Work Group Size - where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - - (block.x * block.y * block.z) > Work Group Size - where block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - */ -/* HIT_START - * BUILD_CMD: matmul.code %hc --genco %S/matmul.cpp -o matmul.code - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x1 - * TEST: %t --tests 0x2 - * TEST: %t --tests 0x3 - * HIT_END - */ - -#include -#include "test_common.h" - -#define fileName "matmul.code" -#define matmulK "matmulK" -#define SixteenSec "SixteenSecKernel" -#define KernelandExtra "KernelandExtraParams" -#define FourSec "FourSecKernel" -#define TwoSec "TwoSecKernel" -#define dummyKernel "dummyKernel" - -struct gridblockDim { - unsigned int gridX; - unsigned int gridY; - unsigned int gridZ; - unsigned int blockX; - unsigned int blockY; - unsigned int blockZ; -}; - -bool Module_Negative_tests() { - bool testStatus = true; - HIPCHECK(hipSetDevice(0)); - hipError_t err; - struct { - void* _Ad; - void* _Bd; - void* _Cd; - int _n; - } args1; - args1._Ad = nullptr; - args1._Bd = nullptr; - args1._Cd = nullptr; - args1._n = 0; - hipFunction_t MultKernel, KernelandExtraParamKernel; - size_t size1; - size1 = sizeof(args1); - hipModule_t Module; - hipDeviceptr_t deviceGlobal; - hipStream_t stream1; - hipDeviceptr_t *Ad; - hipDevice_t device; -#ifdef __HIP_PLATFORM_NVCC__ - hipCtx_t context; - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipCtxCreate(&context, 0, device)); -#endif - - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&MultKernel, Module, matmulK)); - HIPCHECK(hipModuleGetFunction(&KernelandExtraParamKernel, - Module, KernelandExtra)); - void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, - HIP_LAUNCH_PARAM_END}; - void *params[] = {Ad}; - 
HIPCHECK(hipStreamCreate(&stream1)); - // Passing nullptr to kernel function - err = hipModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0, - stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed nullptr to kernel function"); - testStatus = false; - } - // Passing Max int value to block dimensions - err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, - std::numeric_limits::max(), - std::numeric_limits::max(), - std::numeric_limits::max(), - 0, stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for max values to block dimension"); - testStatus = false; - } - // Passing 0 as value for all dimensions - err = hipModuleLaunchKernel(MultKernel, 0, 0, 0, - 0, - 0, - 0, 0, - stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for 0 as value for all dimensions"); - testStatus = false; - } - // Passing 0 as value for x dimension - err = hipModuleLaunchKernel(MultKernel, 0, 1, 1, - 0, - 1, - 1, 0, - stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for 0 as value for x dimension"); - testStatus = false; - } - // Passing 0 as value for y dimension - err = hipModuleLaunchKernel(MultKernel, 1, 0, 1, - 1, - 0, - 1, 0, - stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for 0 as value for y dimension"); - testStatus = false; - } - // Passing 0 as value for z dimension - err = hipModuleLaunchKernel(MultKernel, 1, 1, 0, - 1, - 1, - 0, 0, - stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for 0 as value for z dimension"); - testStatus = false; - } - // Passing both kernel and extra params - err = hipModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1, - 1, 1, 0, stream1, - reinterpret_cast(¶ms), - reinterpret_cast(&config1)); 
- if (err == hipSuccess) { - printf("hipModuleLaunchKernel fail when we pass both kernel,extra args"); - testStatus = false; - } - // Passing more than maxthreadsperblock to block dimensions - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, - deviceProp.maxThreadsPerBlock+1, - deviceProp.maxThreadsPerBlock+1, - deviceProp.maxThreadsPerBlock+1, 0, stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for max group size"); - testStatus = false; - } - // Block dimension X = Max Allowed + 1 - err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, - deviceProp.maxThreadsDim[0]+1, - 1, - 1, 0, stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for (MaxBlockDimX + 1)"); - testStatus = false; - } - // Block dimension Y = Max Allowed + 1 - err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, - 1, - deviceProp.maxThreadsDim[1]+1, - 1, 0, stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for (MaxBlockDimY + 1)"); - testStatus = false; - } - // Block dimension Z = Max Allowed + 1 - err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, - 1, - 1, - deviceProp.maxThreadsDim[2]+1, 0, stream1, NULL, - reinterpret_cast(&config1)); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed for (MaxBlockDimZ + 1)"); - testStatus = false; - } - // Passing invalid config data to extra params - void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1, - HIP_LAUNCH_PARAM_END}; - err = hipModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0, stream1, NULL, - reinterpret_cast(&config3)); - if (err == hipSuccess) { - printf("hipExtModuleLaunchKernel failed for invalid conf \n"); - testStatus = false; - } - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVCC__ - 
hipCtxDestroy(context); -#endif - return testStatus; -} - -bool Module_GridBlock_Corner_Tests() { - bool testStatus = true; - HIPCHECK(hipSetDevice(0)); - hipError_t err; - hipFunction_t DummyKernel; - hipModule_t Module; - hipStream_t stream1; - hipDeviceptr_t *Ad; - hipDevice_t device; -#ifdef __HIP_PLATFORM_NVCC__ - hipCtx_t context; - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipCtxCreate(&context, 0, device)); -#endif - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); - HIPCHECK(hipStreamCreate(&stream1)); - // Passing Max int value to block dimensions - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - unsigned int maxblockX = deviceProp.maxThreadsDim[0]; - unsigned int maxblockY = deviceProp.maxThreadsDim[1]; - unsigned int maxblockZ = deviceProp.maxThreadsDim[2]; - unsigned int maxgridX = deviceProp.maxGridSize[0]; - unsigned int maxgridY = deviceProp.maxGridSize[1]; - unsigned int maxgridZ = deviceProp.maxGridSize[2]; - - struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1}, - {1, 1, 1, 1, maxblockY, 1}, - {1, 1, 1, 1, 1, maxblockZ}, - {maxgridX, 1, 1, 1, 1, 1}, - {1, maxgridY, 1, 1, 1, 1}, - {1, 1, maxgridZ, 1, 1, 1}}; - for (int i = 0; i < 6; i++) { - err = hipModuleLaunchKernel(DummyKernel, - test[i].gridX, - test[i].gridY, - test[i].gridZ, - test[i].blockX, - test[i].blockY, - test[i].blockZ, - 0, - stream1, NULL, NULL); - if (err != hipSuccess) { - printf("hipModuleLaunchKernel failed (%u, %u, %u) and (%u, %u, %u)", - test[i].gridX, test[i].gridY, test[i].gridZ, - test[i].blockX, test[i].blockY, test[i].blockZ); - testStatus = false; - } - } - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVCC__ - hipCtxDestroy(context); -#endif - return testStatus; -} - -bool Module_WorkGroup_Test() { - bool testStatus = true; - HIPCHECK(hipSetDevice(0)); - hipError_t err; - hipFunction_t DummyKernel; - hipModule_t Module; 
- hipStream_t stream1; - hipDeviceptr_t *Ad; - hipDevice_t device; -#ifdef __HIP_PLATFORM_NVCC__ - hipCtx_t context; - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipCtxCreate(&context, 0, device)); -#endif - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel)); - HIPCHECK(hipStreamCreate(&stream1)); - // Passing Max int value to block dimensions - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, 0); - double cuberootVal = - cbrt(static_cast(deviceProp.maxThreadsPerBlock)); - uint32_t cuberoot_floor = floor(cuberootVal); - uint32_t cuberoot_ceil = ceil(cuberootVal); - // Scenario: (block.x * block.y * block.z) <= Work Group Size where - // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - err = hipModuleLaunchKernel(DummyKernel, - 1, 1, 1, - cuberoot_floor, cuberoot_floor, cuberoot_floor, - 0, stream1, NULL, NULL); - if (err != hipSuccess) { - printf("hipModuleLaunchKernel failed block dimensions (%u, %u, %u)", - cuberoot_floor, cuberoot_floor, cuberoot_floor); - testStatus = false; - } - // Scenario: (block.x * block.y * block.z) > Work Group Size where - // block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ - err = hipModuleLaunchKernel(DummyKernel, - 1, 1, 1, - cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1, - 0, stream1, NULL, NULL); - if (err == hipSuccess) { - printf("hipModuleLaunchKernel failed block dimensions (%u, %u, %u)", - cuberoot_ceil, cuberoot_ceil, cuberoot_ceil); - testStatus = false; - } - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVCC__ - hipCtxDestroy(context); -#endif - return testStatus; -} - -int main(int argc, char* argv[]) { - bool testStatus = true; - HipTest::parseStandardArguments(argc, argv, true); - if (p_tests == 0x1) { - testStatus = Module_Negative_tests(); - } else if (p_tests == 0x2) { - testStatus = Module_GridBlock_Corner_Tests(); - } else if 
(p_tests == 0x3) { - testStatus = Module_WorkGroup_Test(); - } else { - printf("Invalid Test Case \n"); - exit(1); - } - if (testStatus) { - passed(); - } else { - failed("Test Failed!"); - } -} diff --git a/tests/src/runtimeApi/module/hipModuleLoadData.cpp b/tests/src/runtimeApi/module/hipModuleLoadData.cpp deleted file mode 100644 index 2b1cbf5c08..0000000000 --- a/tests/src/runtimeApi/module/hipModuleLoadData.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include -#include -#include -#include -#include - -#include "test_common.h" - -#define LEN 64 -#define SIZE LEN << 2 - -#define FILENAME "vcpy_kernel.code" -#define kernel_name "hello_world" - -int main() { - float *A, *B, *Ad, *Bd; - A = new float[LEN]; - B = new float[LEN]; - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = i * 1.0f; - B[i] = 0.0f; - } - - HIPCHECK(hipInit(0)); - HIPCHECK(hipMalloc((void**)&Ad, SIZE)); - HIPCHECK(hipMalloc((void**)&Bd, SIZE)); - - HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); - - hipModule_t Module; - hipFunction_t Function; - std::ifstream file(FILENAME, std::ios::binary | std::ios::ate); - std::streamsize fsize = file.tellg(); - file.seekg(0, std::ios::beg); - - std::vector buffer(fsize); - if (file.read(buffer.data(), fsize)) { - HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - } - else { - failed("could not open code object '%s'\n", FILENAME); - } - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - struct { - void* _Ad; - void* _Bd; - } args; - args._Ad = (void*) Ad; - args._Bd = (void*) Bd; - size_t size = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config)); - - HIPCHECK(hipStreamDestroy(stream)); - - HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); - - for (uint32_t i = 0; i < LEN; i++) { - assert(A[i] == B[i]); - } - - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp deleted file mode 100644 
index dadcc2d222..0000000000 --- a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - - -#include -#include -#include -#include -#include - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" - - -#include "test_common.h" - -#define LEN 64 -#define SIZE LEN << 2 -#define THREADS 8 - -#define FILENAME "vcpy_kernel.code" -#define kernel_name "hello_world" - -std::vector load_file() { - std::ifstream file(FILENAME, std::ios::binary | std::ios::ate); - std::streamsize fsize = file.tellg(); - file.seekg(0, std::ios::beg); - - std::vector buffer(fsize); - if (!file.read(buffer.data(), fsize)) { - failed("could not open code object '%s'\n", FILENAME); - } - return buffer; -} - -void run(const std::vector& buffer, int deviceNo) { - hipSetDevice(deviceNo); - hipModule_t Module; - hipFunction_t Function; - - float *A, *B, *Ad, *Bd; - A = new float[LEN]; - B = new float[LEN]; - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = i * 1.0f; - B[i] = 0.0f; - } - - HIPCHECK(hipMalloc(&Ad, SIZE)); - HIPCHECK(hipMalloc(&Bd, SIZE)); - - HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); - - HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - struct { - void* _Ad; - void* _Bd; - } args; - args._Ad = static_cast(Ad); - args._Bd = static_cast(Bd); - size_t size = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config)); - - HIPCHECK(hipStreamSynchronize(stream)); - - HIPCHECK(hipStreamDestroy(stream)); - - HIPCHECK(hipModuleUnload(Module)); - - HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); - - for (uint32_t i = 0; i < LEN; i++) { - assert(A[i] == 
B[i]); - } - hipFree(Ad); - hipFree(Bd); - delete[] A; - delete[] B; -} - -struct joinable_thread : std::thread { - template - joinable_thread(Xs&&... xs) : std::thread(std::forward(xs)...) {} // NOLINT - - joinable_thread& operator=(joinable_thread&& other) = default; - joinable_thread(joinable_thread&& other) = default; - - ~joinable_thread() { - if (this->joinable()) - this->join(); - } -}; - -void run_multi_threads(uint32_t n, const std::vector& buffer) { - int numDevices = 0; - HIPCHECK(hipGetDeviceCount(&numDevices)); - - std::vector threads; - - for (int deviceNo=0; deviceNo < numDevices; ++deviceNo) { - for (uint32_t i = 0; i < n; i++) { - threads.emplace_back(std::thread{[&, buffer] { - run(buffer, deviceNo); - }}); - } - } -} - -int main() { - HIPCHECK(hipInit(0)); - auto buffer = load_file(); - auto file_size = buffer.size() / (1024 * 1024); - auto thread_count = getHostThreadCount(file_size + 10); - if(thread_count == 0) { - failed("Thread Count is zero"); - } - run_multi_threads(thread_count, buffer); - - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp deleted file mode 100644 index 4b03c3b189..0000000000 --- a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM nvidia LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include -#include -#include -#include -#include - -#include "test_common.h" - -#define LEN 64 -#define SIZE LEN << 2 -#define THREADS 8 -#define MAX_THREADS 512 - -#define FILENAME "vcpy_kernel.code" -#define kernel_name "hello_world" - -std::vector load_file() { - std::ifstream file(FILENAME, std::ios::binary | std::ios::ate); - std::streamsize fsize = file.tellg(); - file.seekg(0, std::ios::beg); - - std::vector buffer(fsize); - if (!file.read(buffer.data(), fsize)) { - failed("could not open code object '%s'\n", FILENAME); - } - return buffer; -} - -void run(const std::vector& buffer) { - hipModule_t Module; - hipFunction_t Function; - - float *A, *B, *Ad, *Bd; - A = new float[LEN]; - B = new float[LEN]; - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = i * 1.0f; - B[i] = 0.0f; - } - - HIPCHECK(hipMalloc((void**)&Ad, SIZE)); - HIPCHECK(hipMalloc((void**)&Bd, SIZE)); - - HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); - - HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - struct { - void* _Ad; - void* _Bd; - } args; - args._Ad = (void*) Ad; - args._Bd = (void*) Bd; - size_t 
size = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config)); - - HIPCHECK(hipStreamSynchronize(stream)); - - HIPCHECK(hipStreamDestroy(stream)); - - HIPCHECK(hipModuleUnload(Module)); - - HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); - - for (uint32_t i = 0; i < LEN; i++) { - assert(A[i] == B[i]); - } - - hipFree(Ad); - hipFree(Bd); - delete[] A; - delete[] B; -} - -struct joinable_thread : std::thread { - template - joinable_thread(Xs&&... xs) : std::thread(std::forward(xs)...) {} // NOLINT - - joinable_thread& operator=(joinable_thread&& other) = default; - joinable_thread(joinable_thread&& other) = default; - - ~joinable_thread() { - if (this->joinable()) - this->join(); - } -}; - -void run_multi_threads(uint32_t n, const std::vector& buffer) { - std::vector threads; - for (uint32_t i = 0; i < n; i++) { - threads.emplace_back(std::thread{[&] { - run(buffer); - }}); - } -} - -int main() { - HIPCHECK(hipInit(0)); - auto buffer = load_file(); - auto file_size = buffer.size() / (1024 * 1024); - auto thread_count = getHostThreadCount(file_size + 10); - if(thread_count == 0) { - failed("Thread Count is zero"); - } - - run_multi_threads(thread_count, buffer); - - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModuleLoadMultProcessOnMultGPU.cpp b/tests/src/runtimeApi/module/hipModuleLoadMultProcessOnMultGPU.cpp deleted file mode 100644 index dafa418b07..0000000000 --- a/tests/src/runtimeApi/module/hipModuleLoadMultProcessOnMultGPU.cpp +++ /dev/null @@ -1,311 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD_CMD: kernel_composite_test.code %hc --genco %S/kernel_composite_test.cpp -o kernel_composite_test.code - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x1 - * TEST: %t --tests 0x2 - * TEST: %t --tests 0x3 - * HIT_END - */ -#include -#include - -#ifdef __linux__ -#include -#include -#endif -#include -#include -#include -#include -#include "test_common.h" - -#define TEST_ITERATIONS 1000 -#define CODEOBJ_FILE "kernel_composite_test.code" -#define CODEOBJ_GLOB_KERNEL1 "testWeightedCopy" -#define CODEOBJ_GLOB_KERNEL2 "getAvg" -#define BLOCKSPERCULDULD 6 -#define THREADSPERBLOCKLDULD 256 - -unsigned int globTestID = 0; - -/** - * Fetches Gpu device count - */ -void getDeviceCount(int *pdevCnt) { -#ifdef __linux__ - int fd[2], val = 0; - pid_t childpid; - - // create pipe descriptors - pipe(fd); - - // disable visible_devices env from shell - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - - childpid = fork(); - - if (childpid > 0) { // Parent - close(fd[1]); - // parent will wait to read the device cnt - read(fd[0], &val, sizeof(val)); - - // close the read-descriptor - close(fd[0]); - - // wait for child exit - wait(NULL); - - *pdevCnt = val; - } else if (!childpid) { // Child - int devCnt = 1; - // writing only, no need for read-descriptor - close(fd[0]); - - HIPCHECK(hipGetDeviceCount(&devCnt)); - // send the value on the write-descriptor: - write(fd[1], &devCnt, sizeof(devCnt)); - - // close the write descriptor: - close(fd[1]); - exit(0); - } else { // failure - *pdevCnt = 1; - return; - } - -#else - HIPCHECK(hipGetDeviceCount(pdevCnt)); -#endif -} - -/** - * Validates hipModuleLoadUnload if globTestID = 1 - * Validates hipModuleLoadDataUnload if globTestID = 2 - * Validates hipModuleLoadDataExUnload if globTestID = 3 - */ -bool testhipModuleLoadUnloadFunc(const std::vector& buffer) { - size_t N = 16*16; - size_t Nbytes = N * sizeof(int); - int *A_d, *B_d; - int *A_h, *B_h; - 
unsigned blocks = HipTest::setNumBlocks(BLOCKSPERCULDULD, - THREADSPERBLOCKLDULD, N); - int deviceid; - hipGetDevice(&deviceid); - printf("pid = %u deviceid = %d\n", getpid(), deviceid); - // allocate host and device buffer - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&B_d, Nbytes)); - - A_h = reinterpret_cast(malloc(Nbytes)); - if (NULL == A_h) { - failed("Failed to allocate using malloc"); - } - B_h = reinterpret_cast(malloc(Nbytes)); - if (NULL == B_h) { - failed("Failed to allocate using malloc"); - } - // set host buffers - for (int idx = 0; idx < N; idx++) { - A_h[idx] = deviceid; - } - // Copy buffer from host to device - - HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - - hipModule_t Module; - hipFunction_t Function; - if (1 == globTestID) { - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - } else if (2 == globTestID) { - HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); - } else if (3 == globTestID) { - HIPCHECK(hipModuleLoadDataEx(&Module, - &buffer[0], 0, nullptr, nullptr)); - } - HIPCHECK(hipModuleGetFunction(&Function, Module, - CODEOBJ_GLOB_KERNEL1)); - float deviceGlobalFloatH = 3.14; - int deviceGlobalInt1H = 100*deviceid; - int deviceGlobalInt2H = 50*deviceid; - short deviceGlobalShortH = 25*deviceid; - char deviceGlobalCharH = 13*deviceid; - hipDeviceptr_t deviceGlobal; - size_t deviceGlobalSize; - HIPCHECK(hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, - Module, "deviceGlobalFloat")); - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), - &deviceGlobalFloatH, - deviceGlobalSize)); - HIPCHECK(hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, - Module, "deviceGlobalInt1")); - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), - &deviceGlobalInt1H, - deviceGlobalSize)); - HIPCHECK(hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, - Module, - "deviceGlobalInt2")); - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), - &deviceGlobalInt2H, deviceGlobalSize)); - 
HIPCHECK(hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, - Module, "deviceGlobalShort")); - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), - &deviceGlobalShortH, deviceGlobalSize)); - HIPCHECK(hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, Module, "deviceGlobalChar")); - HIPCHECK(hipMemcpyHtoD(hipDeviceptr_t(deviceGlobal), - &deviceGlobalCharH, deviceGlobalSize)); - // Launch Function kernel function - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - struct { - void* _Ad; - void* _Bd; - } args; - args._Ad = reinterpret_cast(A_d); - args._Bd = reinterpret_cast(B_d); - size_t size = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, - N, 1, 1, 0, stream, NULL, - reinterpret_cast(&config))); - // Copy buffer from decice to host - HIPCHECK(hipMemcpyAsync(B_h, B_d, Nbytes, hipMemcpyDeviceToHost, stream)); - HIPCHECK(hipDeviceSynchronize()); - HIPCHECK(hipStreamDestroy(stream)); - - // Check the results - for (int idx = 0; idx < N; idx++) { - if (B_h[idx] != (deviceGlobalInt1H*A_h[idx] - + deviceGlobalInt2H - + static_cast(deviceGlobalShortH) + - + static_cast(deviceGlobalCharH) - + static_cast(deviceGlobalFloatH*deviceGlobalFloatH))) { - printf("Matrix Addition Failed\n"); - // exit the current process with failure - return false; - } - } - HIPCHECK(hipModuleUnload(Module)); - // free memory - HIPCHECK(hipFree(B_d)); - HIPCHECK(hipFree(A_d)); - free(B_h); - free(A_h); - printf("pid:%u PASSED\n", getpid()); - return true; -} - -/** - * Spawn 1 Process for each device - * - */ -void spawnProc(int deviceCount, const std::vector& buffer) { - int numDevices = deviceCount; - bool TestPassed = true; -#ifdef __linux__ - pid_t pid = 0; - // spawn a process for each device - for (int deviceNo = 0; deviceNo < numDevices; deviceNo++) { - if ((pid = fork()) < 0) { - printf("Child_Concurrency_MultiGpu : fork() 
returned error %d\n", - pid); - failed("Test Failed!"); - } else if (!pid) { // Child process - bool TestPassedChild = true; - // set the device id for the current process - HIPCHECK(hipSetDevice(deviceNo)); - TestPassedChild = testhipModuleLoadUnloadFunc(buffer); - - if (TestPassedChild) { - exit(0); // child exit with success status - } else { - printf("Child_Concurrency_MultiGpu : childpid %d failed\n", - getpid()); - exit(1); // child exit with failure status - } - } - } - int cumStatus = 0; - // Parent shall wait for child to complete - for (int i = 0; i < numDevices; i++) { - int pidwait = 0, exitStatus; - pidwait = wait(&exitStatus); - cumStatus |= WEXITSTATUS(exitStatus); - } - if (cumStatus) { - TestPassed &= false; - } -#else - for (int deviceNo = 0; deviceNo < numDevices; deviceNo++) { - // set the device id for the current process - HIPCHECK(hipSetDevice(deviceNo)); - TestPassed &= testhipModuleLoadUnloadFunc(buffer); - } -#endif - if (!TestPassed) { - failed("hipMallocChild_Concurrency_MultiGpu Failed!"); - } -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - int numDevices = 0; - getDeviceCount(&numDevices); - if (1 == numDevices) { - printf("Testing on Single GPU machine.\n"); - } - std::ifstream file(CODEOBJ_FILE, - std::ios::binary | std::ios::ate); - std::streamsize fsize = file.tellg(); - file.seekg(0, std::ios::beg); - - std::vector buffer(fsize); - if (!file.read(buffer.data(), fsize)) { - failed("could not open code object '%s'\n", CODEOBJ_FILE); - } - file.close(); - if (p_tests == 0x1) { - globTestID = 1; - spawnProc(numDevices, buffer); - } else if (p_tests == 0x2) { - globTestID = 2; - spawnProc(numDevices, buffer); - } else if (p_tests == 0x3) { - globTestID = 3; - spawnProc(numDevices, buffer); - } - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModuleLoadMultiThreaded.cpp b/tests/src/runtimeApi/module/hipModuleLoadMultiThreaded.cpp deleted file mode 100644 index 
24f0f782cf..0000000000 --- a/tests/src/runtimeApi/module/hipModuleLoadMultiThreaded.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD_CMD: empty_kernel.code %hc --genco %S/empty_kernel.cpp -o empty_kernel.code - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include -#include "hip/hip_runtime.h" -#ifdef __HIP_PLATFORM_AMD__ -#include "hip/hip_ext.h" -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#define THREADS 8 -#define MAX_NUM_THREADS 512 - -#include "test_common.h" - -#define NUM_GROUPS 1 -#define GROUP_SIZE 1 -#define WARMUP_RUN_COUNT 10 -#define TIMING_RUN_COUNT 100 -#define TOTAL_RUN_COUNT WARMUP_RUN_COUNT + TIMING_RUN_COUNT -#define FILENAME "empty_kernel.code" -#define kernel_name "EmptyKernel" - -void hipModuleLaunchKernel_enqueue(std::atomic_int* shared, int max_threads) -{ - //resources necessary for this thread - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - hipModule_t module; - hipFunction_t function; - - HIPCHECK(hipModuleLoad(&module, FILENAME)); - HIPCHECK(hipModuleGetFunction(&function, module, kernel_name)); - - void* kernel_params = nullptr; - std::array results; - - //synchronize all threads, before running - int tid = shared->fetch_add(1, std::memory_order_release); - while (max_threads != shared->load(std::memory_order_acquire)) {} - - for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { - HIPCHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr)); - } - HIPCHECK(hipModuleUnload(module)); - HIPCHECK(hipStreamDestroy(stream)); -} - -// thread pool -struct thread_pool { - thread_pool(int total_threads) : max_threads(total_threads) { - } - void start(std::function f) { - for (int i = 0; i < max_threads; ++i) { - threads.push_back(std::async(std::launch::async, f, &shared, max_threads)); - } - } - void finish() { - for (auto&&thread : threads) { - thread.get(); - } - threads.clear(); - shared = 0; - } - ~thread_pool() { - finish(); - } -private: - std::atomic_int shared {0}; - 
std::vector buffer; - std::vector> threads; - int max_threads = 1; -}; - - -int main(int argc, char* argv[]) -{ - int max_threads = min(THREADS * std::thread::hardware_concurrency(), MAX_NUM_THREADS); - thread_pool task(max_threads); - - task.start(hipModuleLaunchKernel_enqueue); - task.finish(); - - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModuleLoadUnloadStress.cpp b/tests/src/runtimeApi/module/hipModuleLoadUnloadStress.cpp deleted file mode 100644 index 6a88f0cc4b..0000000000 --- a/tests/src/runtimeApi/module/hipModuleLoadUnloadStress.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x1 - * TEST: %t --tests 0x2 - * TEST: %t --tests 0x3 - * HIT_END - */ -#include -#include -#include -#include -#include -#include -#include "test_common.h" - -#define TEST_ITERATIONS 1000 -#define CODEOBJ_FILE "kernel_composite_test.code" -/** - * Run Valgrind tool with these test cases to validate memory leakage. - * E.g. valgrind --leak-check=yes ./a.out --tests 0x1 - */ - -/** - * Internal Function - */ -std::vector load_file() { - std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate); - std::streamsize fsize = file.tellg(); - file.seekg(0, std::ios::beg); - std::vector buffer(fsize); - if (!file.read(buffer.data(), fsize)) { - failed("could not open code object '%s'\n", CODEOBJ_FILE); - } - file.close(); - return buffer; -} -/** - * Validates no memory leakage for hipModuleLoad - */ -void testhipModuleLoadUnloadStress() { - for (int count = 0; count < TEST_ITERATIONS; count++) { - hipModule_t Module; - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - hipFunction_t Function; - HIPCHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy")); - HIPCHECK(hipModuleUnload(Module)); - } -} -/** - * Validates no memory leakage for hipModuleLoadData - */ -void testhipModuleLoadDataUnloadStress() { - auto buffer = load_file(); - for (int count = 0; count < TEST_ITERATIONS; count++) { - hipModule_t Module; - HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); - hipFunction_t Function; - HIPCHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy")); - HIPCHECK(hipModuleUnload(Module)); - } -} -/** - * Validates no memory leakage for hipModuleLoadDataEx - */ -void testhipModuleLoadDataExUnloadStress() { - auto buffer = load_file(); - for (int count = 0; count < TEST_ITERATIONS; count++) { - hipModule_t Module; - HIPCHECK(hipModuleLoadDataEx(&Module, &buffer[0], 0, - nullptr, nullptr)); - hipFunction_t Function; - 
HIPCHECK(hipModuleGetFunction(&Function, Module, "testWeightedCopy")); - HIPCHECK(hipModuleUnload(Module)); - } -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipInit(0)); - hipDevice_t device; - hipCtx_t context; - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipCtxCreate(&context, 0, device)); -#endif - if (p_tests == 0x1) { - testhipModuleLoadUnloadStress(); - } else if (p_tests == 0x2) { - testhipModuleLoadDataUnloadStress(); - } else if (p_tests == 0x3) { - testhipModuleLoadDataExUnloadStress(); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModuleNegative.cpp b/tests/src/runtimeApi/module/hipModuleNegative.cpp deleted file mode 100644 index 7984bebf96..0000000000 --- a/tests/src/runtimeApi/module/hipModuleNegative.cpp +++ /dev/null @@ -1,903 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t --tests 0x10 - * TEST: %t --tests 0x11 - * TEST: %t --tests 0x12 - * TEST: %t --tests 0x13 - * TEST: %t --tests 0x14 - * TEST: %t --tests 0x15 - * TEST: %t --tests 0x20 - * TEST: %t --tests 0x21 - * TEST: %t --tests 0x22 - * TEST: %t --tests 0x30 - * TEST: %t --tests 0x31 - * TEST: %t --tests 0x32 - * TEST: %t --tests 0x40 - * TEST: %t --tests 0x41 - * TEST: %t --tests 0x42 - * TEST: %t --tests 0x43 - * TEST: %t --tests 0x44 - * TEST: %t --tests 0x45 - * TEST: %t --tests 0x50 - * TEST: %t --tests 0x51 - * TEST: %t --tests 0x52 - * TEST: %t --tests 0x53 - * TEST: %t --tests 0x54 - * TEST: %t --tests 0x55 - * TEST: %t --tests 0x56 - * TEST: %t --tests 0x60 - * HIT_END - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "test_common.h" - -#define FILENAME_NONEXST "sample_nonexst.code" -#define FILENAME_EMPTY "emptyfile.code" -#define FILENAME_RAND "rand_file.code" -#define RANDOMFILE_LEN 2048 -#define CODEOBJ_FILE "vcpy_kernel.code" -#define KERNEL_NAME "hello_world" -#define KERNEL_NAME_NONEXST "xyz" -#define CODEOBJ_GLOBAL "global_kernel.code" -#define DEVGLOB_VAR_NONEXIST "xyz" -#define DEVGLOB_VAR "myDeviceGlobal" -/** - * Internal Function - */ -std::vector load_file(const char* filename) { - std::ifstream file(filename, std::ios::binary | std::ios::ate); - std::streamsize fsize = file.tellg(); - file.seekg(0, std::ios::beg); - std::vector buffer(fsize); - if (!file.read(buffer.data(), fsize)) { - failed("could not open code object '%s'\n", filename); - } - file.close(); - return buffer; -} - -/** - * Internal Function - */ -void createRandomFile(const char* filename) 
{ - std::ofstream outfile(filename, std::ios::binary); - char buf[RANDOMFILE_LEN]; - unsigned int seed = 1; - for (int i = 0; i < RANDOMFILE_LEN; i++) { - buf[i] = rand_r(&seed) % 256; - } - outfile.write(buf, RANDOMFILE_LEN); - outfile.close(); -} - -/** - * Internal Function - */ -#ifdef __HIP_PLATFORM_NVIDIA__ -void initHipCtx(hipCtx_t *pcontext) { - HIPCHECK(hipInit(0)); - hipDevice_t device; - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipCtxCreate(pcontext, 0, device)); -} -#endif - -/** - * Validates negative scenarios for hipModuleLoad - * module = nullptr - */ -bool testhipModuleLoadNeg10() { - bool TestPassed = false; - hipError_t ret; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoad(nullptr, CODEOBJ_FILE)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoad - * fname = nullptr - */ -bool testhipModuleLoadNeg11() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoad(&Module, nullptr)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} -/** - * Validates negative scenarios for hipModuleLoad - * fname = empty file - */ -bool testhipModuleLoadNeg12() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - // Create an empty - std::fstream fs; - fs.open(FILENAME_EMPTY, std::ios::out); - fs.close(); -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoad(&Module, FILENAME_EMPTY)) - != 
hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - remove(FILENAME_EMPTY); - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoad - * fname = ramdom file - */ -bool testhipModuleLoadNeg13() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - // Create a binary file with random numbers - createRandomFile(FILENAME_RAND); -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoad(&Module, FILENAME_RAND)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - remove(FILENAME_RAND); - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoad - * fname = non existent file - */ -bool testhipModuleLoadNeg14() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoad(&Module, FILENAME_NONEXST)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoad - * fname = empty string "" - */ -bool testhipModuleLoadNeg15() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoad(&Module, "")) != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); 
-#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoadData - * module = nullptr - */ -bool testhipModuleLoadDataNeg20() { - bool TestPassed = false; - hipError_t ret; - auto buffer = load_file(CODEOBJ_FILE); -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoadData(nullptr, &buffer[0])) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoadData - * image = nullptr - */ -bool testhipModuleLoadDataNeg21() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoadData(&Module, nullptr)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoadData - * image = ramdom file - */ -bool testhipModuleLoadDataNeg22() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - // Create a binary file with random numbers - createRandomFile(FILENAME_RAND); - // Open the code object file and copy it in a buffer - auto buffer = load_file(FILENAME_RAND); -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoadData(&Module, &buffer[0])) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - remove(FILENAME_RAND); - return TestPassed; -} -/** - * Validates negative scenarios for 
hipModuleLoadDataEx - * module = nullptr - */ -bool testhipModuleLoadDataExNeg30() { - bool TestPassed = false; - hipError_t ret; - // Open the code object file and copy it in a buffer - auto buffer = load_file(CODEOBJ_FILE); -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoadDataEx(nullptr, &buffer[0], 0, nullptr, nullptr)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoadDataEx - * image = nullptr - */ -bool testhipModuleLoadDataExNeg31() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoadDataEx(&Module, nullptr, 0, nullptr, nullptr)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleLoadDataEx - * image = ramdom file - */ -bool testhipModuleLoadDataExNeg32() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - // Create a binary file with random numbers - createRandomFile(FILENAME_RAND); - // Open the code object file and copy it in a buffer - auto buffer = load_file(FILENAME_RAND); -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleLoadDataEx(&Module, &buffer[0], 0, nullptr, nullptr)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - remove(FILENAME_RAND); - return TestPassed; -} 
- -/** - * Validates negative scenarios for hipModuleGetFunction - * Function = nullptr - */ -bool testhipModuleGetFunctionNeg40() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if ((ret = hipModuleGetFunction(nullptr, Module, KERNEL_NAME)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetFunction - * Module is uninitialized - */ -bool testhipModuleGetFunctionNeg41() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipFunction_t Function; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleGetFunction(&Function, Module, KERNEL_NAME)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetFunction - * kname = non existing function - */ -bool testhipModuleGetFunctionNeg42() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipFunction_t Function; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if ((ret = hipModuleGetFunction(&Function, Module, KERNEL_NAME_NONEXST)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} 
- -/** - * Validates negative scenarios for hipModuleGetFunction - * kname = nullptr - */ -bool testhipModuleGetFunctionNeg43() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipFunction_t Function; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if ((ret = hipModuleGetFunction(&Function, Module, nullptr)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetFunction - * Module = Unloaded Module - */ -bool testhipModuleGetFunctionNeg44() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipFunction_t Function; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - HIPCHECK(hipModuleUnload(Module)); - if ((ret = hipModuleGetFunction(&Function, Module, KERNEL_NAME)) - != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetFunction - * kname = Empty String "" - */ -bool testhipModuleGetFunctionNeg45() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipFunction_t Function; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if ((ret = hipModuleGetFunction(&Function, - Module, "")) != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef 
__HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetGlobal - * dptr = nullptr - */ -bool testhipModuleGetGlobalNeg50() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - size_t deviceGlobalSize; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); - if ((ret = hipModuleGetGlobal(nullptr, - &deviceGlobalSize, Module, DEVGLOB_VAR)) != hipSuccess) { - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } else { - // If one of first two parameters is nullptr, it is ignored. - TestPassed = true; - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetGlobal - * bytes = nullptr - */ -bool testhipModuleGetGlobalNeg51() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipDeviceptr_t deviceGlobal; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); - if ((ret = hipModuleGetGlobal(&deviceGlobal, nullptr, - Module, DEVGLOB_VAR)) != hipSuccess) { - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } else { - // If one of first two parameters is nullptr, it is ignored. 
- TestPassed = true; - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetGlobal - * name = nullptr - */ -bool testhipModuleGetGlobalNeg52() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipDeviceptr_t deviceGlobal; - size_t deviceGlobalSize; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); - if ((ret = hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, Module, nullptr)) != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetGlobal - * name = wrong name - */ -bool testhipModuleGetGlobalNeg53() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipDeviceptr_t deviceGlobal; - size_t deviceGlobalSize; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); - if ((ret = hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize, - Module, DEVGLOB_VAR_NONEXIST)) != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetGlobal - * name = Empty String "" - */ -bool testhipModuleGetGlobalNeg54() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipDeviceptr_t deviceGlobal; - size_t deviceGlobalSize; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif 
- HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); - if ((ret = hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, Module, "")) != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } - HIPCHECK(hipModuleUnload(Module)); -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetGlobal - * Module = Unloaded Module - */ -bool testhipModuleGetGlobalNeg55() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipDeviceptr_t deviceGlobal; - size_t deviceGlobalSize; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_GLOBAL)); - HIPCHECK(hipModuleUnload(Module)); - if ((ret = hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, Module, DEVGLOB_VAR)) != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetGlobal - * Module = Uninitialized Module - */ -bool testhipModuleGetGlobalNeg56() { - bool TestPassed = false; - hipError_t ret; - hipModule_t Module; - hipDeviceptr_t deviceGlobal; - size_t deviceGlobalSize; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - if ((ret = hipModuleGetGlobal(&deviceGlobal, - &deviceGlobalSize, Module, DEVGLOB_VAR)) != hipSuccess) { - TestPassed = true; - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleUnload - * 1. Unload an uninitialized module - * 2. 
Unload an unloaded module - */ -bool testhipModuleLoadNeg60() { - bool TestPassed = true; - hipError_t ret; - hipModule_t Module; -#ifdef __HIP_PLATFORM_NVIDIA__ - hipCtx_t context; - initHipCtx(&context); -#endif - // test case 1 - if ((ret = hipModuleUnload(Module)) != hipSuccess) { - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } else { - TestPassed &= false; - } - // test case 2 - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - HIPCHECK(hipModuleUnload(Module)); - if ((ret = hipModuleUnload(Module)) != hipSuccess) { - printf("Test Passed: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(ret), ret); - } else { - TestPassed &= false; - } -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipCtxDestroy(context)); -#endif - return TestPassed; -} - -#ifdef __HIP_PLATFORM_NVIDIA__ -extern "C" void signalHandler(int sig, siginfo_t *info, void *xxx) -{ - printf("signalHandler(%d) called\n", sig); - throw sig; -} -#endif - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = false; - -#ifdef __HIP_PLATFORM_NVIDIA__ - if (p_tests == 0x41 || p_tests == 0x44 || p_tests == 0x55 - || p_tests == 0x56 || p_tests == 0x60) { - struct sigaction sa = {0}; - sa.sa_sigaction = signalHandler; - sigaction(SIGSEGV, &sa, NULL); - } - try { -#endif - - if (p_tests == 0x10) { - TestPassed = testhipModuleLoadNeg10(); - } else if (p_tests == 0x11) { - TestPassed = testhipModuleLoadNeg11(); - } else if (p_tests == 0x12) { - TestPassed = testhipModuleLoadNeg12(); - } else if (p_tests == 0x13) { - TestPassed = testhipModuleLoadNeg13(); - } else if (p_tests == 0x14) { - TestPassed = testhipModuleLoadNeg14(); - } else if (p_tests == 0x15) { - TestPassed = testhipModuleLoadNeg15(); - } else if (p_tests == 0x20) { - TestPassed = testhipModuleLoadDataNeg20(); - } else if (p_tests == 0x21) { - TestPassed = testhipModuleLoadDataNeg21(); - } else if (p_tests == 0x22) { - TestPassed = 
testhipModuleLoadDataNeg22(); - } else if (p_tests == 0x30) { - TestPassed = testhipModuleLoadDataExNeg30(); - } else if (p_tests == 0x31) { - TestPassed = testhipModuleLoadDataExNeg31(); - } else if (p_tests == 0x32) { - TestPassed = testhipModuleLoadDataExNeg32(); - } else if (p_tests == 0x40) { - TestPassed = testhipModuleGetFunctionNeg40(); - } else if (p_tests == 0x41) { - TestPassed = testhipModuleGetFunctionNeg41(); - } else if (p_tests == 0x42) { - TestPassed = testhipModuleGetFunctionNeg42(); - } else if (p_tests == 0x43) { - TestPassed = testhipModuleGetFunctionNeg43(); - } else if (p_tests == 0x44) { - TestPassed = testhipModuleGetFunctionNeg44(); - } else if (p_tests == 0x45) { - TestPassed = testhipModuleGetFunctionNeg45(); - } else if (p_tests == 0x50) { - TestPassed = testhipModuleGetGlobalNeg50(); - } else if (p_tests == 0x51) { - TestPassed = testhipModuleGetGlobalNeg51(); - } else if (p_tests == 0x52) { - TestPassed = testhipModuleGetGlobalNeg52(); - } else if (p_tests == 0x53) { - TestPassed = testhipModuleGetGlobalNeg53(); - } else if (p_tests == 0x54) { - TestPassed = testhipModuleGetGlobalNeg54(); - } else if (p_tests == 0x55) { - TestPassed = testhipModuleGetGlobalNeg55(); - } else if (p_tests == 0x56) { - TestPassed = testhipModuleGetGlobalNeg56(); - } else if (p_tests == 0x60) { - TestPassed = testhipModuleLoadNeg60(); - } else { - printf("Invalid Test Case \n"); - exit(1); - } -#ifdef __HIP_PLATFORM_NVIDIA__ - } - catch (const int sig) { - printf("catch exception %d\n", sig); - if (sig == SIGSEGV) { - TestPassed = true; - } - } - catch (...) 
{ - printf("catch unknown exception\n"); - } -#endif - - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp b/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp deleted file mode 100644 index 28d40a2e71..0000000000 --- a/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define fileName "vcpy_kernel.code" -#define kernel_name "hello_world" - -int main(int argc, char* argv[]) { - - int gridSize = 0; - int blockSize = 0; - int numBlock = 0; - HIPCHECK(hipInit(0)); - - hipDevice_t device; - hipCtx_t context; - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipCtxCreate(&context, 0, device)); - - hipModule_t Module; - hipFunction_t Function; - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - HIPCHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, Function, 0, 0)); - assert(gridSize != 0 && blockSize != 0); - HIPCHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, Function, blockSize, 0)); - assert(numBlock != 0); - HIPCHECK(hipModuleUnload(Module)); - HIPCHECK(hipCtxDestroy(context)); - passed(); -} diff --git a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp b/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp deleted file mode 100644 index 94e753a585..0000000000 --- a/tests/src/runtimeApi/module/hipModuleTexture2dDrv.cpp +++ /dev/null @@ -1,670 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* Tests 6 and 7 are skipped for CUDA 11.2 due to cuda runtime issues */ -/* HIT_START - * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t --tests 0x01 - * TEST: %t --tests 0x02 - * TEST: %t --tests 0x03 - * TEST: %t --tests 0x04 - * TEST: %t --tests 0x05 - * TEST: %t --tests 0x06 - * TEST: %t --tests 0x07 - * TEST: %t --tests 0x10 - * TEST: %t --tests 0x11 - * TEST: %t --tests 0x12 - * TEST: %t --tests 0x13 - * TEST: %t --tests 0x14 - * TEST: %t --tests 0x15 - * HIT_END - */ -#include -#include -#include -#include -#include -#include -#include "test_common.h" - -#define CODEOBJ_FILE "tex2d_kernel.code" -#define NON_EXISTING_TEX_NAME "xyz" -#define EMPTY_TEX_NAME "" -#define GLOBAL_KERNEL_VAR "deviceGlobalFloat" -#define TEX_REF "ftex" -#define WIDTH 256 -#define HEIGHT 256 -#define MAX_STREAMS 4 -#define GRIDDIMX 16 -#define GRIDDIMY 16 -#define GRIDDIMZ 1 -#define BLOCKDIMZ 1 - -#ifdef __HIP_PLATFORM_NVIDIA__ - -#define CTX_CREATE() \ - hipCtx_t context;\ - initHipCtx(&context); - -#define CTX_DESTROY() HIPCHECK(hipCtxDestroy(context)); -#define ARRAY_DESTROY(array) HIPCHECK(hipArrayDestroy(array)); -#define HIP_TEX_REFERENCE hipTexRef -#define HIP_ARRAY hiparray -/** - * Internal Function - */ -void initHipCtx(hipCtx_t *pcontext) { - HIPCHECK(hipInit(0)); - hipDevice_t device; - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipCtxCreate(pcontext, 0, device)); -} 
- -#else // __HIP_PLATFORM_NVIDIA__ - -#define CTX_CREATE() -#define CTX_DESTROY() -#define ARRAY_DESTROY(array) HIPCHECK(hipFreeArray(array)); -#define HIP_TEX_REFERENCE textureReference* -#define HIP_ARRAY hipArray* -#endif // __HIP_PLATFORM_NVIDIA__ - -std::atomic g_thTestPassed(1); - -/** - * Validates negative scenarios for hipModuleGetTexRef - * texRef = nullptr - */ -bool testTexRefEqNullPtr() { - bool TestPassed = false; - hipModule_t Module; - CTX_CREATE() - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if (hipSuccess != hipModuleGetTexRef(nullptr, Module, "tex")) { - TestPassed = true; - } else { - printf("Test Failed as texRef = nullptr returns hipSuccess \n"); - } - CTX_DESTROY() - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetTexRef - * name = nullptr - */ -bool testNameEqNullPtr() { - bool TestPassed = false; - hipModule_t Module; - HIP_TEX_REFERENCE texref; - CTX_CREATE() - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if (hipSuccess != hipModuleGetTexRef(&texref, Module, nullptr)) { - TestPassed = true; - } else { - printf("Test Failed as name = nullptr returns hipSuccess \n"); - } - CTX_DESTROY() - return TestPassed; -} -/** - * Validates negative scenarios for hipModuleGetTexRef - * name = Non Existing Tex Name - */ -bool testInvalidTexName() { - bool TestPassed = false; - hipModule_t Module; - HIP_TEX_REFERENCE texref; - CTX_CREATE() - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if (hipSuccess != hipModuleGetTexRef(&texref, Module, - NON_EXISTING_TEX_NAME)) { - TestPassed = true; - } else { - printf("Test Failed as invalid tex ref returns hipSuccess \n"); - } - CTX_DESTROY() - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetTexRef - * name = Empty Tex Name - */ -bool testEmptyTexName() { - bool TestPassed = false; - hipModule_t Module; - HIP_TEX_REFERENCE texref; - CTX_CREATE() - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if (hipSuccess != 
hipModuleGetTexRef(&texref, Module, EMPTY_TEX_NAME)) { - TestPassed = true; - } else { - printf("Test Failed as empty tex ref returns hipSuccess \n"); - } - CTX_DESTROY() - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetTexRef - * name = Global Kernel Variable - */ -bool testWrongTexRef() { - bool TestPassed = false; - hipModule_t Module; - HIP_TEX_REFERENCE texref; - CTX_CREATE() - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - if (hipSuccess != hipModuleGetTexRef(&texref, Module, GLOBAL_KERNEL_VAR)) { - TestPassed = true; - } else { - printf("Test Failed as global tex ref returns hipSuccess \n"); - } - CTX_DESTROY() - return TestPassed; -} - -/** - * Validates negative scenarios for hipModuleGetTexRef - * module = unloaded module - */ -bool testUnloadedMod() { - bool TestPassed = false; - hipModule_t Module; - HIP_TEX_REFERENCE texref; - CTX_CREATE() - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - HIPCHECK(hipModuleUnload(Module)); - if (hipSuccess != hipModuleGetTexRef(&texref, Module, TEX_REF)) { - TestPassed = true; - } else { - printf("Test Failed as unloaded module returns hipSuccess \n"); - } - CTX_DESTROY() - return TestPassed; -} -/** - * Internal Functions - * - */ -std::vector load_file() { - std::ifstream file(CODEOBJ_FILE, std::ios::binary | std::ios::ate); - std::streamsize fsize = file.tellg(); - file.seekg(0, std::ios::beg); - - std::vector buffer(fsize); - if (!file.read(buffer.data(), fsize)) { - failed("could not open code object '%s'\n", CODEOBJ_FILE); - } - return buffer; -} - -template void fillTestBuffer(unsigned int width, - unsigned int height, - T* hData) { - if (std::is_same::value) { - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j + 0.5; - } - } - } else if (std::is_same::value) { - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - } else if (std::is_same::value) { - 
for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = (i * width + j)% - (std::numeric_limits::max()); - } - } - } else if (std::is_same::value) { - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = (i * width + j)% - (std::numeric_limits::max()); - } - } - } -} - -void allocInitArray(unsigned int width, - unsigned int height, - hipArray_Format format, - HIP_ARRAY* array - ) { - HIP_ARRAY_DESCRIPTOR desc; - desc.Format = format; - desc.NumChannels = 1; - desc.Width = width; - desc.Height = height; - HIPCHECK(hipArrayCreate(array, &desc)); -} - -template void copyBuffer2Array(unsigned int width, - unsigned int height, - T* hData, - T1 array - ) { - hip_Memcpy2D copyParam; - memset(©Param, 0, sizeof(copyParam)); -#ifdef __HIP_PLATFORM_NVIDIA__ - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.dstArray = *array; -#else - copyParam.dstMemoryType = hipMemoryTypeArray; - copyParam.srcMemoryType = hipMemoryTypeHost; - copyParam.dstArray = array; -#endif - copyParam.srcHost = hData; - copyParam.srcPitch = width * sizeof(T); - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = height; - HIPCHECK(hipMemcpyParam2D(©Param)); -} - -template void assignArray2TexRef(hipArray_Format format, - const char* texRefName, - hipModule_t Module, - T array - ) { - HIP_TEX_REFERENCE texref; -#ifdef __HIP_PLATFORM_NVIDIA__ - HIPCHECK(hipModuleGetTexRef(&texref, Module, texRefName)); - HIPCHECK(hipTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)); - HIPCHECK(hipTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)); - HIPCHECK(hipTexRefSetFilterMode(texref, HIP_TR_FILTER_MODE_POINT)); - HIPCHECK(hipTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)); - HIPCHECK(hipTexRefSetFormat(texref, format, 1)); - HIPCHECK(hipTexRefSetArray(texref, *array, CU_TRSA_OVERRIDE_FORMAT)); -#else - HIPCHECK(hipModuleGetTexRef(&texref, Module, 
texRefName)); - HIPCHECK(hipTexRefSetAddressMode(texref, 0, hipAddressModeWrap)); - HIPCHECK(hipTexRefSetAddressMode(texref, 1, hipAddressModeWrap)); - HIPCHECK(hipTexRefSetFilterMode(texref, hipFilterModePoint)); - HIPCHECK(hipTexRefSetFlags(texref, HIP_TRSF_READ_AS_INTEGER)); - HIPCHECK(hipTexRefSetFormat(texref, format, 1)); - HIPCHECK(hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT)); -#endif -} - -template bool validateOutput(unsigned int width, - unsigned int height, - T* hData, - T* hOutputData) { - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - std::cout << "Difference [ " << i << " " << j << "]:" << - (int)hData[i * width + j] << "---" << (int)hOutputData[i * width + j] - << std::endl; - return false; - } - } - } - return true; -} -/** - * Validates texture type data functionality for hipModuleGetTexRef - * - */ -template bool testTexType(hipArray_Format format, - const char* texRefName, - const char* kerFuncName) { - bool TestPassed = true; - unsigned int width = WIDTH; - unsigned int height = HEIGHT; - unsigned int size = width * height * sizeof(T); - T* hData = reinterpret_cast(malloc(size)); - if (NULL == hData) { - printf("Failed to allocate using malloc in testTexType.\n"); - return false; - } - CTX_CREATE() - fillTestBuffer(width, height, hData); - // Load Kernel File and create hipArray - hipModule_t Module; - HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); - HIP_ARRAY array; - allocInitArray(width, height, format, &array); -#ifdef __HIP_PLATFORM_NVIDIA__ - // Copy from hData to array using hipMemcpyParam2D - copyBuffer2Array(width, height, hData, &array); - // Get tex reference from the loaded kernel file - // Assign array to the tex reference - assignArray2TexRef(format, texRefName, Module, &array); -#else - // Copy from hData to array using hipMemcpyParam2D - copyBuffer2Array(width, height, hData, array); - // Get tex reference from the loaded kernel 
file - // Assign array to the tex reference - assignArray2TexRef(format, texRefName, Module, array); -#endif - hipFunction_t Function; - HIPCHECK(hipModuleGetFunction(&Function, Module, kerFuncName)); - - T* dData = NULL; - HIPCHECK(hipMalloc(reinterpret_cast(&dData), size)); - - struct { - void* _Ad; - unsigned int _Bd; - unsigned int _Cd; - } args; - args._Ad = reinterpret_cast(dData); - args._Bd = width; - args._Cd = height; - - size_t sizeTemp = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, - &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &sizeTemp, - HIP_LAUNCH_PARAM_END}; - - int temp1 = width / GRIDDIMX; - int temp2 = height / GRIDDIMY; - HIPCHECK( - hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ, - temp1, temp2, BLOCKDIMZ, 0, 0, - NULL, reinterpret_cast(&config))); - HIPCHECK(hipDeviceSynchronize()); - T* hOutputData = reinterpret_cast(malloc(size)); - if (NULL == hOutputData) { - printf("Failed to allocate using malloc in testTexType.\n"); - TestPassed = false; - } else { - memset(hOutputData, 0, size); - HIPCHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost)); - TestPassed = validateOutput(width, height, hData, hOutputData); - } - free(hOutputData); - HIPCHECK(hipFree(dData)); - ARRAY_DESTROY(array) - HIPCHECK(hipModuleUnload(Module)); - free(hData); - CTX_DESTROY() - return TestPassed; -} - -/** - * Validates texture functionality with multiple streams for hipModuleGetTexRef - * - */ -template bool testTexMultStream(const std::vector& buffer, - hipArray_Format format, - const char* texRefName, - const char* kerFuncName, - unsigned int numOfStreams) { - bool TestPassed = true; - unsigned int width = WIDTH; - unsigned int height = HEIGHT; - unsigned int size = width * height * sizeof(T); - T* hData = reinterpret_cast(malloc(size)); - if (NULL == hData) { - printf("Failed to allocate using malloc in testTexMultStream.\n"); - return false; - } - CTX_CREATE() - fillTestBuffer(width, height, hData); - - // Load 
Kernel File and create hipArray - hipModule_t Module; - HIPCHECK(hipModuleLoadData(&Module, &buffer[0])); - HIP_ARRAY array; - allocInitArray(width, height, format, &array); -#ifdef __HIP_PLATFORM_NVIDIA__ - // Copy from hData to array using hipMemcpyParam2D - copyBuffer2Array(width, height, hData, &array); - // Get tex reference from the loaded kernel file - // Assign array to the tex reference - assignArray2TexRef(format, texRefName, Module, &array); -#else - // Copy from hData to array using hipMemcpyParam2D - copyBuffer2Array(width, height, hData, array); - // Get tex reference from the loaded kernel file - // Assign array to the tex reference - assignArray2TexRef(format, texRefName, Module, array); -#endif - hipFunction_t Function; - HIPCHECK(hipModuleGetFunction(&Function, Module, kerFuncName)); - - // Create Multiple Strings - hipStream_t streams[MAX_STREAMS]={0}; - T* dData[MAX_STREAMS] = {NULL}; - T* hOutputData[MAX_STREAMS] = {NULL}; - if (numOfStreams > MAX_STREAMS) { - numOfStreams = MAX_STREAMS; - } - unsigned int totalStreamsCreated = 0; - for (int stream_num = 0; stream_num < numOfStreams; stream_num++) { - hOutputData[stream_num] = reinterpret_cast(malloc(size)); - if (NULL == hOutputData[stream_num]) { - printf("Failed to allocate using malloc in testTexMultStream.\n"); - TestPassed &= false; - break; - } - HIPCHECK(hipStreamCreate(&streams[stream_num])); - HIPCHECK(hipMalloc(reinterpret_cast(&dData[stream_num]), size)); - memset(hOutputData[stream_num], 0, size); - struct { - void* _Ad; - unsigned int _Bd; - unsigned int _Cd; - } args; - args._Ad = reinterpret_cast(dData[stream_num]); - args._Bd = width; - args._Cd = height; - - size_t sizeTemp = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, - &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, - &sizeTemp, - HIP_LAUNCH_PARAM_END}; - - int temp1 = width / GRIDDIMX; - int temp2 = height / GRIDDIMY; - HIPCHECK( - hipModuleLaunchKernel(Function, GRIDDIMX, GRIDDIMY, GRIDDIMZ, - temp1, temp2, 
BLOCKDIMZ, 0, streams[stream_num], - NULL, reinterpret_cast(&config))); - totalStreamsCreated++; - } - // Check the kernel results separately - for (int stream_num = 0; stream_num < totalStreamsCreated; stream_num++) { - HIPCHECK(hipStreamSynchronize(streams[stream_num])); - HIPCHECK(hipMemcpy(hOutputData[stream_num], dData[stream_num], size, - hipMemcpyDeviceToHost)); - TestPassed &= validateOutput(width, height, hData, - hOutputData[stream_num]); - } - for (int i = 0; i < totalStreamsCreated; i++) { - HIPCHECK(hipFree(dData[i])); - HIPCHECK(hipStreamDestroy(streams[i])); - free(hOutputData[i]); - } - ARRAY_DESTROY(array) - HIPCHECK(hipModuleUnload(Module)); - free(hData); - CTX_DESTROY() - return TestPassed; -} - -/** - * Internal Thread Functions - * - */ -void launchSingleStreamMultGPU(int gpu, const std::vector& buffer) { - bool TestPassed = true; - HIPCHECK(hipSetDevice(gpu)); - TestPassed = testTexMultStream(buffer, - HIP_AD_FORMAT_FLOAT, - "ftex", - "tex2dKernelFloat", 1); - g_thTestPassed &= static_cast(TestPassed); -} - -void launchMultStreamMultGPU(int gpu, const std::vector& buffer) { - bool TestPassed = true; - HIPCHECK(hipSetDevice(gpu)); - TestPassed = testTexMultStream(buffer, - HIP_AD_FORMAT_FLOAT, - "ftex", - "tex2dKernelFloat", 3); - g_thTestPassed &= static_cast(TestPassed); -} -/** - * Validates texture functionality with Multiple Streams on multuple GPU - * for hipModuleGetTexRef - * - */ -bool testTexMultStreamMultGPU(unsigned int numOfGPUs, - const std::vector& buffer) { - bool TestPassed = true; - std::thread T[numOfGPUs]; - - for (int gpu = 0; gpu < numOfGPUs; gpu++) { - T[gpu] = std::thread(launchMultStreamMultGPU, gpu, buffer); - } - for (int gpu = 0; gpu < numOfGPUs; gpu++) { - T[gpu].join(); - } - - if (g_thTestPassed) { - TestPassed = true; - } else { - TestPassed = false; - } - return TestPassed; -} -/** - * Validates texture functionality with Single Stream on multuple GPU - * for hipModuleGetTexRef - * - */ -bool 
testTexSingleStreamMultGPU(unsigned int numOfGPUs, - const std::vector& buffer) { - bool TestPassed = true; - std::thread T[numOfGPUs]; - - for (int gpu = 0; gpu < numOfGPUs; gpu++) { - T[gpu] = std::thread(launchSingleStreamMultGPU, gpu, buffer); - } - for (int gpu = 0; gpu < numOfGPUs; gpu++) { - T[gpu].join(); - } - - if (g_thTestPassed) { - TestPassed = true; - } else { - TestPassed = false; - } - return TestPassed; -} - -int main(int argc, char** argv) { - HipTest::parseStandardArguments(argc, argv, true); - checkImageSupport(); - - bool TestPassed = true; - if (p_tests == 0x01) { - TestPassed = testTexType(HIP_AD_FORMAT_FLOAT, - "ftex", - "tex2dKernelFloat"); - } else if (p_tests == 0x02) { - TestPassed = testTexType(HIP_AD_FORMAT_SIGNED_INT32, - "itex", - "tex2dKernelInt"); - } else if (p_tests == 0x03) { - TestPassed = testTexType(HIP_AD_FORMAT_SIGNED_INT16, - "stex", - "tex2dKernelInt16"); - } else if (p_tests == 0x04) { - TestPassed = testTexType(HIP_AD_FORMAT_SIGNED_INT8, - "ctex", - "tex2dKernelInt8"); - } else if (p_tests == 0x05) { - auto buffer = load_file(); - TestPassed = testTexMultStream(buffer, - HIP_AD_FORMAT_FLOAT, - "ftex", - "tex2dKernelFloat", - MAX_STREAMS); - } else if (p_tests == 0x06) { - // Testcase skipped on nvidia with CUDA API version 11.2, - // as hipModuleLoadData returning error code - // 'a PTX JIT compilation failed'(218), which is invalid - // behavior. Test passes with AMD and previous CUDA versions. -#if defined(__HIP_PLATFORM_NVIDIA__) && (CUDA_VERSION == 11020) - printf("Testcase skipped on CUDA version 11.2\n"); - TestPassed = true; -#else - int gpu_cnt = 0; - auto buffer = load_file(); - HIPCHECK(hipGetDeviceCount(&gpu_cnt)); - TestPassed = testTexSingleStreamMultGPU(gpu_cnt, buffer); -#endif - } else if (p_tests == 0x07) { - // Testcase skipped on nvidia with CUDA API version 11.2, - // as hipModuleLoadData returning error code - // 'a PTX JIT compilation failed'(218), which is invalid - // behavior. 
Test passes with AMD and previous CUDA versions. -#if defined(__HIP_PLATFORM_NVIDIA__) && (CUDA_VERSION == 11020) - printf("Testcase skipped on CUDA version 11.2\n"); - TestPassed = true; -#else - int gpu_cnt = 0; - auto buffer = load_file(); - HIPCHECK(hipGetDeviceCount(&gpu_cnt)); - TestPassed = testTexMultStreamMultGPU(gpu_cnt, buffer); -#endif - } else if (p_tests == 0x10) { - TestPassed = testTexRefEqNullPtr(); - } else if (p_tests == 0x11) { - TestPassed = testNameEqNullPtr(); - } else if (p_tests == 0x12) { - TestPassed = testInvalidTexName(); - } else if (p_tests == 0x13) { - TestPassed = testEmptyTexName(); - } else if (p_tests == 0x14) { - TestPassed = testWrongTexRef(); - } else if (p_tests == 0x15) { - TestPassed = testUnloadedMod(); - } else { - printf("Invalid Test Case \n"); - exit(1); - } - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/runtimeApi/module/hipModuleUnload.cpp b/tests/src/runtimeApi/module/hipModuleUnload.cpp deleted file mode 100644 index d7843548ca..0000000000 --- a/tests/src/runtimeApi/module/hipModuleUnload.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include - -#define fileName "vcpy_isa.co" - -int main() { - hipModule_t module; - hipModuleLoad(&module, fileName); - hipModuleUnload(module); -} diff --git a/tests/src/runtimeApi/module/hipOpenCLCOTest.cpp b/tests/src/runtimeApi/module/hipOpenCLCOTest.cpp deleted file mode 100644 index fcf92dad87..0000000000 --- a/tests/src/runtimeApi/module/hipOpenCLCOTest.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t --tests 0x1 - * TEST: %t --tests 0x2 - * HIT_END - */ -#ifdef __linux__ -#include -#endif -#include -#include -#include -#include "test_common.h" - -#define OPENCL_OBJ_FILE "opencl_add.cpp" -#define HIP_CODEOBJ_FILE_DEFAULT "opencl_add.co" -#define HIP_CODEOBJ_FILE_V3 "opencl_add_v3.co" -#define COMMAND_LEN 256 -#define BUFFER_LEN 256 -/** - * Validates OpenCL Static Lds Code Object - * - */ -bool testStaticLdsCodeObj(const char* pCodeObjFile) { - hipDevice_t device; - hipModule_t Module; - hipFunction_t Function; - printf("Executing %s \n", __func__); - HIPCHECK(hipDeviceGet(&device, 0)); - HIPCHECK(hipModuleLoad(&Module, pCodeObjFile)); - HIPCHECK(hipModuleGetFunction(&Function, Module, "add")); - - float *Ah, *Bh; - Ah = new float[BUFFER_LEN]; - Bh = new float[BUFFER_LEN]; - for (uint32_t i = 0; i < BUFFER_LEN; i++) { - Ah[i] = i * 1.0f; - Bh[i] = 0.0f; - } - - float *Ad, *Bd; - HIPCHECK(hipMalloc(&Ad, sizeof(float) * BUFFER_LEN)); - HIPCHECK(hipMalloc(&Bd, sizeof(float) * BUFFER_LEN)); - HIPCHECK(hipMemcpy(Ad, Ah, sizeof(float) * BUFFER_LEN, - hipMemcpyHostToDevice)); - - struct { - void* _Bd; - void* _Ad; - } args; - args._Ad = static_cast(Ad); - args._Bd = static_cast(Bd); - size_t size = sizeof(args); - - void *config[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END - }; - - HIPCHECK(hipModuleLaunchKernel(Function, 1, 1, 1, BUFFER_LEN, 1, 1, 0, 0, - NULL, reinterpret_cast(&config))); - HIPCHECK(hipMemcpy(Bh, Bd, sizeof(float) * BUFFER_LEN, - hipMemcpyDeviceToHost)); - - bool TestPassed = true; - for (uint32_t i = 0; i < BUFFER_LEN; i++) { 
- if (Ah[i] != Bh[i]) { - TestPassed = false; - break; - } - } - hipFree(Ad); - hipFree(Bd); - delete[] Ah; - delete[] Bh; - return TestPassed; -} - -#ifdef __linux__ -/** - * Check if environment variable $ROCM_PATH is defined - * - */ -bool isRocmPathSet() { - FILE *fpipe; - char const *command = "echo $ROCM_PATH"; - fpipe = popen(command, "r"); - - if (fpipe == nullptr) { - printf("Unable to create command\n"); - return false; - } - char command_op[BUFFER_LEN]; - if (fgets(command_op, BUFFER_LEN, fpipe)) { - size_t len = strlen(command_op); - if (len > 1) { // This is because fgets always adds newline character - pclose(fpipe); - return true; - } - } - pclose(fpipe); - return false; -} -/** - * Gets the sramecc/xnack settings from rocm info - * - */ -int getV3TargetIdFeature(char* feature, bool rocmPathSet) { - FILE *fpipe; - char command[COMMAND_LEN] = ""; - const char *rocmpath = nullptr; - if (rocmPathSet) { - // For STG2 testing where /opt/rocm path is not present - rocmpath = "$ROCM_PATH/bin/rocminfo"; - } else { - // Check if the rocminfo tool exists - rocmpath = "/opt/rocm/bin/rocminfo"; - } - snprintf(command, COMMAND_LEN, "%s", rocmpath); - strncat(command, " | grep -m1 \"sramecc.:xnack.\"", COMMAND_LEN); - fpipe = popen(command, "r"); - - if (fpipe == nullptr) { - printf("Unable to create command file\n"); - return -1; - } - char command_op[BUFFER_LEN]; - const char* pOpt1 = nullptr; - const char *pOpt2 = nullptr; - if (fgets(command_op, BUFFER_LEN, fpipe)) { - if (strstr(command_op, "sramecc+")) { - pOpt1 = "-msram-ecc"; - } else if (strstr(command_op, "sramecc-")) { - pOpt1 = "-mno-sram-ecc"; - } else { - pclose(fpipe); - return -1; - } - if (strstr(command_op, "xnack+")) { - pOpt2 = " -mxnack"; - } else if (strstr(command_op, "xnack-")) { - pOpt2 = " -mno-xnack"; - } else { - pclose(fpipe); - return -1; - } - } else { - printf("No sramecc/xnack settings found.\n"); - pclose(fpipe); - return -1; - } - strncpy(feature, pOpt1, strlen(pOpt1)); - 
strncat(feature, pOpt2, strlen(pOpt2)); - pclose(fpipe); - return 0; -} -#endif - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - int version = HIP_VERSION_MAJOR; -#ifdef __linux__ - char command[COMMAND_LEN] = ""; - char v3option[32] = ""; - hipDeviceProp_t props; - hipGetDeviceProperties(&props, 0); - if (access("./opencl_add.cpp", F_OK) == -1) { - system("cp ../tests/src/runtimeApi/module/opencl_add.cpp ."); - } - // Generate the command to translate the OpenCL code object to hip code object - const char *pCodeObjVer = nullptr; - const char *pCodeObjFile = nullptr; - bool rocmPathSet = isRocmPathSet(); - if (p_tests == 0x1) { - pCodeObjVer = ""; - pCodeObjFile = HIP_CODEOBJ_FILE_DEFAULT; - } else if ((p_tests == 0x2) && (version >= 4)) { - pCodeObjVer = "-mcode-object-version=3"; - if (-1 == getV3TargetIdFeature(v3option, rocmPathSet)) { - printf("Error getting V3 Option. Skipping Test. \n"); - passed(); - } - pCodeObjFile = HIP_CODEOBJ_FILE_V3; - } else { - printf("Invalid Test Case \n"); - passed(); - } - printf("v3option = %s\n", v3option); - /* The command string is created using multiple concatenation instead of one go - to avoid the following cpplint error: - " Multi-line string ("...") found. This lint script doesn't do well with such strings, - and may give bogus warnings. Use C++11 raw strings or concatenation instead." 
- */ - if (rocmPathSet) { - // For STG2 testing where /opt/rocm path is not present - snprintf(command, COMMAND_LEN, - "$ROCM_PATH/llvm/bin/clang -target amdgcn-amd-amdhsa -x cl "); - } else { - snprintf(command, COMMAND_LEN, - "/opt/rocm/llvm/bin/clang -target amdgcn-amd-amdhsa -x cl "); - } - char command_temp[COMMAND_LEN] = ""; - snprintf(command_temp, COMMAND_LEN, - "-include `find /opt/rocm* -name opencl-c.h` %s %s -mcpu=%s -o %s %s", - pCodeObjVer, v3option, props.gcnArchName, pCodeObjFile, OPENCL_OBJ_FILE); - - strncat(command, command_temp, COMMAND_LEN); - printf("command executed = %s\n", command); - - system((const char*)command); - // Check if the code object file is created - snprintf(command, COMMAND_LEN, "./%s", - pCodeObjFile); - - if (access(command, F_OK) == -1) { - printf("Code Object File not found \n"); - passed(); - } - TestPassed = testStaticLdsCodeObj(pCodeObjFile); - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -#else - printf("This test is skipped due to non linux environment.\n"); - passed(); -#endif -} diff --git a/tests/src/runtimeApi/module/kernel_composite_test.cpp b/tests/src/runtimeApi/module/kernel_composite_test.cpp deleted file mode 100644 index 3923b635eb..0000000000 --- a/tests/src/runtimeApi/module/kernel_composite_test.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "hip/hip_runtime.h" -#define GLOBAL_BUF_SIZE 2048 - -__device__ float deviceGlobalFloat; -__device__ int deviceGlobalInt1; -__device__ int deviceGlobalInt2; -__device__ short deviceGlobalShort; -__device__ char deviceGlobalChar; - -__device__ int getSquareOfGlobalFloat() { - return static_cast(deviceGlobalFloat*deviceGlobalFloat); -} - -extern "C" __global__ void testWeightedCopy(int* a, int* b) { - int tx = threadIdx.x; - b[tx] = deviceGlobalInt1*a[tx] + deviceGlobalInt2 + - static_cast(deviceGlobalShort) + static_cast(deviceGlobalChar) - + getSquareOfGlobalFloat(); -} diff --git a/tests/src/runtimeApi/module/managed_kernel.cpp b/tests/src/runtimeApi/module/managed_kernel.cpp deleted file mode 100644 index 745c92b02d..0000000000 --- a/tests/src/runtimeApi/module/managed_kernel.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include "hip/hip_runtime.h" -__managed__ int x = 10; - -extern "C" __global__ void GPU_func() { - x++; -} diff --git a/tests/src/runtimeApi/module/matmul.cpp b/tests/src/runtimeApi/module/matmul.cpp deleted file mode 100644 index de7c935f53..0000000000 --- a/tests/src/runtimeApi/module/matmul.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* -Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include"hip/hip_runtime.h" -__device__ int deviceGlobal = 1; - -extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C, - int N) { - int ROW = blockIdx.y*blockDim.y+threadIdx.y; - int COL = blockIdx.x*blockDim.x+threadIdx.x; - int tmpSum = 0; - if ((ROW < N) && (COL < N)) { - // each thread computes one element of the block sub-matrix - for (int i = 0; i < N; i++) { - tmpSum += A[ROW * N + i] * B[i * N + COL]; - } - C[ROW * N + COL] = tmpSum; - } -} - -extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C, - int *D, int N) { - int ROW = blockIdx.y*blockDim.y+threadIdx.y; - int COL = blockIdx.x*blockDim.x+threadIdx.x; - int tmpSum = 0; - if (ROW < N && COL < N) { - // each thread computes one element of the block sub-matrix - for (int i = 0; i < N; i++) { - tmpSum += A[ROW * N + i] * B[i * N + COL]; - } - } - C[ROW * N + COL] = tmpSum; - D[ROW * N + COL] = tmpSum; -} - -extern "C" __global__ void SixteenSecKernel(int clockrate) { - uint64_t wait_t = 16000, - start = clock64()/clockrate, cur; - do { cur = clock64()/clockrate-start;}while (cur < wait_t); -} - -extern "C" __global__ void TwoSecKernel(int clockrate) { - if (deviceGlobal == 0x2222) { - deviceGlobal = 0x3333; - } - uint64_t wait_t = 2000, - start = clock64()/clockrate, cur; - do { cur = clock64()/clockrate-start;}while (cur < wait_t); - if (deviceGlobal != 0x3333) { - deviceGlobal = 0x5555; - } -} - -extern "C" __global__ void FourSecKernel(int clockrate) { - if (deviceGlobal == 1) { - deviceGlobal = 0x2222; - } - uint64_t wait_t = 4000, - start = clock64()/clockrate, cur; - do { cur = clock64()/clockrate-start;}while (cur < wait_t); - if (deviceGlobal == 0x2222) { - deviceGlobal = 0x4444; - } -} - -extern "C" __global__ void SixteenSecKernel_gfx11(int clockrate) { -#ifdef __HIP_PLATFORM_AMD__ - uint64_t wait_t = 16000, - start = wall_clock64()/clockrate, cur; - do { cur = wall_clock64()/clockrate-start;}while (cur < wait_t); -#endif -} - -extern "C" __global__ 
void TwoSecKernel_gfx11(int clockrate) { -#ifdef __HIP_PLATFORM_AMD__ - if (deviceGlobal == 0x2222) { - deviceGlobal = 0x3333; - } - uint64_t wait_t = 2000, - start = wall_clock64()/clockrate, cur; - do { cur = wall_clock64()/clockrate-start;}while (cur < wait_t); - if (deviceGlobal != 0x3333) { - deviceGlobal = 0x5555; - } -#endif -} - -extern "C" __global__ void FourSecKernel_gfx11(int clockrate) { -#ifdef __HIP_PLATFORM_AMD__ - if (deviceGlobal == 1) { - deviceGlobal = 0x2222; - } - uint64_t wait_t = 4000, - start = wall_clock64()/clockrate, cur; - do { cur = wall_clock64()/clockrate-start;}while (cur < wait_t); - if (deviceGlobal == 0x2222) { - deviceGlobal = 0x4444; - } -#endif -} - -extern "C" __global__ void dummyKernel() { -} diff --git a/tests/src/runtimeApi/module/opencl_add.cpp b/tests/src/runtimeApi/module/opencl_add.cpp deleted file mode 100644 index 2341b74115..0000000000 --- a/tests/src/runtimeApi/module/opencl_add.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -kernel void add(global float* output, global float* input) { - __local float lds[100]; - int id = get_global_id(0); - - if (id == 0) { - for (int i = 0; i < 100; i++) { - lds[i] = input[i]; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (id < 100) { - output[id] = lds[id]; - } else { - output[id] = input[id]; - } -} diff --git a/tests/src/runtimeApi/module/tex2d_kernel.cpp b/tests/src/runtimeApi/module/tex2d_kernel.cpp deleted file mode 100644 index f1d008e930..0000000000 --- a/tests/src/runtimeApi/module/tex2d_kernel.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#include "hip/hip_runtime.h" - -texture ftex; -texture itex; -texture stex; -texture ctex; - -__device__ float deviceGlobalFloat; - -extern "C" __global__ void tex2dKernelFloat(float* outputData, - int width, int height) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if ((x < width) && (y < width)) { - outputData[y * width + x] = tex2D(ftex, x, y); - } -#endif -} - -extern "C" __global__ void tex2dKernelInt(int* outputData, - int width, int height) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if ((x < width) && (y < width)) { - outputData[y * width + x] = tex2D(itex, x, y); - } -#endif -} - -extern "C" __global__ void tex2dKernelInt16(short* outputData, - int width, int height) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if ((x < width) && (y < width)) { - outputData[y * width + x] = tex2D(stex, x, y); - } -#endif -} - -extern "C" __global__ void tex2dKernelInt8(char* outputData, - int width, int height) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; - int y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; - if ((x < width) && (y < width)) { - outputData[y * width + x] = tex2D(ctex, x, y); - } -#endif -} diff --git a/tests/src/runtimeApi/module/vcpy_kernel.code b/tests/src/runtimeApi/module/vcpy_kernel.code deleted file mode 100644 index 2aa9f253b3334b6d0769dfaeb2a0a1100ec400f8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18343 zcmeHO-ES0C6u;fAw8f92zMw%}BZ*b%xIk?d6LD*oQj?{Ck*F^Y)7{zGNoQv!JG*5| zf-OW79!Nx<v-h0sbnM!0Z4JV`$;`Q5 
z=iEDI?$>Y58C@UD4vg&7cMlC^cMoLrm-dZhvO9ErjgaBBHi{5=h-lFZD!sK-zuw>1 zTXn}=?||Fu*lxAP3^Vu2nj4tG_Ir&|-paX1@G1sIuT`j}l3P2vWu+zaY)dA;Ai*v0 z^jlBMc23K#F@%2(znp#e-l@aV%TBJl5 z-0u_j2vmQ2Jx`XWC4o{Iej~w5J1HNtk*`^p?T{r*#X6ZrzPm8Wf~cQ@=&-CNn1=bV zufxPQ+#@S8^@)#Zly8>!W>c~vQ(^;>kx}Yr3{tJMN8-gRKUVUnm&N*diS$G26?=h? zr-JOr((c$6UMcMG0(1iwLSRZsfz7XhL;~o*uFTFo`-C`hPEHnW%q+X66TJ8CY0W5G z!O3^dX+}O@HiJM9d?RO`wM^G6+qtuvS11H#MV!_G`=Hr5t&JN_)zrsqH~*RCc%z1+ zk5&r>v&y_Q#*O0M($ucII z%1A6(#=)qJsCLfCe)ALdEFgKBp3wDNGc4{yJGF?kk%vq3kEEIJl-Yd*3Wdk-h+*NndaRA_nQae zu-Zh{_kPH~KfXpx(K^UnD4(x~Ar5<+*elu-n0Ne{N868{IeB9LwV`V-Y`5OWeWbpZ z0L68UmOzNj1?0mJ$D6JfZC}ru<95!}i>Bev&+*{}X{3Q<$RtqiLPC zPsK4P>}pz}wSt&fJBu7gMEk}Lar8>3ICW)G9J|^Ui@gO(*)kQ2!H&1*22_d1AumRG zKGYGyzabTK_$u!`uT$i16bgY}TSL#A=zY;~LhO*$#4mr|^f3Aj@$)|O2MeZpQIfS? z*lnIqWV4w>pOzFkoM{Ofjfs?&Ohh^6r|a#LQAM2B*cP9wcpTyZ z{@0r~J?7`}#&i?$b2;-Ya!wHB#3m5V?rWGrO9&|WTKiYxkh_}8TLY(IfpI7<(j5@-=`r~D7 zJLNGy?*n|kZk(g*+=7qcloA3*A@-m3%mvK9ku!*5T?|aK5NF-h3+VO}b@Vu;5foxQ z?mw&PRmkJMQ`I`D(SlqEXF{CYgj^&E`fBl)^L?Q06J$?+lY%D$%TYd8{= zeNE1Xv?PstG_CAwcZPi}K@JXEVIIc1-WRs7Ew#OxLfId3W}8em^M~A1uPwx#>koN;|1r=(mxQyVjpdV===>SDe+PfazK%Hf zL&m`$66|Y2bYBC}$6B{@oJP|k!H0uA!^M=poj2D*b z|H<=<@k9eRP@C&QZDX{V|0nZrjMQg5z_gkF=XZo>juU^Y{+~s1XfHHrpR=3!e|}B; zE9L*m_!Qrl!lxp%j`N#P_*DI`$A84B|MlemjjjLvr|gOUD|@2Cr=dUi{+s{x6h6I` jFtEa>^8B;dcO!*QrJfW%m3mP4G=du_sqiW5Mbf_jX&~li diff --git a/tests/src/runtimeApi/module/vcpy_kernel.cpp b/tests/src/runtimeApi/module/vcpy_kernel.cpp deleted file mode 100644 index 214a869b22..0000000000 --- a/tests/src/runtimeApi/module/vcpy_kernel.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "hip/hip_runtime.h" - -extern "C" __global__ void hello_world(float* a, float* b) { - int tx = threadIdx.x; - b[tx] = a[tx]; -} diff --git a/tests/src/runtimeApi/multiThread/hipMultiThreadDevice.cpp b/tests/src/runtimeApi/multiThread/hipMultiThreadDevice.cpp deleted file mode 100644 index 18bf9c0bd2..0000000000 --- a/tests/src/runtimeApi/multiThread/hipMultiThreadDevice.cpp +++ /dev/null @@ -1,148 +0,0 @@ -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST_NAMED: %t hipMultiThreadDevice-serial --tests 0x1 - * TEST_NAMED: %t hipMultiThreadDevice-pyramid --tests 0x4 - * TEST_NAMED: %t hipMultiThreadDevice-nearzero --tests 0x10 - * HIT_END - */ - -#include "hip/hip_runtime_api.h" -#include "test_common.h" - -#ifdef _WIN32 -#define MAX_BURST_SIZE 40 -#else -#define MAX_BURST_SIZE 100 -#endif - -// Create a lot of streams and then destroy 'em. 
-void createThenDestroyStreams(int iterations, int burstSize) { - hipStream_t* streams = new hipStream_t[burstSize]; - - for (int i = 0; i < iterations; i++) { - if (p_verbose & 0x1) { - printf("%s iter=%d, create %d then destroy %d\n", __func__, i, burstSize, burstSize); - } - for (int j = 0; j < burstSize; j++) { - if (p_verbose & 0x2) { - printf(" %d.%d streamCreate\n", i, j); - } - HIPCHECK(hipStreamCreate(&streams[j])); - } - for (int j = 0; j < burstSize; j++) { - if (p_verbose & 0x2) { - printf(" %d.%d streamDestroy\n", i, j); - } - HIPCHECK(hipStreamDestroy(streams[j])); - } - } - - delete[] streams; -} - - -void waitStreams(int iterations) { - // Repeatedly sync and wait for all streams to complete. - // TO make this interesting, the test has other threads repeatedly adding and removing streams - // to the device. - for (int i = 0; i < iterations; i++) { - HIPCHECK(hipDeviceSynchronize()); - } -} - - -// Create 3 streams, all creating and destroying streams on the same device. -// Some create many queue, some not many. -// -void multiThread_pyramid(bool serialize, int iters) { - printf("%s creating %d streams\n", __func__, iters * MAX_BURST_SIZE); - std::thread t1(createThenDestroyStreams, iters * 1, MAX_BURST_SIZE); - if (serialize) { - t1.join(); - printf("t1 done\n"); - } - - std::thread t2(createThenDestroyStreams, iters * 10, 10); - if (serialize) { - t2.join(); - printf("t2 done\n"); - } - - std::thread t3(createThenDestroyStreams, iters * 100, 1); - if (serialize) { - t3.join(); - printf("t3 done\n"); - } - - if (!serialize) { - t1.join(); - t2.join(); - t3.join(); - } -} - - -// Create 3 streams, all creating and destroying streams on the same device. -// Try to keep number of streams near zero, to cause problems. 
-void multiThread_nearzero(bool serialize, int iters) { - printf("%s creating %d streams x 3 threads\n", __func__, iters); - std::thread t1(createThenDestroyStreams, iters, 1); - if (serialize) { - t1.join(); - printf("t1 done\n"); - } - - std::thread t2(createThenDestroyStreams, iters, 1); - if (serialize) { - t2.join(); - printf("t2 done\n"); - } - - std::thread t3(waitStreams, iters * 50); - if (serialize) { - t3.join(); - printf("t3 done\n"); - } - - if (!serialize) { - t1.join(); - printf("t1 done\n"); - t2.join(); - printf("t2 done\n"); - t3.join(); - printf("t3 done\n"); - } -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - // Serial version, just call once: - if (p_tests & 0x1) { - printf("\ntest 0x1 : serial createThenDestroyStreams(10) \n"); - createThenDestroyStreams(10, 10); - }; - - /*disable, this takess a while and if the next one works then no need to run serial*/ - if (1 && (p_tests & 0x2)) { - printf("\ntest 0x2 : serialized multiThread_pyramid(1) \n"); - multiThread_pyramid(true, 3); - } - - if (p_tests & 0x4) { - printf("\ntest 0x4 : parallel multiThread_pyramid(1) \n"); - multiThread_pyramid(false, 3); - } - - // if (p_tests & 0x8) { - // printf ("test 0x8 : multiThread_pyramid(100) \n"); - // multiThread_pyramid(false, 100); - // } - - if (p_tests & 0x10) { - printf("\ntest 0x10 : parallel multiThread_nearzero(1000) \n"); - multiThread_nearzero(false, 1000); - } - - passed(); -} diff --git a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp b/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp deleted file mode 100644 index b8fca4d02f..0000000000 --- a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -int p_iters = 10; - -void printSep() { - printf( - "======================================================================================\n"); -} - -//--- -// Test simple H2D copies and back. 
-// Designed to stress a small number of simple smoke tests - -template -void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) { - using HipTest::MemTraits; - - std::thread::id pid = std::this_thread::get_id(); - - printf("test: %s <%s> %s %s\n", __func__, TYPENAME(T), P::str(), C::str()); - size_t Nbytes = numElements * sizeof(T); - printf("numElements=%zu Nbytes=%6.2fMB\n", numElements, Nbytes / 1024.0 / 1024.0); - - T *A_d, *B_d, *C_d; - T *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned); - for (size_t i = 0; i < numElements; i++) { - A_h[i] = 1000.0f; - B_h[i] = 2000.0f; - C_h[i] = -1; - } - - - MemTraits::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); - MemTraits::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); - MemTraits::Copy(C_d, C_h, Nbytes, hipMemcpyHostToDevice, stream); - HIPCHECK(hipDeviceSynchronize()); - - for (size_t i = 0; i < numElements; i++) { - A_h[i] = 1.0f; - B_h[i] = 2.0f; - C_h[i] = -1; - } - - - for (int i = 0; i < iters; i++) { - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - - MemTraits::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); - MemTraits::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); - - // HIPCHECK(hipStreamSynchronize(stream)); - - // This is the null stream? 
- // hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, - // C_d, numElements); - hipLaunchKernelGGL(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0, - static_cast(A_d), static_cast(B_d), C_d, numElements); - - MemTraits::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream); - - HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkVectorADD(A_h, B_h, C_h, numElements); - } - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned); - std::cout << " pid" << pid << " success\n"; - HIPCHECK(hipDeviceSynchronize()); -} - -template -void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t stream1, - bool serialize) { - printSep(); - printf("%s\n", __func__); - std::cout << testName << std::endl; - - size_t numElements = N; - - // Test 2 threads operating on same stream: - std::thread t1(simpleVectorAdd, numElements, p_iters /*iters*/, stream0); - if (serialize) { - t1.join(); - } - std::thread t2(simpleVectorAdd, numElements, p_iters /*iters*/, stream1); - if (serialize) { - t2.join(); - } - - if (!serialize) { - t1.join(); - t2.join(); - } - - HIPCHECK(hipDeviceSynchronize()); -}; - - -int main(int argc, char* argv[]) { - N = 8000000; - HipTest::parseStandardArguments(argc, argv, true); - - printf("info: set device to %d\n", p_gpuDevice); - HIPCHECK(hipSetDevice(p_gpuDevice)); - - - if (p_tests & 0x1) { - HIPCHECK(hipDeviceReset()); - - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - simpleVectorAdd(N /*mb*/, 10 /*iters*/, - stream); - simpleVectorAdd(N /*mb*/, 10 /*iters*/, stream); - - HIPCHECK(hipStreamDestroy(stream)); - } - - - hipStream_t stream0, stream1; - HIPCHECK(hipStreamCreate(&stream0)); - HIPCHECK(hipStreamCreate(&stream1)); - - if (p_tests & 0x2) { - // Easy tests to verify the test works - these don't allow overlap between the threads: - test_multiThread_1("Multithread NULL with serialized", NULL, - NULL, true); - test_multiThread_1("Multithread two 
streams serialized", - stream0, stream1, true); - } - - if (p_tests & 0x4) { - // test_multiThread_1 ("Multithread with NULL stream", NULL, - // NULL, false); test_multiThread_1 ("Multithread with two - // streams", stream0, stream1, false); - test_multiThread_1("Multithread with one stream", stream0, - stream0, false); - } - - passed(); -} diff --git a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp b/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp deleted file mode 100644 index 752ffd870d..0000000000 --- a/tests/src/runtimeApi/multiThread/hipMultiThreadStreams2.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include -#include "test_common.h" -#include -#define N 1000 - - -template -__global__ void Inc(T* Array) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - Array[tx] = Array[tx] + T(1); -} - -void run1(size_t size, hipStream_t stream) { - float *Ah, *Bh, *Cd, *Dd, *Eh; - - HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault)); - HIPCHECK(hipMalloc(&Cd, size)); - HIPCHECK(hipMalloc(&Dd, size)); - HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault)); - - for (int i = 0; i < N; i++) { - Ah[i] = 1.0f; - } - - HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream)); - HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream, Cd); - HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream)); - HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream)); - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(Eh[10] == Ah[10] + 1.0f); -} - - -void run(size_t size, hipStream_t stream1, hipStream_t stream2) { - float *Ah, *Bh, *Cd, *Dd, *Eh; - float *Ahh, *Bhh, *Cdd, *Ddd, *Ehh; - - HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault)); - HIPCHECK(hipMalloc(&Cd, size)); - HIPCHECK(hipMalloc(&Dd, size)); - HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&Ahh, size, hipHostMallocDefault)); - HIPCHECK(hipHostMalloc((void**)&Bhh, size, hipHostMallocDefault)); - HIPCHECK(hipMalloc(&Cdd, size)); - HIPCHECK(hipMalloc(&Ddd, size)); - HIPCHECK(hipHostMalloc((void**)&Ehh, size, hipHostMallocDefault)); - - HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream1)); - HIPCHECK(hipMemcpyAsync(Bhh, Ahh, 
size, hipMemcpyHostToHost, stream2)); - HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1)); - HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream1, Cd); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream2, Cdd); - HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1)); - HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2)); - HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1)); - HIPCHECK(hipMemcpyAsync(Ehh, Ddd, size, hipMemcpyDeviceToHost, stream2)); - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(Eh[10] = Ah[10] + 1.0f); - HIPASSERT(Ehh[10] = Ahh[10] + 1.0f); - - HIPCHECK(hipHostFree(Ah)); - HIPCHECK(hipHostFree(Bh)); - HIPCHECK(hipHostFree(Eh)); - HIPCHECK(hipHostFree(Ahh)); - HIPCHECK(hipHostFree(Bhh)); - HIPCHECK(hipHostFree(Ehh)); - HIPCHECK(hipFree(Cd)); - HIPCHECK(hipFree(Dd)); - HIPCHECK(hipFree(Cdd)); - HIPCHECK(hipFree(Ddd)); -} - -int main(int argc, char** argv) { - iterations = 100; - - HipTest::parseStandardArguments(argc, argv, true); - - - hipStream_t stream[3]; - for (int i = 0; i < 3; i++) { - HIPCHECK(hipStreamCreate(&stream[i])); - } - - const size_t size = N * sizeof(float); - - for (int i = 0; i < iterations; i++) { - std::thread t1(run1, size, stream[0]); - std::thread t2(run1, size, stream[0]); - std::thread t3(run, size, stream[1], stream[2]); - t1.join(); - // std::cout<<"T1"< -#include -#include "test_common.h" - -__global__ void f1(float *a) { *a = 1.0; } - -template -__global__ void f2(T *a) { *a = 1; } - -/** - * Defines - */ -#define OccupancyDisableCachingOverride 0x01 - -/** - * Performs argument validation - */ -bool argValidation() { - bool TestPassed = true; - hipError_t ret; - int numBlock = 0, blockSize = 0; - int gridSize = 0, defBlkSize = 32; - - // Get potential blocksize - 
HIPCHECK(hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, 0, 0)); - - // Validate each argument - if ((ret = hipOccupancyMaxActiveBlocksPerMultiprocessor(NULL, f1, - blockSize, 0)) != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for" - " numBlock(NULL). Error: '%s'(%d)\n", hipGetErrorString(ret), ret); - TestPassed &= false; - } - - ret = hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, NULL, - blockSize, 0); - if (ret != hipErrorInvalidValue && ret != hipErrorInvalidDeviceFunction) { - printf("ArgValidation : Inappropriate error value returned for" - " kernelfunc(NULL). numBlk %d, Error: '%s'(%d)\n", numBlock, - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, - f1, 0, 0)) != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for" - " blksize(0), shm(0). numBlk %d, Error: '%s'(%d)\n", numBlock, - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, - f1, 0, std::numeric_limits::max())) - != hipErrorInvalidValue) { - printf("ArgValidation : Inappropriate error value returned for" - " blksize(0), shm(max). numBlk %d, Error: '%s'(%d)\n", numBlock, - hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlock, - f1, defBlkSize, 0, OccupancyDisableCachingOverride)) - != hipSuccess) { - printf("ArgValidation : Occupancy api with flags returned '%s'(%d)." 
- " Expected to return hipSuccess(0)\n", hipGetErrorString(ret), ret); - TestPassed &= false; - } - - return TestPassed; -} - - -/** - * Performs range validation on api output - */ -bool rangeValidation() { - hipDeviceProp_t devProp; - bool TestPassed = true; - int numBlock = 0, blockSize = 0; - int gridSize = 0; - - // Get potential blocksize - HIPCHECK(hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, 0, 0)); - - HIPCHECK(hipGetDeviceProperties(&devProp, 0)); - - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, f1, - blockSize, 0)); - - // Check if numBlocks and blockSize are within limits - if ((numBlock <= 0) || - ((numBlock * blockSize) > devProp.maxThreadsPerMultiProcessor)) { - printf("RangeValidation : numBlock %d returned not in range." - "numblk(%d),blocksize(%d) and maxThrdsMP %d", numBlock, numBlock, - blockSize, devProp.maxThreadsPerMultiProcessor); - TestPassed &= false; - } - - // Validate numBlock after passing dynSharedMemPerBlk - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, f1, - blockSize, devProp.sharedMemPerBlock)); - - // Check if numBlocks and blockSize are within limits - if ((numBlock <= 0) || - ((numBlock * blockSize) > devProp.maxThreadsPerMultiProcessor)) { - printf("RangeValidation : numBlock %d returned not in range." 
- "numblk(%d),blocksize(%d),shm and maxThrdsMP %d", numBlock, numBlock, - blockSize, devProp.maxThreadsPerMultiProcessor); - TestPassed &= false; - } - - return TestPassed; -} - -/** - * Test case for using kernel function pointer with template - */ -bool templateInvocation() { - bool TestPassed = true; - int blockSize = 32; - int numBlock = 0; - - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor - (&numBlock, f2, blockSize, 0)); - if (!numBlock) { - printf("TemplateInvocation : numBlock received as zero"); - TestPassed &= false; - } - - return TestPassed; -} - - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - - if (p_tests == 1) { - TestPassed = argValidation(); - } else if (p_tests == 2) { - TestPassed = rangeValidation(); - } else if (p_tests == 3) { - TestPassed = templateInvocation(); - } else { - printf("Didnt receive any valid option. Try options 1 to 3\n"); - TestPassed = false; - } - - if (TestPassed) { - passed(); - } else { - failed("hipOccupancyMaxActiveBlocksPerMultiprocessor validation Failed!"); - } -} - diff --git a/tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp b/tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp deleted file mode 100644 index 49f0ed1bf7..0000000000 --- a/tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -Testcase Scenarios : - - (TestCase 1):: - 1) Pass gridSize as NULL and check appropriate error-code is returned. - 2) Pass blockSize as NULL and check appropriate error-code is returned. - 3) Pass invalid kernel function/NULL and check the api behavior. - - (TestCase 2):: - 4) Validate range by making sure blockSize returned by api doesn't exceed - devProp.maxThreadsPerBlock. - 5) Pass dynSharedMemPerBlk, blockSizeLimit and check out parameter range. - - (TestCase 3):: - 6) Test case for using kernel function pointer with template. 
- -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t --tests 1 - * TEST: %t --tests 2 - * TEST: %t --tests 3 - * HIT_END - */ - -#include -#include -#include "test_common.h" - -__global__ void f1(float *a) { *a = 1.0; } - -template -__global__ void f2(T *a) { *a = 1; } - - -/** - * Performs argument validation - */ -bool argValidation() { - bool TestPassed = true; - hipError_t ret; - int blockSize = 0; - int gridSize = 0; - - // Validate each argument - if ((ret = hipOccupancyMaxPotentialBlockSize(NULL, &blockSize, - f1, 0, 0)) != hipErrorInvalidValue) { - printf("ArgValidation : Inappropritate error value returned for" - " gridSize(NULL). blksize rcvd %d, Error: '%s'(%d)\n", - blockSize, hipGetErrorString(ret), ret); - TestPassed &= false; - } - - if ((ret = hipOccupancyMaxPotentialBlockSize(&gridSize, NULL, - f1, 0, 0)) != hipErrorInvalidValue) { - printf("ArgValidation : Inappropritate error value returned for" - " blockSize(NULL). gridSize rcvd %d, Error: '%s'(%d)\n", - gridSize, hipGetErrorString(ret), ret); - TestPassed &= false; - } - -#ifndef __HIP_PLATFORM_NVIDIA__ - // nvcc doesnt support kernelfunc(NULL) for api - ret = hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, NULL, 0, 0); - if (ret != hipErrorInvalidValue && ret != hipErrorInvalidDeviceFunction) { - printf("ArgValidation : Inappropritate error value returned for" - " kernelfunc(NULL). 
gridSize %d, blkSize %d, Error: '%s'(%d)\n", - gridSize, blockSize, hipGetErrorString(ret), ret); - TestPassed &= false; - } -#endif - - return TestPassed; -} - - -/** - * Performs range validation on api output - */ -bool rangeValidation() { - hipDeviceProp_t devProp; - bool TestPassed = true; - int blockSize = 0; - int gridSize = 0; - - // Get potential blocksize - HIPCHECK(hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, 0, 0)); - - HIPCHECK(hipGetDeviceProperties(&devProp, 0)); - - // Check if blockSize doen't exceed maxThreadsPerBlock - if ((gridSize <= 0) || (blockSize <= 0) || - (blockSize > devProp.maxThreadsPerBlock)) { - printf("RangeValidation : grdSize %d/blkSize %d returned not in range(%d)", - gridSize, blockSize, devProp.maxThreadsPerBlock); - TestPassed &= false; - } - - // Pass dynSharedMemPerBlk, blockSizeLimit and check out param - blockSize = 0; - gridSize = 0; - - HIPCHECK(hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, - devProp.sharedMemPerBlock, devProp.maxThreadsPerBlock)); - - if ((gridSize <= 0) || (blockSize <= 0) || - (blockSize > devProp.maxThreadsPerBlock)) { - printf("RangeValidation(Shm,TPB) : grdSize %d/blkSize %d returned" - "not in range(%d)", gridSize, blockSize, devProp.maxThreadsPerBlock); - TestPassed &= false; - } - - - return TestPassed; -} - -/** - * Test case for using kernel function pointer with template - */ -bool templateInvocation() { - bool TestPassed = true; - int gridSize = 0, blockSize = 0; - int numBlock = 0; - - HIPCHECK(hipOccupancyMaxPotentialBlockSize(&gridSize, - &blockSize, f2, 0, 0)); - if (!gridSize || !blockSize) { - printf("TemplateInvocation : gridSize/blockSize received as zero"); - TestPassed &= false; - } - - return TestPassed; -} - - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - bool TestPassed = true; - - if (p_tests == 1) { - TestPassed = argValidation(); - } else if (p_tests == 2) { - TestPassed = rangeValidation(); - } else 
if (p_tests == 3) { - TestPassed = templateInvocation(); - } else { - printf("Didnt receive any valid option. Try options 1 to 3\n"); - TestPassed = false; - } - - if (TestPassed) { - passed(); - } else { - failed("hipOccupancyMaxPotentialBlockSize validation Failed!"); - } -} - diff --git a/tests/src/runtimeApi/p2p/hipDeviceGetP2PAttribute.cpp b/tests/src/runtimeApi/p2p/hipDeviceGetP2PAttribute.cpp deleted file mode 100644 index a1f2850320..0000000000 --- a/tests/src/runtimeApi/p2p/hipDeviceGetP2PAttribute.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - -void runTest(int srcDevice, int dstDevice) { - std::cout<<"Between Device "<(p2p_attr_idx), - srcDevice, dstDevice)); - std::cout<<"Attr["<= 2){ -#ifdef _WIN64 - std::cout<<"XGMI p2p attribute test no supported on windows"< -#include -#include -#include -#ifdef __linux__ -#include -#include -#include -#endif -#include "test_common.h" -#include "hipP2pLinkTypeAndHopFunc.h" -/** - * Fetches Gpu device count - */ -void getDeviceCount(int *pdevCnt) { -#ifdef __linux__ - int fd[2], val = 0; - pid_t childpid; - - // create pipe descriptors - pipe(fd); - - // disable visible_devices env from shell - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - - childpid = fork(); - - if (childpid > 0) { // Parent - close(fd[1]); - // parent will wait to read the device cnt - read(fd[0], &val, sizeof(val)); - - // close the read-descriptor - close(fd[0]); - - // wait for child exit - wait(NULL); - - *pdevCnt = val; - } else if (!childpid) { // Child - int devCnt = 1; - // writing only, no need for read-descriptor - close(fd[0]); - - HIPCHECK(hipGetDeviceCount(&devCnt)); - // send the value on the write-descriptor: - write(fd[1], &devCnt, sizeof(devCnt)); - - // close the write descriptor: - close(fd[1]); - exit(0); - } else { // failure - *pdevCnt = 1; - return; - } -#else - HIPCHECK(hipGetDeviceCount(pdevCnt)); -#endif -} -/** - * Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * Test Scenario: device1 is visible and device2 is masked - */ -#ifdef __linux__ -#define MAX_SIZE 30 -#define VISIBLE_DEVICE 0 -bool testMaskedDevice(int actualNumGPUs) { - bool testResult = true; - int device; - int fd[2]; - pipe(fd); - - pid_t cPid; - cPid = fork(); - if (cPid == 0) { // child - hipError_t err; - char visibleDeviceString[MAX_SIZE] = {}; - snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE); - // disable visible_devices env 
from shell - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1); - setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1); - uint32_t linktype; - uint32_t hopcount; - for (int count = 1; - count < actualNumGPUs; count++) { - err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE, - VISIBLE_DEVICE+count, &linktype, &hopcount); - if (err == hipSuccess) { - testResult &= false; - } else { - printf("testMaskedDevice: Error Code Returned: '%s'(%d)\n", - hipGetErrorString(err), err); - } - } - close(fd[0]); - write(fd[1], &testResult, sizeof(testResult)); - close(fd[1]); - exit(0); - - } else if (cPid > 0) { // parent - close(fd[1]); - read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - - } else { - printf("fork() failed\n"); - testResult = false; - } - return testResult; -} -#endif -/** - * Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * Test Scenario: Invalid Device Number(s) - */ -bool testhipInvalidDevice(int numDevices) { - bool TestPassed = true; - hipError_t ret; - uint32_t linktype; - uint32_t hopcount; - if ((ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount)) - == hipSuccess) { - TestPassed &= false; - } - if ((ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, - &hopcount)) == hipSuccess) { - TestPassed &= false; - } - if ((ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount)) - == hipSuccess) { - TestPassed &= false; - } - if ((ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, - &hopcount)) == hipSuccess) { - TestPassed &= false; - } - if ((ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, - &hopcount)) == hipSuccess) { - TestPassed &= false; - } - return TestPassed; -} - -/** - * Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * Test Scenario: linktype = NULL - */ -bool testhipInvalidLinkType() { - uint32_t hopcount; - if (hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, 
nullptr, &hopcount)) { - return true; - } else { - printf("Test Failed as linktype = NULL returns hipSuccess \n"); - } - return false; -} - -/** - * Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * Test Scenario: hopcount = NULL - */ -bool testhipInvalidHopcount() { - uint32_t linktype; - if (hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr)) { - return true; - } else { - printf("Test Failed as hopcount = NULL returns hipSuccess \n"); - } - return false; -} - -/** - * Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * Test Scenario: device1 = device2 - */ -bool testhipSameDevice(int numGPUs) { - hipError_t ret; - uint32_t linktype = 0; - uint32_t hopcount = 0; - for (int gpuId = 0; gpuId < numGPUs; gpuId++) { - if ((ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, - &hopcount)) == hipSuccess) { - return false; - } - } - return true; -} -/** - * Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2) - * and (src = device2, dest = device1), where device1 and device2 are valid device numbers. 
- */ -bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) { - bool TestPassed = true; - // Get the unique pair of devices - for (int x = 0; x < numDevices; x++) { - for (int y = x+1; y < numDevices; y++) { - uint32_t linktype1 = 0, linktype2 = 0; - uint32_t hopcount1 = 0, hopcount2 = 0; - HIPCHECK(hipExtGetLinkTypeAndHopCount(x, y, - &linktype1, &hopcount1)); - HIPCHECK(hipExtGetLinkTypeAndHopCount(y, x, - &linktype2, &hopcount2)); - if (hopcount1 != hopcount2) { - TestPassed = false; - break; - } - } - } - return TestPassed; -} - -#ifdef __linux__ -/** - * Internal Function - */ -bool validateLinkType(uint32_t linktype_Hip, - RSMI_IO_LINK_TYPE linktype_RocmSmi) { - bool TestPassed = false; - - if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) && - (linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) { - TestPassed = true; - } else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) && - (linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) { - TestPassed = true; - } else { - printf("linktype Hip = %u, linktype RocmSmi = %u\n", - linktype_Hip, linktype_RocmSmi); - TestPassed = false; - } - return TestPassed; -} - -/** - * Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * Test Scenario: Verify (hopcount, linktype) values for all combination of - * GPUs with the output of rocm_smi tool. 
- */ -bool testhipLinkTypeHopcountDevice(int numDevices) { - bool TestPassed = true; - // Opening and initializing rocm-smi library - void *lib_rocm_smi_hdl; - rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*, - RSMI_IO_LINK_TYPE*); - rsmi_status_t (*fntopo_init)(uint64_t); - rsmi_status_t (*fntopo_shut_down)(); - - lib_rocm_smi_hdl = dlopen("/opt/rocm/rocm_smi/lib/librocm_smi64.so", - RTLD_LAZY); - if (!lib_rocm_smi_hdl) { - printf("Error opening /opt/rocm/rocm_smi/lib/librocm_smi64.so\n"); - printf("Skipping this test\n"); - passed(); - } - - void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type"); - if (!fnsym) { - printf("Error getting rsmi_topo_get_link_type() function\n"); - printf("Skipping this test\n"); - dlclose(lib_rocm_smi_hdl); - passed(); - } - fntopo_get_link_type = reinterpret_cast(fnsym); - - fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init"); - if (!fnsym) { - printf("Error getting rsmi_init() function\n"); - printf("Skipping this test\n"); - dlclose(lib_rocm_smi_hdl); - passed(); - } - fntopo_init = reinterpret_cast(fnsym); - - fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down"); - if (!fnsym) { - printf("Error getting rsmi_shut_down() function\n"); - printf("Skipping this test\n"); - dlclose(lib_rocm_smi_hdl); - passed(); - } - fntopo_shut_down = reinterpret_cast(fnsym); - - uint64_t init_flags = 0; - rsmi_status_t retsmi_init; - retsmi_init = fntopo_init(init_flags); - if (RSMI_STATUS_SUCCESS != retsmi_init) { - printf("Error when initializing rocm_smi\n"); - printf("Skipping this test\n"); - dlclose(lib_rocm_smi_hdl); - fntopo_shut_down(); - passed(); - } - // Use rocm-smi API rsmi_topo_get_link_type() to validate - struct devicePair { - int device1; - int device2; - }; - std::vector devicePairList; - // Get the unique pair of devices - for (int x = 0; x < numDevices; x++) { - for (int y = x+1; y < numDevices; y++) { - devicePairList.push_back({x, y}); - } - } - for (auto pos=devicePairList.begin(); - pos != 
devicePairList.end(); pos++) { - uint32_t linktype1 = 0; - uint32_t hopcount1 = 0; - RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED; - uint64_t hopcount2 = 0; - rsmi_status_t retsmi; - HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1, - (*pos).device2, &linktype1, &hopcount1)); - retsmi = fntopo_get_link_type((*pos).device1, - (*pos).device2, &hopcount2, &linktype2); - if (RSMI_STATUS_SUCCESS != retsmi) { - printf("Error returned from rsmi_topo_get_link_type() function\n"); - printf("Skipping this test\n"); - fntopo_shut_down(); - dlclose(lib_rocm_smi_hdl); - passed(); - } - uint32_t hopcount32 = hopcount2; // Convert uint64_t to uint32_t - // Validate hopcount - if (hopcount1 != hopcount2) { - printf("device1=%u,device2=%u,hopcount hip=%u,hopcount smi=%u\n", - (*pos).device1, (*pos).device2, - hopcount1, hopcount32); - TestPassed &= false; - } - // Validate linktype - TestPassed &= validateLinkType(linktype1, linktype2); - } - fntopo_shut_down(); - dlclose(lib_rocm_smi_hdl); - return TestPassed; -} -#endif - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - int numDevices = 0; - getDeviceCount(&numDevices); - if (numDevices < 2) { - printf("No. GPUs found is less than 2. Skipping all Test Case. 
\n"); - passed(); - } - bool TestPassed = true; - if (p_tests == 0x1) { - TestPassed = testhipInvalidDevice(numDevices); - } else if (p_tests == 0x2) { -#ifdef __linux__ - TestPassed = testMaskedDevice(numDevices); -#else - printf("This test is skipped due to non linux environment.\n"); -#endif - } else if (p_tests == 0x3) { -#ifdef __linux__ - TestPassed = testhipInvalidLinkType(); -#else - printf("This test is skipped due to non linux environment.\n"); -#endif - } else if (p_tests == 0x4) { -#ifdef __linux__ - TestPassed = testhipInvalidHopcount(); -#else - printf("This test is skipped due to non linux environment.\n"); -#endif - } else if (p_tests == 0x5) { -#ifdef __linux__ - TestPassed = testhipSameDevice(numDevices); -#else - printf("This test is skipped due to non linux environment.\n"); -#endif - } else if (p_tests == 0x6) { -#ifdef __linux__ - TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices); -#else - printf("This test is skipped due to non linux environment.\n"); -#endif - } else if (p_tests == 0x7) { - /*TODO:This test is currently ommited from directed test due to existing issues - in rocm-smi. Once rocm-smi issues are resolved, this test will be enabled. */ -#ifdef __linux__ - TestPassed = testhipLinkTypeHopcountDevice(numDevices); -#else - printf("This test is skipped due to non linux environment.\n"); -#endif - } else { - printf("Invalid Test Case \n"); - exit(1); - } - if (TestPassed) { - passed(); - } else { - failed("Test Case %x Failed!", p_tests); - } -} diff --git a/tests/src/runtimeApi/p2p/hipP2pLinkTypeAndHopFunc.h b/tests/src/runtimeApi/p2p/hipP2pLinkTypeAndHopFunc.h deleted file mode 100644 index 4d039d8068..0000000000 --- a/tests/src/runtimeApi/p2p/hipP2pLinkTypeAndHopFunc.h +++ /dev/null @@ -1,113 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_ -#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_ -/** - * rocm_smi.h enums - */ -typedef enum { - RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful - RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid - RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or - //!< action is not available for the - //!< given input, on the given system - RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This - //!< may because the operation is not - //!< supported by the Linux kernel - //!< version running on the executing - //!< machine - RSMI_STATUS_PERMISSION, //!< Permission denied/EACCESS file - //!< error. Many functions require - //!< root access to run. 
- RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other - //!< resource - RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught - RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of - //!< allowable or safe range - RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi - //!< initializing internal data - //!< structures - RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR, - RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not - //!< yet been implemented in the - //!< current system for the current - //!< devices - RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not - //!< found - RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were - //!< available for the operation - RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during - //!< execution of function - RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data - //!< was read - RSMI_STATUS_NO_DATA, //!< No data was found for a given - //!< input - RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to - //!< function is not what was expected - RSMI_STATUS_BUSY, //!< A resource or mutex could not be - //!< acquired because it is already - //!< being used - RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter - //!< exceeded INT32_MAX - - RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred -} rsmi_status_t; - -/** - * Types for IO Link returned from rocm_smi - */ -typedef enum _RSMI_IO_LINK_TYPE { - RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type. - RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express - RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI - RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types - RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types -} RSMI_IO_LINK_TYPE; - -/** - * Types for IO Link returned from rocm runtime - */ -typedef enum { - /** - * Hyper-transport bus type. - */ - HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0, - /** - * QPI bus type. 
- */ - HSA_AMD_LINK_INFO_TYPE_QPI = 1, - /** - * PCIe bus type. - */ - HSA_AMD_LINK_INFO_TYPE_PCIE = 2, - /** - * Infiniband bus type. - */ - HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3, - /** - * xGMI link type. - */ - HSA_AMD_LINK_INFO_TYPE_XGMI = 4 -} hsa_amd_link_info_type_t; - -#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_ diff --git a/tests/src/runtimeApi/stream/StreamAddCallback.cpp b/tests/src/runtimeApi/stream/StreamAddCallback.cpp deleted file mode 100644 index 225cc9245a..0000000000 --- a/tests/src/runtimeApi/stream/StreamAddCallback.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include "test_common.h" -#include - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -enum class ExecState -{ - EXEC_NOT_STARTED, - EXEC_STARTED, - EXEC_CB_STARTED, - EXEC_CB_FINISHED, - EXEC_FINISHED -}; - -struct UserData -{ - size_t size; - int* ptr; -}; - -// Global variable to check exection order -std::atomic gData(ExecState::EXEC_NOT_STARTED); - - -void myCallback(hipStream_t stream, hipError_t status, void* user_data) -{ - if(gData.load() != ExecState::EXEC_STARTED) - return; // Error hence return early - - gData.store(ExecState::EXEC_CB_STARTED); - - UserData* data = reinterpret_cast(user_data); - printf("Callback started\n"); - - std::this_thread::sleep_for (std::chrono::seconds(1)); - - printf("Callback ending.\n"); - gData.store(ExecState::EXEC_CB_FINISHED); -} - -bool test(int count) -{ - printf("\n============ Test iteration %d =============\n",count); - // Stream - hipStream_t stream; - bool result = true; - - gData.store(ExecState::EXEC_STARTED); - - HIPCHECK(hipStreamCreate(&stream)); - - // Array size - size_t size = 10000; - - // Device array - int *data = NULL; - HIPCHECK(hipMalloc((void**)&data, sizeof(int) * size)); - - // Initialize device array to -1 - HIPCHECK(hipMemset(data, -1, sizeof(int) * size)); - - // Host array - int *host = NULL; - HIPCHECK(hipHostMalloc((void**)&host, sizeof(int) * 
size)); - - // Print host ptr address - printf("In main thread\n"); - - // Initialize user_data for callback - UserData arg; - arg.size = size; - arg.ptr = host; - - // Synchronize device - HIPCHECK(hipDeviceSynchronize()); - - // Asynchronous copy from device to host - HIPCHECK(hipMemcpyAsync(host, data, sizeof(int) * size, hipMemcpyDeviceToHost, stream)); - - // Asynchronous memset on device - HIPCHECK(hipMemsetAsync(data, 0, sizeof(int) * size, stream)); - - // Add callback - should happen after hipMemsetAsync() - HIPCHECK(hipStreamAddCallback(stream, myCallback, &arg, 0)); - - printf("Will wait in main thread until callback completes\n"); - - //This should synchronize the stream (including the callback) - HIPCHECK(hipStreamSynchronize(stream)); - - if(gData.load() != ExecState::EXEC_CB_FINISHED) - { - std::cout<<"Callback is not finished\n"; - return false; - } - printf("Callback completed will resume main thread execution\n"); - - if(host[size/2] != -1) - { - // Print some host data that just got copied - printf("Pseudo host data printing (should be -1): %d\n", host[size/2]); - result = false; - } - - HIPCHECK(hipMemcpy(host, data, sizeof(int)*size, hipMemcpyDeviceToHost)); - - if(host[size-1] != 0) - { - printf("Pseudo host data printing (should be 0): %d\n", host[size-1]); - result = false; - } - - HIPCHECK(hipFree(data)); - HIPCHECK(hipHostFree(host)); - HIPCHECK(hipStreamDestroy(stream)); - - gData.store(ExecState::EXEC_FINISHED); - return result; -} - -int main() -{ - // Test involves multithreading hence running multiple times - // to make sure consitency in the behavior - bool status = true; - - for(int i=0; i < 10; i++){ - status = test(i+1); - if(status == false) - { - failed("Test Failed!\n"); - break; - } - } - - if(status == true) passed(); - return 0; -} diff --git a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp deleted file mode 100644 index e50f98e53d..0000000000 --- 
a/tests/src/runtimeApi/stream/hipAPIStreamDisable.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * HIT_END - */ - -#include -#include "test_common.h" -#include "hip/math_functions.h" - -const int NN = 1 << 21; - -__global__ void kernel(float* x, float* y, int n) { - int tid = threadIdx.x; - if (tid < 1) { - for (int i = 0; i < n; i++) { - x[i] = sqrt(powf(3.14159, i)); - } - y[tid] = y[tid] + 1.0f; - } -} - -__global__ void nKernel(float* y) { - int tid = threadIdx.x; - y[tid] = y[tid] + 1.0f; -} - -int main() { - const int num_streams = 8; - hipStream_t streams[num_streams]; - float *data[num_streams], *yd, *xd; - float y = 1.0f, x = 1.0f; - HIPCHECK(hipMalloc((void**)&yd, sizeof(float))); - HIPCHECK(hipMalloc((void**)&xd, sizeof(float))); - HIPCHECK(hipMemcpy(yd, &y, sizeof(float), hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(xd, &x, sizeof(float), hipMemcpyHostToDevice)); - for (int i = 0; i < num_streams; i++) { - HIPCHECK(hipStreamCreate(&streams[i])); - HIPCHECK(hipMalloc(&data[i], NN * sizeof(float))); - hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), dim3(1), dim3(1), 0, streams[i], data[i], xd, NN); - hipLaunchKernelGGL(HIP_KERNEL_NAME(nKernel), dim3(1), dim3(1), 0, 0, yd); - } - - HIPCHECK(hipMemcpy(&x, xd, sizeof(float), hipMemcpyDeviceToHost)); - HIPCHECK(hipMemcpy(&y, yd, sizeof(float), hipMemcpyDeviceToHost)); - std::cout << x << " " << y << std::endl; - HIPASSERT(x == y); - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp b/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp deleted file mode 100644 index c9d428ad10..0000000000 --- a/tests/src/runtimeApi/stream/hipAPIStreamEnable.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * HIT_END - */ - -//#define HIP_API_PER_THREAD_DEFAULT_STREAM - -#include -#include "test_common.h" -#include "hip/math_functions.h" - -const int NN = 1 << 21; - -__global__ void kernel(float* x, float* y, int n) { - int tid = threadIdx.x; - if (tid < 1) { - for (int i = 0; i < n; i++) { - x[i] = sqrt(powf(3.14159, i)); - } - y[tid] = y[tid] + 1.0f; - } -} - -__global__ void nKernel(float* y) { - int tid = threadIdx.x; - y[tid] = y[tid] + 1.0f; -} - -int main() { - const int num_streams = 8; - hipStream_t streams[num_streams]; - float *data[num_streams], *yd, *xd; - float y = 1.0f, x = 1.0f; - HIPCHECK(hipMalloc((void**)&yd, sizeof(float))); - HIPCHECK(hipMalloc((void**)&xd, sizeof(float))); - HIPCHECK(hipMemcpy(yd, &y, sizeof(float), hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(xd, &x, sizeof(float), hipMemcpyHostToDevice)); - for (int i = 0; i < num_streams; i++) { - HIPCHECK(hipStreamCreate(&streams[i])); - HIPCHECK(hipMalloc(&data[i], NN * sizeof(float))); - hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), dim3(1), dim3(1), 0, streams[i], data[i], xd, N); - hipLaunchKernelGGL(HIP_KERNEL_NAME(nKernel), dim3(1), dim3(1), 0, 0, yd); - } - - HIPCHECK(hipMemcpy(&x, xd, sizeof(float), hipMemcpyDeviceToHost)); - HIPCHECK(hipMemcpy(&y, yd, sizeof(float), hipMemcpyDeviceToHost)); - std::cout << x << " " << y << std::endl; - HIPASSERT(x < y); - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipMultiStreams.cpp b/tests/src/runtimeApi/stream/hipMultiStreams.cpp deleted file mode 100644 index f8f6c44460..0000000000 --- a/tests/src/runtimeApi/stream/hipMultiStreams.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include -#include -#include "test_common.h" - -using namespace std; - -__global__ void kernel_do_nothing() { - // empty kernel -} - -int main(int argc, char* argv[]) { - - constexpr int nLoops = 100000; - constexpr int nStreams = 2; - vector streams(nStreams); - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - if (nGpu < 1) { - cout << "info: didn't find any GPU! 
skipping the test!\n"; - passed(); - return 0; - } - - static int device = 0; - HIPCHECK(hipSetDevice(device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device)); - cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name << endl; - - for (int i = 0; i < nStreams; i++) { - HIPCHECK(hipStreamCreate(&streams[i])); - } - - for (int k = 0; k <= nLoops; ++k) { - HIPCHECK(hipDeviceSynchronize()); - - // Launch kernel with default stream - hipLaunchKernelGGL((kernel_do_nothing), dim3(1), dim3(1), 0, 0); - - // Launch kernel on all streams - for (int i = 0; i < nStreams; i++) { - hipLaunchKernelGGL((kernel_do_nothing), dim3(1), dim3(1), 0, streams[i]); - } - - // Sync stream 1 - HIPCHECK(hipStreamSynchronize(streams[0])); - - if (k % 10000 == 0 || k == nLoops) { - cout << "Info: Iteration = " << k << endl; - } - } - - HIPCHECK(hipDeviceSynchronize()); - - // Clean up - for (int i = 0; i < nStreams; i++) { - HIPCHECK(hipStreamDestroy(streams[i])); - } - - HIPCHECK(hipDeviceReset()); - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipNullStream.cpp b/tests/src/runtimeApi/stream/hipNullStream.cpp deleted file mode 100644 index 8d0b8b16e0..0000000000 --- a/tests/src/runtimeApi/stream/hipNullStream.cpp +++ /dev/null @@ -1,297 +0,0 @@ -/* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - - -#include "hip/hip_runtime.h" -#include "test_common.h" - -#include -#include - -unsigned p_streams = 16; -int p_repeat = 10; -int p_db = 0; - -using namespace std; - -template -__global__ void vectorADDRepeat(const T* A_d, const T* B_d, T* C_d, size_t NELEM, - int repeat) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (int j = 1; j <= repeat; j++) { - for (size_t i = offset; i < NELEM; i += stride) { - C_d[i] = A_d[i] * j + B_d[i] * j; - } - }; -} - - -//------ -// Structure for one stream - includes the stream + data buffers that are used by the stream. 
-template -class Streamer { - public: - Streamer(size_t numElements, bool useNullStream = false); - ~Streamer(); - void enqueAsync(); - void queryUntilComplete(); - - void reset(); - void H2D(); - void D2H(); - - - public: - T* _A_h; - T* _B_h; - T* _C_h; - - T* _A_d; - T* _B_d; - T* _C_d; - - hipStream_t _stream; - hipEvent_t _event; - - size_t _numElements; -}; - -template -Streamer::Streamer(size_t numElements, bool useNullStream) : _numElements(numElements) { - HipTest::initArrays(&_A_d, &_B_d, &_C_d, &_A_h, &_B_h, &_C_h, numElements, true); - - if (useNullStream) { - _stream = 0x0; - } else { - HIPCHECK(hipStreamCreate(&_stream)); - } - HIPCHECK(hipEventCreate(&_event)); - - H2D(); -}; - -template -void Streamer::H2D() { - HIPCHECK(hipMemcpy(_A_d, _A_h, _numElements * sizeof(T), hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(_B_d, _B_h, _numElements * sizeof(T), hipMemcpyHostToDevice)); -} - -template -void Streamer::D2H() { - HIPCHECK(hipMemcpy(_C_h, _C_d, _numElements * sizeof(T), hipMemcpyDeviceToHost)); -} - -template -void Streamer::reset() { - HipTest::setDefaultData(_numElements, _A_h, _B_h, _C_h); - H2D(); -} - - -template -void Streamer::enqueAsync() { - printf("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, - _numElements * sizeof(T) / 1024.0 / 1024.0); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements); - hipLaunchKernelGGL(vectorADDRepeat, dim3(blocks), dim3(threadsPerBlock), 0, _stream, - static_cast(_A_d), static_cast(_B_d), _C_d, _numElements, - p_repeat); -} - -template -void Streamer::queryUntilComplete() { - int numQueries = 0; - hipError_t e = hipSuccess; - do { - numQueries++; - e = hipStreamQuery(_stream); - } while (e != hipSuccess); - - printf("completed after %d queries\n", numQueries); -}; - - -//--- -// Parse arguments specific to this test. 
-void parseMyArguments(int argc, char* argv[]) { - int more_argc = HipTest::parseStandardArguments(argc, argv, false); - - // parse args for this test: - for (int i = 1; i < more_argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, "--streams")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &p_streams)) { - failed("Bad streams argument"); - } - } else if (!strcmp(arg, "--repeat") || (!strcmp(arg, "-r"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &p_repeat)) { - failed("Bad repeat argument"); - } - } else { - failed("Bad argument '%s'", arg); - } - }; -}; - - -void printBuffer(std::string name, int* f, size_t numElements) { - std::cout << name << "\n"; - for (size_t i = 0; i < numElements; i++) { - printf("%5zu: %d\n", i, f[i]); - } -} - - -//--- -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, false); - parseMyArguments(argc, argv); - - typedef Streamer IntStreamer; - - std::vector streamers; - - size_t numElements = N; - - int* expected_H = (int*)malloc(numElements * sizeof(int)); - - - auto nullStreamer = new IntStreamer(numElements, true); - - // Expected resultr - last streamer runs vectorADDRepeat, then nullstreamer adds - // lastStreamer->_C_d + lastStreamer->_C_d - for (size_t i = 0; i < numElements; i++) { - expected_H[i] = - ((nullStreamer->_A_h[i]) * p_repeat + (nullStreamer->_B_h[i]) * p_repeat) * 2; - } - - - for (int i = 0; i < p_streams; i++) { - IntStreamer* s = new IntStreamer(numElements); - streamers.push_back(s); - } - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - - for (int s = 1; s < p_streams; s++) { - if (p_tests & (1 << s)) { - printf("==> Test %x runAsnc, #streams=%d\n", (1 << s), s); - nullStreamer->reset(); - - for (int i = 0; i < s; i++) { - streamers[i]->enqueAsync(); - } - - auto lastStreamer = streamers[s - 1]; - - // Dispatch to NULL stream, should wait for prior async activity to complete before - // beginning: - 
hipLaunchKernelGGL(vectorADDRepeat, dim3(blocks), dim3(threadsPerBlock), 0, - 0 /*nullstream*/, static_cast(lastStreamer->_C_d), - static_cast(lastStreamer->_C_d), nullStreamer->_C_d, - numElements, 1 /*repeat*/); - - - if (p_db) { - HIPCHECK(hipDeviceSynchronize()); - lastStreamer->D2H(); - printBuffer("lastStream _A_h", lastStreamer->_A_h, min(numElements, size_t(20))); - printBuffer("lastStream _B_h", lastStreamer->_B_h, min(numElements, size_t(20))); - printBuffer("lastStream _C_h", lastStreamer->_C_h, min(numElements, size_t(20))); - } - nullStreamer->D2H(); - HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); - } - } - - - for (int s = 1; s < p_streams; s += 2) { - unsigned tmask = (0x10000 | (1 << s)); - if (p_tests & tmask) { - nullStreamer->reset(); - printf("==> Test %x runAsnc-odd-only, #streams=%d\n", tmask, s); - for (int i = 0; i < s; i++) { - // RUn just odd streams so we have some empty ones to examine/optimize: - if (i & 0x1) { - streamers[i]->enqueAsync(); - } - } - auto lastStreamer = streamers[s - 1]; - - // Dispatch to NULL stream, should wait for prior async activity to complete before - // beginning: - hipLaunchKernelGGL(vectorADDRepeat, dim3(blocks), dim3(threadsPerBlock), 0, - 0 /*nullstream*/, static_cast(lastStreamer->_C_d), - static_cast(lastStreamer->_C_d), nullStreamer->_C_d, - numElements, 1 /*repeat*/); - - nullStreamer->D2H(); - - HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkTest(expected_H, nullStreamer->_C_h, numElements); - } - } - - // Expected resultr - last streamer runs vectorADDRepeat - for (size_t i = 0; i < numElements; i++) { - expected_H[i] = ((nullStreamer->_A_h[i]) * p_repeat + (nullStreamer->_B_h[i]) * p_repeat); - } - - if (p_tests & 0x20000) { - assert(p_streams >= 2); // need a couple streams in order to run this test. 
- nullStreamer->reset(); - printf("\n==> Test hipStreamSynchronize with defaultStream \n"); - - // Enqueue a long-running job to stream1 - streamers[0]->enqueAsync(); - - // Check to see if synchronizing on a null stream synchronizes all other streams or just the - // null stream. This function follows null stream semantics and will wait for all other - // blocking streams before returning. This will wait on the host - HIPCHECK(hipStreamSynchronize(0)); - - // Copy with stream1, this could go async if the streamSync doesn't synchronize ALL the - // streams. - HIPCHECK(hipMemcpyAsync(streamers[0]->_C_h, streamers[0]->_C_d, - streamers[0]->_numElements * sizeof(int), hipMemcpyDeviceToHost, - streamers[1]->_stream)); - - - HIPCHECK(hipDeviceSynchronize()); - - HipTest::checkTest(expected_H, streamers[0]->_C_h, numElements); - } - - for (auto it : streamers) { - hipStreamDestroy(it->_stream); - } - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStream.h b/tests/src/runtimeApi/stream/hipStream.h deleted file mode 100644 index dc81eeb2e8..0000000000 --- a/tests/src/runtimeApi/stream/hipStream.h +++ /dev/null @@ -1,117 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIPSTREAM_H -#define HIPSTREAM_H -#include "hip/hip_runtime.h" - -#define NUM_STREAMS 4 - -/* - * H2H - 1 - * H2D - 2 - * KER - 3 - * D2D - 4 - * D2H - 5 - */ - -template -void H2HAsync(T* Dst, T* Src, size_t size, hipStream_t stream) { - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToHost, stream)); -} - -template -void H2DAsync(T* Dst, T* Src, size_t size, hipStream_t stream) { - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToDevice, stream)); -} - -template -void D2DAsync(T* Dst, T* Src, size_t size, hipStream_t stream) { - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToDevice, stream)); -} - -template -void D2HAsync(T* Dst, T* Src, size_t size, hipStream_t stream) { - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToHost, stream)); -} - -template -void H2H(T* Dst, T* Src, size_t size) { - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToHost)); -} - -template -void H2D(T* Dst, T* Src, size_t size) { - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToDevice)); -} - -template -void D2D(T* Dst, T* Src, size_t size) { - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToDevice)); -} - -template -void D2H(T* Dst, T* Src, size_t size) { - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToHost)); -} - -template -__global__ void Inc(T* In) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - In[tx] = In[tx] + 1; -} - -template -void initArrays(T** Ad, T** Ah, size_t N, bool usePinnedHost = false) { - size_t NBytes = N * sizeof(T); - if (Ad) { - 
HIPCHECK(hipMalloc(Ad, NBytes)); - } - if (usePinnedHost) { - HIPCHECK(hipHostMalloc((void**)Ah, NBytes, hipHostMallocDefault)); - } else { - *Ah = new T[N]; - HIPASSERT(*Ah != NULL); - } -} - -template -void initArrays(T** Ad, size_t N, bool deviceMemory = false, bool usePinnedHost = false) { - size_t NBytes = N * sizeof(T); - if (deviceMemory) { - HIPCHECK(hipMalloc(Ad, NBytes)); - } else { - if (usePinnedHost) { - HIPCHECK(hipHostMalloc((void**)Ad, NBytes, hipHostMallocDefault)); - } else { - *Ad = new T[N]; - HIPASSERT(*Ad != NULL); - } - } -} - -template -void setArray(T* Array, int N, T val) { - for (int i = 0; i < N; i++) { - Array[i] = val; - } -} - - -#endif diff --git a/tests/src/runtimeApi/stream/hipStreamACb_AltEnqueue.cpp b/tests/src/runtimeApi/stream/hipStreamACb_AltEnqueue.cpp deleted file mode 100644 index 694375925b..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamACb_AltEnqueue.cpp +++ /dev/null @@ -1,207 +0,0 @@ - /* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - - - -// Testcase Description: This test case is used to verify if the callback -// function called through hipStreamAddCallback() api completes the execution -// in order as hipStreamAddCallback() api queued in their respective streams - - - -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - - -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - - -hipStream_t mystream1, mystream2; -size_t Num = 4096; -std::vector Stream1_Order, Stream2_Order; - - -__global__ void vector_square(float* C_d, float* A_d, size_t Num) { - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = gputhread; i < Num; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay thread 1 only in the GPU - if (gputhread == 1) { - unsigned long long int wait_t = 3200000000, start = clock64(), cur; - do { - cur = clock64() - start; - } while (cur < wait_t); - } -} - -__global__ void vector_square_gfx11(float* C_d, float* A_d, size_t Num) { -#ifdef __HIP_PLATFORM_AMD__ - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = gputhread; i < Num; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay thread 1 only in the GPU - if (gputhread == 1) { - unsigned long long int wait_t = 3200000000, start = wall_clock64(), cur; - do { - cur = wall_clock64() - start; - } while (cur < wait_t); - } -#endif -} - -float *A_h, *C_h, *A_h1, *C_h1; - -static void HIPRT_CB Callback_Stream1(hipStream_t stream, hipError_t status, - void* userData) { - for (size_t i = 0; i < 
Num; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - std::cout << "Data mismatch in stream1 at: " << i << std::endl; - } - } - - // Storing the int passed into this callback into Stream1_Order - // this will help verify the order in which this Callback function - // is called. - Stream1_Order.push_back(*(reinterpret_cast(userData))); - delete reinterpret_cast(userData); -} - -static void HIPRT_CB Callback_Stream2(hipStream_t stream, hipError_t status, - void* userData) { - for (size_t i = 0; i < Num; i++) { - if (C_h1[i] != A_h1[i] * A_h1[i]) { - std::cout << "Data mismatch in stream2 at: " << i << std::endl; - } - } - // Storing the int passed into this callback into Stream2_Order - // this will help verify the order in which this Callback function - // is called. - Stream2_Order.push_back(*(reinterpret_cast(userData))); - delete reinterpret_cast(userData); -} - -int main(int argc, char* argv[]) { - float *A_d, *C_d; - size_t Nbytes = Num * sizeof(float); - - A_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); - A_h1 = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h1 = reinterpret_cast(malloc(Nbytes)); - HIPCHECK(C_h == 0 ? 
hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < Num; i++) { - A_h[i] = 1.618f + i; - } - for (size_t i = 0; i < Num; i++) { - A_h1[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - - HIPCHECK(hipStreamCreateWithFlags(&mystream1, hipStreamNonBlocking)); - HIPCHECK(hipStreamCreateWithFlags(&mystream2, hipStreamNonBlocking)); - - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream1)); - - const unsigned threadsPerBlock = 256; - const unsigned blocks = (Num + 255)/threadsPerBlock; - int *ptr = NULL; - int *ptr1 = NULL; - // Queing jobs in both mystream1/2 followed by hipStreamAddCallback - auto vector_square_used = IsGfx11() ? vector_square_gfx11 : vector_square; - for (int i = 1; i < 5; ++i) { - hipLaunchKernelGGL((vector_square_used), dim3(blocks), dim3(threadsPerBlock), - 0, mystream1, C_d, A_d, Num); - HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, - mystream1)); - ptr = new int; - *ptr = i; - HIPCHECK(hipStreamAddCallback(mystream1, Callback_Stream1, - reinterpret_cast(ptr), 0)); - - hipLaunchKernelGGL((vector_square_used), dim3(blocks), dim3(threadsPerBlock), - 0, mystream2, C_d, A_d, Num); - HIPCHECK(hipMemcpyAsync(C_h1, C_d, Nbytes, - hipMemcpyDeviceToHost, mystream2)); - ptr1 = new int; - *ptr1 = i; - HIPCHECK(hipStreamAddCallback(mystream2, Callback_Stream2, - reinterpret_cast(ptr1), 0)); - } - - HIPCHECK(hipStreamSynchronize(mystream1)); - HIPCHECK(hipStreamSynchronize(mystream2)); - - HIPCHECK(hipStreamDestroy(mystream1)); - HIPCHECK(hipStreamDestroy(mystream2)); - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - free(A_h); - free(C_h); - free(A_h1); - free(C_h1); - - // Checking if Stream1_Order has ints in sequencial order or not - int i = 1; - for (auto itr=Stream1_Order.begin(); itr != Stream1_Order.end(); ++itr) { - if (*itr != i) { - printf("hipStreamAddCallBack() did not execute in sequence"); - printf(" in first 
stream\n"); - failed("Unexpected behavior!"); - } - ++i; - } - - // Checking if Stream2_Order has ints in sequencial order or not - i = 1; - for (auto itr=Stream2_Order.begin(); itr != Stream2_Order.end(); ++itr) { - if (*itr != i) { - printf("hipStreamAddCallBack() did not execute in sequence"); - printf(" in second stream\n"); - failed("Unexpected behavior!"); - } - ++i; - } - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_MStrm_Mgpu.cpp b/tests/src/runtimeApi/stream/hipStreamACb_MStrm_Mgpu.cpp deleted file mode 100644 index d56ce2b3c0..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamACb_MStrm_Mgpu.cpp +++ /dev/null @@ -1,182 +0,0 @@ -/* - Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ - -// Testcase Description: Streams are launched in individual GPUs with different -// kernel. Verify that all the kernels queued are executed before the callback. 
- -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include -#ifdef __linux__ -#include -#endif -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - - -size_t N_ELMTS = 4096; - -// Data structure for holding and validating data -struct gpu_data { - int *int_ptr = NULL; - int gpu; - int acknowledge; -}; - -enum { - SUCCESS = 0, - KERNEL_EXECUTION_MISMATCH, - KERNEL_COMPUTATION_MISMATCH -}; - -__global__ void Add_Data(int* A_d, size_t N_ELMTS) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - for (size_t i = offset; i < N_ELMTS; i += stride) { - // Increment the value of A_d[i] by 1 - A_d[i] = A_d[i] + 1; - } -} - -// below kernel is just to load the gpu with multiple jobs -__global__ void Square_plus_one(int* A_d, int* C_d, size_t N_ELMTS) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - for (size_t i = offset; i < N_ELMTS; i += stride) { - C_d[i] = A_d[i]*A_d[i] + 1; - } -} - -static void HIPRT_CB Stream_Callback(hipStream_t stream, hipError_t status, - void* userData) { - gpu_data *ptr = reinterpret_cast(userData); - - // int_ptr in the passed userData will contain the data copied from device to - // host. Expected data in this field is the gpu ordinal. - if (*((*ptr).int_ptr) != (*ptr).gpu + 1) { - (*ptr).acknowledge = 100; // Assign unexpected value to indicate fail - } else { - (*ptr).acknowledge = (*ptr).gpu; // Assign the gpu ordinal received - } -} - -void launch_gpu(int gpu_ordinal) { - HIPCHECK(hipSetDevice(gpu_ordinal)); - int *A_d, *A_h, *C_h, *C_d; - size_t Nbytes = N_ELMTS * sizeof(int), Data_mismatch = 0; - bool cb = false; - A_h = (int *)malloc(Nbytes); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h = (int *)malloc(Nbytes); - HIPCHECK(C_h == 0 ? 
hipErrorOutOfMemory : hipSuccess); - - // Fill with 0 - for (size_t i = 0; i < N_ELMTS; i++) { - A_h[i] = 0; - } - - // setting gpu value in the struct object - gpu_data *ptr = new gpu_data; - ptr->int_ptr = C_h; - ptr->gpu = gpu_ordinal; - ptr->acknowledge = 100; - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - - hipStream_t mystream; - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); - - const unsigned threadsPerBlock = 256; - const unsigned blocks = (N_ELMTS + 255)/threadsPerBlock; - - // A_d is initialized to 0. Add_Data kernel does A_d = A_d + 1 - // The Add_data kernel is called 1 time for gpu0, 2 times for gpu1 etc. - // At the end of the loop, A_d should have the gpu_ordinal number - for (int i = 0; i < gpu_ordinal + 1; i++) { - hipLaunchKernelGGL(Add_Data, dim3(blocks), dim3(threadsPerBlock), 0, - mystream, A_d, N_ELMTS); - hipLaunchKernelGGL(Square_plus_one, 1, 1, 0, mystream, A_d, C_d, N_ELMTS); - } - HIPCHECK(hipMemcpyAsync(C_h, A_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - - // Pass the ptr as user data which contains the gpu_ordinal, default value - // for ack and the data that is copied to host - HIPCHECK(hipStreamAddCallback(mystream, Stream_Callback, - reinterpret_cast(ptr), 0)); - HIPCHECK(hipStreamSynchronize(mystream)); - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - HIPCHECK(hipStreamDestroy(mystream)); - - int result = SUCCESS; - if (C_h[0] != gpu_ordinal + 1) { - result = KERNEL_EXECUTION_MISMATCH; - } - - if (ptr->gpu != ptr->acknowledge) { - result = KERNEL_COMPUTATION_MISMATCH; - } - - free(A_h); - free(C_h); - free(ptr); - - if (result == KERNEL_EXECUTION_MISMATCH) { - failed("Number of kernels expected to be executed does not match"); - } else if (result == KERNEL_COMPUTATION_MISMATCH) { - failed("Mismatch found in the result of the computation!"); - } -} - - -int main() { - int gpu_cnt = 0; - - 
HIPCHECK(hipGetDeviceCount(&gpu_cnt)); - if (gpu_cnt < 2) { - printf("Minimum of 2 gpus are needed for this test, skipping the test\n"); - passed(); - } - - std::thread T[gpu_cnt]; - - // Launching threads for each GPU - for (int i = 0; i < gpu_cnt; i++) { - T[i] = std::thread(launch_gpu, i); - } - - for (int i=0; i < gpu_cnt; i++) { - T[i].join(); - } - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_MultiCalls.cpp b/tests/src/runtimeApi/stream/hipStreamACb_MultiCalls.cpp deleted file mode 100644 index 86dd214361..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamACb_MultiCalls.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- * */ - -// Testcase Description:: This test case is used to check if the runtime is ok -// when hipStreamAddCallback() is called back to back multiple calls - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - - -#include -#ifdef __linux__ -#include -#endif -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - -#define NUM_CALLS 1000 - -hipStream_t mystream; -size_t Num = 4096; -std::atomicCb_count{0}, Data_mismatch{0}; -float *A_h, *C_h; - -__global__ void vector_square(float* C_d, float* A_d, size_t Num) { - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = gputhread; i < Num; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay thread 1 only in the GPU - if (gputhread == 1) { - unsigned long long int wait_t = 3200000000, start = clock64(), cur; - do { - cur = clock64() - start; - } while (cur < wait_t); - } -} - -__global__ void vector_square_gfx11(float* C_d, float* A_d, size_t Num) { -#ifdef __HIP_PLATFORM_AMD__ - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = gputhread; i < Num; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay thread 1 only in the GPU - if (gputhread == 1) { - unsigned long long int wait_t = 3200000000, start = wall_clock64(), cur; - do { - cur = wall_clock64() - start; - } while (cur < wait_t); - } -#endif -} - -static void HIPRT_CB Stream_Callback(hipStream_t stream, hipError_t status, - void* userData) { - for (size_t i = 0; i < Num; i++) { - // Validate the data and update Data_mismatch - if (C_h[i] != A_h[i] * A_h[i]) { - Data_mismatch++; - } - } - - // Increment the Cb_count to indicate that the callback is processed. 
- ++Cb_count; -} - -int main(int argc, char* argv[]) { - float *A_d, *C_d; - size_t Nbytes = Num * sizeof(float); - - A_h = (float*)malloc(Nbytes); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h = (float*)malloc(Nbytes); - HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < Num; i++) { - A_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); - - const unsigned threadsPerBlock = 256; - const unsigned blocks = (Num+255)/threadsPerBlock; - auto vector_square_used = IsGfx11() ? vector_square_gfx11 : vector_square; - hipLaunchKernelGGL((vector_square_used), dim3(blocks), dim3(threadsPerBlock), 0, - mystream, C_d, A_d, Num); - - HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - - // Add multiple callbacks to the stream - for (int i = 0; i< NUM_CALLS; i++) { - HIPCHECK(hipStreamAddCallback(mystream, Stream_Callback, NULL, 0)); - } - - HIPCHECK(hipStreamSynchronize(mystream)); - HIPCHECK(hipStreamDestroy(mystream)); - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - - free(A_h); - free(C_h); - - // Each callback would have validated the data and if any mismatch is found, - // Data_mismatch will not have proper data. Validate the same. - // Cb_count should match the number of callbacks added. 
- if (Data_mismatch.load() != 0) { - failed("Mismatch found in the result of the computation!"); - } else if (Cb_count.load() != NUM_CALLS) { - failed("All callbacks for stream did not get called!"); - } - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp b/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp deleted file mode 100644 index e6b702fa3b..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// Testcase Description: This test case is used to check the behaviour of HIP -// when multiple hipStreaAddCallback() are called over multiple Threads -// This test case is disabled currently. 
- -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - - - -#include -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - -#define NUM_THREADS 2000 - -size_t Num = 4096; -std::atomicCb_count{0}, Data_mismatch{0}; -hipStream_t mystream; -float *A_h, *C_h; - -__global__ void vector_square(float* C_d, float* A_d, size_t Num) { - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = gputhread; i < Num; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay thread 1 only in the GPU - if (gputhread == 1) { - unsigned long long int wait_t = 3200000000, start = clock64(), cur; - do { - cur = clock64() - start; - } while (cur < wait_t); - } -} - -__global__ void vector_square_gfx11(float* C_d, float* A_d, size_t Num) { -#ifdef __HIP_PLATFORM_AMD__ - size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = gputhread; i < Num; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay thread 1 only in the GPU - if (gputhread == 1) { - unsigned long long int wait_t = 3200000000, start = wall_clock64(), cur; - do { - cur = wall_clock64() - start; - } while (cur < wait_t); - } -#endif -} - -static void HIPRT_CB Thread1_Callback(hipStream_t stream, hipError_t status, - void* userData) { - for (size_t i = 0; i < Num; i++) { - // Validate the data and update Data_mismatch - if (C_h[i] != A_h[i] * A_h[i]) { - Data_mismatch++; - } - } - - // Increment the Cb_count to indicate that the callback is processed. 
- ++Cb_count; -} - -static void HIPRT_CB Thread2_Callback(hipStream_t stream, hipError_t status, - void* userData) { - for (size_t i = 0; i < Num; i++) { - // Validate the data and update Data_mismatch - if (C_h[i] != A_h[i] * A_h[i]) { - Data_mismatch++; - } - } - - // Increment the Cb_count to indicate that the callback is processed. - ++Cb_count; -} - -void Thread1_func() { - HIPCHECK(hipStreamAddCallback(mystream, Thread1_Callback, NULL, 0)); -} - -void Thread2_func() { - HIPCHECK(hipStreamAddCallback(mystream, Thread2_Callback, NULL, 0)); -} - - -int main(int argc, char* argv[]) { - float *A_d, *C_d; - size_t Nbytes = Num * sizeof(float); - - A_h = (float*)malloc(Nbytes); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h = (float*)malloc(Nbytes); - HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < Num; i++) { - A_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); - - const unsigned threadsPerBlock = 256; - const unsigned blocks = (Num+255)/threadsPerBlock; - - auto vector_square_used = IsGfx11() ? 
vector_square_gfx11 : vector_square; - hipLaunchKernelGGL((vector_square_used), dim3(blocks), dim3(threadsPerBlock), 0, - mystream, C_d, A_d, Num); - - HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - - auto thread_count = getHostThreadCount(200, NUM_THREADS); - if (thread_count == 0) { - failed("Thread count is 0"); - } - std::thread *T = new std::thread[thread_count]; - for (int i = 0; i < thread_count; i++) { - // Use different callback for every even thread - // The callbacks will be added to same stream from different threads - if ((i%2) == 0) - T[i] = std::thread(Thread1_func); - else - T[i] = std::thread(Thread2_func); - } - - // Wait until all the threads finish their execution - for (int i = 0; i < thread_count; i++) { - T[i].join(); - } - - HIPCHECK(hipStreamSynchronize(mystream)); - HIPCHECK(hipStreamDestroy(mystream)); - - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - - free(A_h); - free(C_h); - - // Cb_count should match total number of callbacks added from both threads - // Data_mismatch will be updated if there is problem in data validation - if (Cb_count.load() != thread_count) { - failed("All callbacks for stream did not get called!"); - } else if (Data_mismatch.load() != 0) { - failed("Mismatch found in the result of the computation!"); - } - delete[] T; - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_StrmSyncTiming.cpp b/tests/src/runtimeApi/stream/hipStreamACb_StrmSyncTiming.cpp deleted file mode 100644 index d079e6eb38..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamACb_StrmSyncTiming.cpp +++ /dev/null @@ -1,164 +0,0 @@ -/* -* Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to deal -* in the Software without restriction, including without limitation the rights -* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -* IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -* OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -* THE SOFTWARE. -*/ - -// Testcase Description: This test case checks whether hipStreamSynchronize() is taking -// lesser time after the Callback() function launched by hipStreamAddCallback() api. 
- -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - -#define SECONDS_TO_WAIT 2 -#define TO_MICROSECONDS 1000000 - -hipStream_t mystream; -size_t N_elmts = 4096; -bool cbDone = false; -std::atomic Data_mismatch{0}; - -__global__ void vector_square(float* C_d, float* A_d, size_t N_elmts) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N_elmts; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay the thread 1 - if (offset == 1) { - unsigned long long int wait_t = 3200000000, start = clock64(), cur; - do { - cur = clock64() - start; - } while (cur < wait_t); - } -} - -__global__ void vector_square_gfx11(float* C_d, float* A_d, size_t N_elmts) { -#ifdef __HIP_PLATFORM_AMD__ - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N_elmts; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } - - // Delay the thread 1 - if (offset == 1) { - unsigned long long int wait_t = 3200000000, start = wall_clock64(), cur; - do { - cur = wall_clock64() - start; - } while (cur < wait_t); - } -#endif -} - -float *A_h, *C_h; - -static void HIPRT_CB Callback1(hipStream_t stream, hipError_t status, - void* userData) { - // Validate the data - for (size_t i = 0; i < N_elmts; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - Data_mismatch++; - } - } - - // Delay the callback completion - std::this_thread::sleep_for (std::chrono::seconds(SECONDS_TO_WAIT)); - cbDone = true; -} - -int main(int argc, char* argv[]) { - float *A_d, *C_d; - size_t Nbytes = N_elmts * sizeof(float); - float tElapsed = 1.0f; - - A_h = (float*)malloc(Nbytes); - HIPCHECK(A_h == 0 ? 
hipErrorOutOfMemory : hipSuccess); - C_h = (float*)malloc(Nbytes); - HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < N_elmts; i++) { - A_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); - - const unsigned threadsPerBlock = 256; - const unsigned blocks = (N_elmts + 255)/threadsPerBlock; - - auto vector_square_used = IsGfx11() ? vector_square_gfx11 : vector_square; - hipLaunchKernelGGL((vector_square_used), dim3(blocks), dim3(threadsPerBlock), 0, - mystream, C_d, A_d, N_elmts); - HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - HIPCHECK(hipStreamAddCallback(mystream, Callback1, NULL, 0)); - - // Wait untill Callback() function changes the cbDone value to true - while (!cbDone) { - std::this_thread::sleep_for (std::chrono::milliseconds(10)); - } - - // Since the callback is supposed to be called only after an implicit stream - // synchronization, and the runtime cannot continue until the callback is done - // hipStreamSynchronize call should not take much time. 
- auto start = std::chrono::high_resolution_clock::now(); - HIPCHECK(hipStreamSynchronize(mystream)); - auto stop = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(stop - start); - - HIPCHECK(hipStreamDestroy(mystream)); - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(C_d)); - free(A_h); - free(C_h); - - if (Data_mismatch.load() != 0) { - failed("Output from kernel execution is not as expected"); - } - - // HIP runtime cannot proceed further in the queue until callback completes - // Stream synchronize should not have much task to do after callback - // It should just be an extra empty marker wait - // Therefore the hipStreamSynchronize() in the - // main thread should hardly take any time to complete. - - if (duration.count() < 200) { - passed(); - } else { - failed("hipStreamSynchronize is taking more time than expected after Callback()"); - } -} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_ThrdBehaviour.cpp b/tests/src/runtimeApi/stream/hipStreamACb_ThrdBehaviour.cpp deleted file mode 100644 index ec0861c8fb..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamACb_ThrdBehaviour.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * */ - -// Testcase Description: This test case tests if Host thread continues with -// next command after hipStreamAddCallback() api or wait for callback() call to -// finish. Ideally Host thread should not wait for callback to finish. - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - -bool Callback_Completed = false; - -void HIPRT_CB Callback1(hipStream_t stream, hipError_t status, void* userData) { - std::this_thread::sleep_for (std::chrono::seconds(5)); - Callback_Completed = true; -} - -int main(int argc, char* argv[]) { - hipStream_t mystream; - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - HIPCHECK(hipStreamAddCallback(mystream, Callback1, NULL, 0)); - std::this_thread::sleep_for (std::chrono::seconds(1)); - - // Callback_Completed is initialized to false. The same is set to true at - // the end of callback and callback sleeps for 5 seconds. - // So, in case Callback_Completed is true here, it means the main thread - // has waited till callback is complete and is a fail case. 
- if (Callback_Completed == false) { - HIPCHECK(hipStreamDestroy(mystream)); - passed(); - } else { - HIPCHECK(hipStreamDestroy(mystream)); - failed("Unexpected: Host thread is waiting for callback to finish"); - } -} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_order.cpp b/tests/src/runtimeApi/stream/hipStreamACb_order.cpp deleted file mode 100644 index 982191455f..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamACb_order.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- * */ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - -// Checks the callback execution in the same order it was added -// Also, it checks if the number of callbacks executed are same as the number -// of callbacks added - -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - -#define NUM_CALLS 10 -hipStream_t mystream; -bool Callback_SequenceMismatch = false; -std::atomic Cb_ordinal{0}; - -void HIPRT_CB Stream_Callback(hipStream_t stream, hipError_t status, - void* userData) { - // Userdata has the order of the callback. It should match with - // the callback counter Cb_ordinal as the sequence of callback - // should match the sequence of callback addition - if (*(reinterpret_cast(userData)) == Cb_ordinal) { - // Increment the Cb_ordinal to prepare for next sequence - Cb_ordinal++; - } else { - Callback_SequenceMismatch = true; - } - - delete reinterpret_cast(userData); -} - -int main(int argc, char* argv[]) { - int *ptr; - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - for (int i = 0; i< NUM_CALLS; i++) { - ptr = new int; - *ptr = i; - // Pass the userdata with the order of the callback addition - HIPCHECK(hipStreamAddCallback(mystream, Stream_Callback, - reinterpret_cast(ptr), 0)); - } - - HIPCHECK(hipStreamSynchronize(mystream)); - HIPCHECK(hipStreamDestroy(mystream)); - - if (!(Cb_ordinal == (NUM_CALLS))) { - failed("All callbacks for stream did not get called!"); - } - - if (Callback_SequenceMismatch == false) { - passed(); - } else { - failed("hipStreamAddCallback() calls did not execute in sequence!"); - } -} diff --git a/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp b/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp deleted file mode 100644 index de4bbe5c14..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* -Copyright (c) 2015 
- 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "hip/hip_runtime.h" -#include "test_common.h" - -#ifdef __HIP_PLATFORM_AMD__ -#define HIPRT_CB -#endif - -__global__ void vector_square(float* C_d, float* A_d, size_t N) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } -} - -float *A_h, *C_h; -bool cbDone = false; - -static void HIPRT_CB Callback(hipStream_t stream, hipError_t status, void* userData) { - for (size_t i = 0; i < N; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - warn("Data mismatch %zu", i); - } - } - printf("PASSED!\n"); - cbDone = true; -} - -int main(int argc, char* argv[]) { - float *A_d, *C_d; - size_t Nbytes = N * sizeof(float); - - A_h = (float*)malloc(Nbytes); - HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - C_h = (float*)malloc(Nbytes); - HIPCHECK(C_h == 0 ? 
hipErrorOutOfMemory : hipSuccess); - - // Fill with Phi + i - for (size_t i = 0; i < N; i++) { - A_h[i] = 1.618f + i; - } - - HIPCHECK(hipMalloc(&A_d, Nbytes)); - HIPCHECK(hipMalloc(&C_d, Nbytes)); - - hipStream_t mystream; - HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); - - HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); - - const unsigned blocks = 512; - const unsigned threadsPerBlock = 256; - hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream, C_d, A_d, - N); - - HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - HIPCHECK(hipStreamAddCallback(mystream, Callback, NULL, 0)); - - while (!cbDone) std::this_thread::sleep_for(std::chrono::milliseconds(10)); -} diff --git a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp deleted file mode 100644 index 59d05204ee..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp +++ /dev/null @@ -1,409 +0,0 @@ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include "test_common.h" - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#define WORKAROUND 1 // Enable (1) this to make stream thread-safe by a workaround - -template // = queue blocks, until task is finished in enqueue(queue,task) -class QueueHipRt; - -// Queue types used in the tests -using TestQueues = std::tuple, QueueHipRt>; - - -// --- Implementation - -#define HIP_ASSERT(x) (assert((x)==hipSuccess)) -#define HIP_ASSERT_IGNORE(x,ign) auto err=x; HIP_ASSERT(err==ign ? 
hipSuccess : err) - -#ifdef __HIP_PLATFORM_AMD__ - #define HIPRT_CB -#endif - -template -static auto currentThreadWaitFor(QueueHipRt const & queue) -> void; - -template -class QueueHipRt -{ -public: - static constexpr bool isBlocking = IsBlocking; - //----------------------------------------------------------------------------- - QueueHipRt( - int dev) : - m_dev(dev), - m_HipQueue() - { - HIP_ASSERT( - hipSetDevice( - m_dev)); - HIP_ASSERT( - hipStreamCreateWithFlags( - &m_HipQueue, - hipStreamNonBlocking)); - } - //----------------------------------------------------------------------------- - QueueHipRt(QueueHipRt const &) = delete; - //----------------------------------------------------------------------------- - QueueHipRt(QueueHipRt &&) = delete; - //----------------------------------------------------------------------------- - auto operator=(QueueHipRt const &) -> QueueHipRt & = delete; - //----------------------------------------------------------------------------- - auto operator=(QueueHipRt &&) -> QueueHipRt & = delete; - //----------------------------------------------------------------------------- - ~QueueHipRt() - { - if(isBlocking) { -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - // we are a non-blocking queue, so we have to wait here with its destruction until all spawned tasks have been processed - currentThreadWaitFor(*this); -#endif - } - HIP_ASSERT( - hipSetDevice( - m_dev)); - HIP_ASSERT( - hipStreamDestroy( - m_HipQueue)); - } - -public: - int m_dev; //!< The device this queue is bound to. 
- hipStream_t m_HipQueue; - -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - int m_callees = 0; - std::mutex m_mutex; -#endif -}; - -template -struct Enqueue -{ - //############################################################################# - enum class CallbackState - { - enqueued, - notified, - finished, - }; - - //############################################################################# - struct CallbackSynchronizationData : public std::enable_shared_from_this - { - std::mutex m_mutex; - std::condition_variable m_event; - CallbackState state = CallbackState::enqueued; - }; - - //----------------------------------------------------------------------------- - static void HIPRT_CB hipRtCallback(hipStream_t /*queue*/, hipError_t /*status*/, void *arg) - { - // explicitly copy the shared_ptr so that this method holds the state even when the executing thread has already finished. - const auto pCallbackSynchronizationData = reinterpret_cast(arg)->shared_from_this(); - - // Notify the executing thread. - { - std::unique_lock lock(pCallbackSynchronizationData->m_mutex); - pCallbackSynchronizationData->state = CallbackState::notified; - } - pCallbackSynchronizationData->m_event.notify_one(); - - // Wait for the executing thread to finish the task if it has not already finished. 
- std::unique_lock lock(pCallbackSynchronizationData->m_mutex); - if(pCallbackSynchronizationData->state != CallbackState::finished) - { - pCallbackSynchronizationData->m_event.wait( - lock, - [pCallbackSynchronizationData](){ - return pCallbackSynchronizationData->state == CallbackState::finished; - } - ); - } - } - - //----------------------------------------------------------------------------- - template - static auto enqueue( - QueueHipRt & queue, - TTask const & task) - -> void - { - -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - { - // thread-safe callee incrementing - std::lock_guard guard(queue.m_mutex); - queue.m_callees += 1; - } -#endif - auto pCallbackSynchronizationData = std::make_shared(); - // test example: https://github.com/ROCm-Developer-Tools/HIP/blob/roc-1.9.x/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp - HIP_ASSERT(hipStreamAddCallback( - queue.m_HipQueue, - hipRtCallback, - pCallbackSynchronizationData.get(), - 0u)); - - // We start a new std::thread which stores the task to be executed. - // This circumvents the limitation that it is not possible to call HIP methods within the HIP callback thread. - // The HIP thread signals the std::thread when it is ready to execute the task. - // The HIP thread is waiting for the std::thread to signal that it is finished executing the task - // before it executes the next task in the queue (HIP stream). - std::thread t( - [pCallbackSynchronizationData, - task -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - ,&queue // requires queue's destructor to wait for all tasks -#endif - ](){ - -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - // thread-safe task execution and callee decrementing - std::lock_guard guard(queue.m_mutex); -#endif - - // If the callback has not yet been called, we wait for it. 
- { - std::unique_lock lock(pCallbackSynchronizationData->m_mutex); - if(pCallbackSynchronizationData->state != CallbackState::notified) - { - pCallbackSynchronizationData->m_event.wait( - lock, - [pCallbackSynchronizationData](){ - return pCallbackSynchronizationData->state == CallbackState::notified; - } - ); - } - - task(); - - // Notify the waiting HIP thread. - pCallbackSynchronizationData->state = CallbackState::finished; - } - pCallbackSynchronizationData->m_event.notify_one(); -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - queue.m_callees -= 1; -#endif - } - ); - if(isBlocking) - t.join(); // => waiting for task completion - else - t.detach(); // => do not wait for task completion - } -}; -//############################################################################# -//! The HIP RT non-blocking queue test trait specialization. -struct Empty -{ - //----------------------------------------------------------------------------- - template - static auto empty( - QueueHipRt const & queue) - -> bool - { - -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - return (queue.m_callees==0); -#else - - // Query is allowed even for queues on non current device. - hipError_t ret = hipSuccess; - HIP_ASSERT_IGNORE( - ret = hipStreamQuery( - queue.m_HipQueue), - hipErrorNotReady); - return (ret == hipSuccess); -#endif - } -}; - -template -auto currentThreadWaitFor(QueueHipRt const & queue) -> void -{ -#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) - while(queue.m_callees>0) { - std::this_thread::sleep_for(std::chrono::milliseconds(10u)); - } -#else - // Sync is allowed even for queues on non current device. 
- HIP_ASSERT( hipStreamSynchronize( - queue.m_HipQueue)); -#endif -} - - - - -// --- Tests - -#define TEMPLATE_LIST_TEST_CASE(TestName) \ -template static void TestName (std::atomic &check); \ -static int TestName##Runner () { \ - std::atomic check{0}; \ - TestName< QueueHipRt >(check); \ - fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ - TestName< QueueHipRt >(check); \ - fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ - return check.load(); \ -} \ -template static void TestName (std::atomic &check) - -// add 1 if a check fails -#define CHECK(result) do{int arg=(!(result)); fprintf(stderr, "Checking " #result " %d\n", arg); check.fetch_add(arg);}while(false) - -//----------------------------------------------------------------------------- -TEMPLATE_LIST_TEST_CASE( queueIsInitiallyEmpty ) -{ - TestType queue{0}; - CHECK(Empty::empty(queue)); -} - -//----------------------------------------------------------------------------- -TEMPLATE_LIST_TEST_CASE( queueCallbackIsWorking ) -{ - std::promise promise; - auto task = [&](){ promise.set_value(true); }; - TestType queue{0}; - Enqueue enqueue; - enqueue.enqueue( - queue, - task - ); - - CHECK(promise.get_future().get()); -} - -//----------------------------------------------------------------------------- -TEMPLATE_LIST_TEST_CASE( queueWaitShouldWork ) -{ - bool CallbackFinished = false; - auto task = - [&CallbackFinished]() noexcept - { - std::this_thread::sleep_for(std::chrono::milliseconds(100u)); - CallbackFinished = true; - }; - TestType queue{0}; - Enqueue enqueue; - enqueue.enqueue( - queue, - task - ); - - currentThreadWaitFor(queue); - CHECK(CallbackFinished); -} - -//----------------------------------------------------------------------------- -TEMPLATE_LIST_TEST_CASE( queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinished ) -{ - bool CallbackFinished = false; - TestType queue{0}; - auto task = [&queue, 
&CallbackFinished, &check]() noexcept - { - CHECK(!Empty::empty(queue)); - std::this_thread::sleep_for(std::chrono::milliseconds(100u)); - CallbackFinished = true; - }; - Enqueue enqueue; - enqueue.enqueue( - queue, - task - ); - // A non-blocking queue will always stay empty because the task has been executed immediately. - if(!TestType::isBlocking) - { - currentThreadWaitFor(queue); - } - - CHECK(Empty::empty(queue)); - CHECK(CallbackFinished); -} - -//----------------------------------------------------------------------------- -TEMPLATE_LIST_TEST_CASE( queueShouldNotExecuteTasksInParallel ) -{ - std::atomic taskIsExecuting(false); - std::promise firstTaskFinished; - std::future firstTaskFinishedFuture = firstTaskFinished.get_future(); - std::promise secondTaskFinished; - std::future secondTaskFinishedFuture = secondTaskFinished.get_future(); - - TestType queue{0}; - - std::thread thread1( - [&queue, &taskIsExecuting, &firstTaskFinished, &check]() - { - auto task1 = [&taskIsExecuting, &firstTaskFinished, &check]() noexcept - { - CHECK(!taskIsExecuting.exchange(true)); - std::this_thread::sleep_for(std::chrono::milliseconds(100u)); - CHECK(taskIsExecuting.exchange(false)); - firstTaskFinished.set_value(); - }; - Enqueue enqueue; - enqueue.enqueue( - queue, - task1 - ); - }); - - std::thread thread2( - [&queue, &taskIsExecuting, &secondTaskFinished, &check]() - { - auto task2 = [&taskIsExecuting, &secondTaskFinished, &check]() noexcept - { - CHECK(!taskIsExecuting.exchange(true)); - std::this_thread::sleep_for(std::chrono::milliseconds(100u)); - CHECK(taskIsExecuting.exchange(false)); - secondTaskFinished.set_value(); - }; - - Enqueue enqueue; - enqueue.enqueue( - queue, - task2 - ); - }); - - // Both tasks have to be enqueued - thread1.join(); - thread2.join(); - - currentThreadWaitFor(queue); - - firstTaskFinishedFuture.get(); - secondTaskFinishedFuture.get(); -} - -#define TESTER(name) do { \ - int result = name (); \ - fprintf(stderr, #name " %s\n", 
result?"Errors":"No Errors"); \ - if (result) { failed(#name " failed\n"); } \ -} while (false) - -int main() -{ - TESTER(queueIsInitiallyEmptyRunner); - TESTER(queueCallbackIsWorkingRunner); - TESTER(queueWaitShouldWorkRunner); - TESTER(queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinishedRunner); -// TESTER(queueShouldNotExecuteTasksInParallelRunner); - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamCreateWithPriority.cpp b/tests/src/runtimeApi/stream/hipStreamCreateWithPriority.cpp deleted file mode 100644 index af9f4497c6..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamCreateWithPriority.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define MEMCPYSIZE 64*1024*1024 -#define NUMITERS 2 -#define GRIDSIZE 1024 -#define BLOCKSIZE 256 - -// helper rountine to initialize memory -template -void mem_init(T* buf, size_t n) -{ - for (int i = 0; i < n; i++) - { - buf[i] = i; - } -} - -// kernel to copy n elements from src to dst -template -__global__ void memcpy_kernel(T* dst, T* src, size_t n) -{ - int num = gridDim.x * blockDim.x; - int id = blockDim.x * blockIdx.x + threadIdx.x; - - for (int i = id; i < n; i += num) - { - dst[i] = src[i]; - } -} - -template -int runTest() -{ - size_t size = NUMITERS*MEMCPYSIZE; - - // get the range of priorities available - #define OP(x) \ - int priority_##x; \ - bool enable_priority_##x = false; - OP(low) - OP(normal) - OP(high) - #undef OP - HIPCHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high)); - printf("HIP stream priority range - low: %d to high: %d\n", priority_low, priority_high); - - // Check if priorities are indeed supported - if (priority_low == 0 && priority_high == 0) { - printf("The device doesn't support stream priorities\n"); - passed(); - } - - // Enable/disable priorities based on number of available priority levels - enable_priority_low = true; - enable_priority_high = true; - if ((priority_low - priority_high) > 1) enable_priority_normal = true; - if (enable_priority_normal) priority_normal = ((priority_low + priority_high) / 2); - - // create streams with highest and lowest available priorities - #define OP(x) \ - hipStream_t stream_##x; \ - if (enable_priority_##x) { \ - HIPCHECK(hipStreamCreateWithPriority(&stream_##x, hipStreamDefault, priority_##x)); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // allocate and initialise host source and destination buffers - #define OP(x) \ - T* src_h_##x; \ - T* dst_h_##x; \ - if (enable_priority_##x) { \ - src_h_##x = 
(T*)malloc(size); \ - if (src_h_##x == NULL) { printf("src_h_%s malloc failed!\n", #x); exit(-1); } \ - mem_init(src_h_##x, (size / sizeof(T))); \ - dst_h_##x = (T*)malloc(size); \ - if (dst_h_##x == NULL) { printf("dst_h_%s malloc failed!\n", #x); exit(-1); } \ - memset(dst_h_##x, 0, size); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // allocate and initialize device source and destination buffers - #define OP(x) \ - T* src_d_##x; \ - T* dst_d_##x; \ - if (enable_priority_##x) { \ - HIPCHECK(hipMalloc(&src_d_##x, size)); \ - HIPCHECK(hipMemcpy(src_d_##x, src_h_##x, size, hipMemcpyHostToDevice)); \ - HIPCHECK(hipMalloc(&dst_d_##x, size)); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // create events for measuring time spent in kernel execution - #define OP(x) \ - hipEvent_t event_start_##x; \ - hipEvent_t event_end_##x; \ - if (enable_priority_##x) { \ - HIPCHECK(hipEventCreate(&event_start_##x)); \ - HIPCHECK(hipEventCreate(&event_end_##x)); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // record start events for each of the priority streams - #define OP(x) \ - if (enable_priority_##x) { \ - HIPCHECK(hipEventRecord(event_start_##x, stream_##x)); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // launch kernels repeatedly on each of the prioritiy streams - for (int i = 0; i < size; i += MEMCPYSIZE) - { - int j = i / sizeof(T); - #define OP(x) \ - if (enable_priority_##x) { \ - hipLaunchKernelGGL((memcpy_kernel), dim3(GRIDSIZE), dim3(BLOCKSIZE), 0, stream_##x, dst_d_##x + j, src_d_##x + j, (MEMCPYSIZE / sizeof(T))); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - } - - // record end events for each of the priority streams - #define OP(x) \ - if (enable_priority_##x) { \ - HIPCHECK(hipEventRecord(event_end_##x, stream_##x)); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // synchronize events for each of the priority streams - #define OP(x) \ - if (enable_priority_##x) { \ - 
HIPCHECK(hipEventSynchronize(event_end_##x)); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // compute time spent for memcpy in each stream - #define OP(x) \ - float time_spent_##x; \ - if (enable_priority_##x) { \ - HIPCHECK(hipEventElapsedTime(&time_spent_##x, event_start_##x, event_end_##x)); \ - printf("time spent for memcpy in %6s priority stream: %.3lf ms\n", #x, time_spent_##x); \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // sanity check - #define OP(x) \ - if (enable_priority_##x) { \ - HIPCHECK(hipMemcpy(dst_h_##x, dst_d_##x, size, hipMemcpyDeviceToHost)); \ - if (memcmp(dst_h_##x, src_h_##x, size) != 0) { printf("memcmp for %s failed!\n", #x); exit(-1); } \ - } - OP(low) - OP(normal) - OP(high) - #undef OP - - // validate that stream priorities are working as expected - #define OP(x, y) \ - if (enable_priority_##x && enable_priority_##y) { \ - if ((1.05f * time_spent_##x) < time_spent_##y) { printf("FAILED!"); exit(-1); } \ - } - OP(low, normal) - OP(normal, high) - OP(low, high) - #undef OP - passed(); -} - -int main(int argc, char **argv) -{ - HipTest::parseStandardArguments(argc, argv, false); - return runTest(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamGetCUMask.cpp b/tests/src/runtimeApi/stream/hipStreamGetCUMask.cpp deleted file mode 100644 index 1ea05c7ec4..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamGetCUMask.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -using namespace std; - -int main(int argc, char* argv[]) { - hipStream_t stream; - vector cuMask(8); - stringstream ss; - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - if (nGpu < 1) { - cout << "info: didn't find any GPU! 
skipping the test!\n"; - passed(); - return 0; - } - - static int device = 0; - HIPCHECK(hipSetDevice(device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device)); - cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name << - " with " << props.multiProcessorCount << " CUs" << endl; - - std::string str_out, str_err = "hipErrorInvalidValue"; - - char* gCUMask = NULL; - string globalCUMask(""); - - gCUMask = getenv("ROC_GLOBAL_CU_MASK"); - if (gCUMask != NULL && gCUMask[0] != '\0') { - globalCUMask.assign(gCUMask); - for_each(globalCUMask.begin(), globalCUMask.end(), [](char & c) { - c = ::tolower(c); - }); - } - - // make a default CU mask bit-array where all CUs are active - // this default mask is expected to be returned when there is no - // custom or global CU mask defined - std::vector defaultCUMask; - uint32_t temp = 0; - uint32_t bit_index = 0; - for (uint32_t i = 0; i < props.multiProcessorCount; i++) { - temp |= 1UL << bit_index; - if (bit_index >= 32) { - defaultCUMask.push_back(temp); - temp = 0; - bit_index = 0; - temp |= 1UL << bit_index; - } - bit_index += 1; - } - if (bit_index != 0) { - defaultCUMask.push_back(temp); - } - - str_out = hipGetErrorName(hipExtStreamGetCUMask(0, cuMask.size(), 0)); - if ((str_err.compare(str_out)) != 0) { - failed("hipExtStreamGetCUMask returned wrong error code!"); - } - - str_out = hipGetErrorName(hipExtStreamGetCUMask(0, 0, &cuMask[0])); - if ((str_err.compare(str_out)) != 0) { - failed("hipExtStreamGetCUMask returned wrong error code!"); - } - - // read the CU mask for the null stream, when this call returns - // the content of cuMask should be either the global CU mask (if it is defined) or - // the defautl CU mask where all CUs are active - HIPCHECK(hipExtStreamGetCUMask(0, cuMask.size(), &cuMask[0])); - - ss << std::hex; - for (int i = cuMask.size() - 1; i >= 0; i--) { - ss << cuMask[i]; - } - - // remove extra 0 from ss if any present - size_t found = 
ss.str().find_first_not_of("0"); - if (found != string::npos) { - ss.str(ss.str().substr(found, ss.str().length())); - } - - if (globalCUMask.size() > 0) { - if (ss.str().compare(globalCUMask) != 0) { - failed("Error! expected the CU mask: %s but got %s", globalCUMask.c_str(), ss.str().c_str()); - } - } else { - for (auto i = 0 ; i < min(cuMask.size(), defaultCUMask.size()); i++) { - if (cuMask[i] != defaultCUMask[i]) { - failed("Error! expected the CU mask: %u but got %u", defaultCUMask[i], cuMask[i]); - } - } - } - - cout << "info: CU mask for the default stream is: 0x" << ss.str().c_str() << endl; - - vector cuMask1(defaultCUMask); - if (props.major >= 10) { - // For gfx >= 10, one work group processor encompasses 2 CUs & - // hence the CUs need to be enabled in pair - cuMask1[0] = 0xc; - } else { - cuMask1[0] = 0xe; - } - - HIPCHECK(hipExtStreamCreateWithCUMask(&stream, cuMask1.size(), cuMask1.data())); - ss.str(""); - for (int i = cuMask1.size() - 1; i >= 0; i--) { - ss << cuMask1[i]; - } - cout << "info: setting a custom CU mask 0x" << ss.str() << " for stream " << stream << endl; - - HIPCHECK(hipExtStreamGetCUMask(stream, cuMask.size(), &cuMask[0])); - - - if (!gCUMask) { - for (int i = 0; i < cuMask1.size(); i++) { - if (cuMask1[i] != cuMask[i]) { - HIPCHECK(hipStreamDestroy(stream)); - failed("Error! 
expected the CU mask: %u but got %u", cuMask1[i], cuMask[i]); - } - } - } - - ss.str(""); - for (int i = cuMask.size() - 1; i >= 0; i--) { - ss << cuMask[i]; - } - found = ss.str().find_first_not_of("0"); - if (found != string::npos) { - ss.str(ss.str().substr(found, ss.str().length())); - } - - cout << "info: reading back CU mask 0x" << ss.str() << " for stream " << stream << endl; - - HIPCHECK(hipStreamDestroy(stream)); - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamGetFlags.cpp b/tests/src/runtimeApi/stream/hipStreamGetFlags.cpp deleted file mode 100644 index 953ae3d659..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamGetFlags.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" - - -int main(int argc, char* argv[]) { - hipStream_t stream; - unsigned int flags; - HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamDefault)); - HIPCHECK(hipStreamGetFlags(stream, &flags)); - HIPASSERT(flags == 0); - HIPCHECK(hipStreamDestroy(stream)); - - HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); - HIPCHECK(hipStreamGetFlags(stream, &flags)); - HIPASSERT(flags == 1); - HIPCHECK(hipStreamDestroy(stream)); - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamGetPriority.cpp b/tests/src/runtimeApi/stream/hipStreamGetPriority.cpp deleted file mode 100644 index 8d89a78ce4..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamGetPriority.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 - * TEST: %t - * HIT_END - */ - - -#include "test_common.h" - -int main(int argc, char *argv[]) { - int numDevices; - hipGetDeviceCount(&numDevices); - for (int i = 0; i < numDevices; i++) { - hipStream_t stream; - int priority; - int priority_normal; - int priority_low; - int priority_high; - int priority_check; - - // Test is to get the Stream Priority Range - HIPCHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high)); - printf("Priority range is %d for low and %d for high \n", priority_low, priority_high); - priority_normal = priority_low + priority_high; - - // Check if priorities are indeed supported - if ((priority_low + priority_high) != 0) { - passed(); // exit the test since priorities are not supported - } - - // Checking Priority of default stream - HIPCHECK(hipStreamCreate(&stream)); - HIPCHECK(hipStreamGetPriority(stream, &priority)); - if (priority_normal != priority) { - failed("Unable to set Normal Priority for the stream"); - } - HIPCHECK(hipStreamDestroy(stream)); - - // Creating Stream with Priorities - HIPCHECK(hipStreamCreateWithPriority(&stream, hipStreamDefault, priority_high)); - HIPCHECK(hipStreamGetPriority(stream, &priority_check)); - if (priority_check != priority_high) { - failed("Unable to set high priority for the stream"); - } - HIPCHECK(hipStreamDestroy(stream)); - - HIPCHECK(hipStreamCreateWithPriority(&stream, hipStreamDefault, priority_low)); - HIPCHECK(hipStreamGetPriority(stream, &priority_check)); - if (priority_check != priority_low) { - failed("Unable to set low priority for the stream"); - } - HIPCHECK(hipStreamDestroy(stream)); - - // creating a stream with boundry cases - HIPCHECK(hipStreamCreateWithPriority(&stream, hipStreamNonBlocking, priority_low+1)); - HIPCHECK(hipStreamGetPriority(stream, &priority_check)); - if (priority_check != priority_low) { - failed("setting priority failed "); - } - 
HIPCHECK(hipStreamDestroy(stream)); - - HIPCHECK(hipStreamCreateWithPriority(&stream, hipStreamNonBlocking, priority_high-1)); - HIPCHECK(hipStreamGetPriority(stream, &priority_check)); - if (priority_check != priority_high) { - failed("setting priority failed "); - } - HIPCHECK(hipStreamDestroy(stream)); - } - - passed(); - return 0; -} diff --git a/tests/src/runtimeApi/stream/hipStreamL5.cpp b/tests/src/runtimeApi/stream/hipStreamL5.cpp deleted file mode 100644 index 1bdeeb8e3e..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamL5.cpp +++ /dev/null @@ -1,851 +0,0 @@ -/* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include "hipStream.h" - -/* -The naming of tests is done by assigning a number to -type of disptach possible on stream. -The following are possible stream dispatches: -1. 
H2H - hipMemcpyHostToHost : indexed as 1 -2. H2D - hipMemcpyHostToDevice : indexed as 2 -3. Ker - Kernel Dispatch : indexed as 3 -4. D2D - hipMemcpyDeviceToDevice : indexed as 4 -5. D2H - hipMemcpyDeviceToHost : indexed as 5 -For example, -a test for Ker, D2D, D2H, H2H, H2D is given as test34512(); -Note that all memory copies are Async. - -invalid{ -*WARNING: The commented out assertions are failing cases. -According to my observation, they are happening with tests -which end in HostToHost and take data from previous -dispatch in the stream. This also include disjoint data passes. -The list of failing tests are: -test23451(); -test32451(); -test42351(); - -For disjoint data passed: -test24513 -test25134 -test34512 -} -*/ - -template -void test12345() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Ad, *Bd; - initArrays(&Ad, &Ah, N, true); - initArrays(&Bd, &Bh, N, true); - initArrays(&Ch, N, false, true); - - setArray(Ah, N, T(1)); - - H2HAsync(Bh, Ah, size, stream); - H2DAsync(Ad, Bh, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Ad); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ch, Bd, size, stream); - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Ah[10] + T(1) == Ch[10]); - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test13452() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Ad, Dh, size); - - H2HAsync(Bh, Ah, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 
512), dim3(512), 0, stream, Ad); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ch, Bd, size, stream); - H2DAsync(Cd, Ch, size, stream); - HIPCHECK(hipDeviceSynchronize()); - - D2H(Eh, Cd, size); - - HIPASSERT(Ah[10] == Bh[10]); - HIPASSERT(Eh[10] == Dh[10] + T(1)); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test14523() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Ad, Dh, size); - - H2HAsync(Bh, Ah, size, stream); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ch, Bd, size, stream); - H2DAsync(Cd, Ch, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Cd); - - HIPCHECK(hipDeviceSynchronize()); - - D2H(Eh, Cd, size); - - HIPASSERT(Ah[10] == Bh[10]); - HIPASSERT(Ch[10] + T(1) == Eh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test15234() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Ad, Dh, size); - - H2HAsync(Bh, Ah, size, stream); - D2HAsync(Ch, Ad, size, stream); - H2DAsync(Bd, Ch, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Bd); - D2DAsync(Cd, 
Bd, size, stream); - - D2H(Eh, Cd, size); - - HIPASSERT(Ah[10] == Bh[10]); - HIPASSERT(Eh[10] == Dh[10] + T(1)); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test23451() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Ad, *Bd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - - setArray(Ah, N, T(1)); - - H2DAsync(Ad, Ah, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Ad); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Bh, Bd, size, stream); - H2HAsync(Ch, Bh, size, stream); - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(Ah[10] + T(1) == Ch[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test24513() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Cd, Dh, size); - - H2DAsync(Ad, Ah, size, stream); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Bh, Bd, size, stream); - H2HAsync(Ch, Bh, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Cd); - HIPCHECK(hipDeviceSynchronize()); - - D2H(Eh, Cd, size); - - HIPASSERT(Eh[0] == Dh[0] + T(1)); - HIPASSERT(Ah[0] == Ch[0]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test25134() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - 
initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Bd, Dh, size); - - H2DAsync(Ad, Ah, size, stream); - D2HAsync(Bh, Ad, size, stream); - H2HAsync(Ch, Bh, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Bd); - D2DAsync(Cd, Bd, size, stream); - - D2H(Eh, Cd, size); - - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Ah[10] == Ch[10]); - HIPASSERT(Dh[10] + T(1) == Eh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test21345() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Ah, *Bh, *Ch, *Dh; - T *Ad, *Bd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, true); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Bh, N, T(2)); - - H2DAsync(Ad, Ah, size, stream); - H2HAsync(Ch, Bh, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Ad); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Dh, Bd, size, stream); - - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Bh[10] == Ch[10]); - HIPASSERT(Ah[10] + T(1) == Dh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test34512() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Bh, *Ch, *Dh; - T *Ah, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, true); - initArrays(&Ah, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - 
initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - - H2D(Ad, Ah, size); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Ad); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Bh, Bd, size, stream); - H2HAsync(Ch, Bh, size, stream); - H2DAsync(Cd, Ch, size, stream); - - D2H(Dh, Cd, size); - - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(Ah[10] + T(1) == Dh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test35124() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Ah, *Bh; - T *Ch, *Dh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, false); - initArrays(&Dh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Dh, N, T(1)); - - H2D(Ad, Dh, size); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Ad); - D2HAsync(Ah, Ad, size, stream); - H2HAsync(Bh, Ah, size, stream); - H2DAsync(Bd, Bh, size, stream); - D2DAsync(Cd, Bd, size, stream); - - D2H(Ch, Cd, size); - - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Dh[10] + T(1) == Ch[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test31245() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Dh, N, T(1)); - setArray(Ah, N, T(2)); - - H2D(Ad, Dh, size); - - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Ad); - H2HAsync(Bh, Ah, size, stream); - H2DAsync(Bd, Bh, size, stream); - 
D2DAsync(Cd, Bd, size, stream); - D2HAsync(Ch, Cd, size, stream); - - D2H(Eh, Ad, size); - - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Dh[10] + T(1) == Eh[10]); - HIPASSERT(Bh[10] == Ch[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - - -template -void test32451() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Eh, N, T(2)); - - H2D(Ad, Eh, size); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Ad); - H2DAsync(Bd, Ah, size, stream); - D2DAsync(Cd, Bd, size, stream); - D2HAsync(Bh, Cd, size, stream); - H2HAsync(Ch, Bh, size, stream); - HIPCHECK(hipDeviceSynchronize()); - D2H(Dh, Ad, size); - - HIPASSERT(Ah[10] == Ch[10]); - HIPASSERT(Eh[10] + T(1) == Dh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test45123() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - const size_t size = N * sizeof(T); - - T *Ah, *Bh; - T *Ch, *Dh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, false); - initArrays(&Dh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Dh, N, T(1)); - - H2D(Ad, Dh, size); - - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ah, Bd, size, stream); - H2HAsync(Bh, Ah, size, stream); - H2DAsync(Cd, Bh, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Cd); - D2H(Ch, Cd, size); - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Dh[10] + T(1) == Ch[10]); - - 
HIPCHECK(hipStreamDestroy(stream)); -} - - -template -void test41235() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - const size_t size = N * sizeof(T); - - T *Ah, *Bh; - T* Ch; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ch, N, T(1)); - - H2D(Ad, Ch, size); - - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ah, Bd, size, stream); - H2DAsync(Cd, Ah, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Cd); - D2HAsync(Bh, Cd, size, stream); - - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Ch[10] + T(1) == Bh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test42351() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Dh, N, T(2)); - setArray(Ah, N, T(1)); - - H2D(Ad, Dh, size); - - D2DAsync(Bd, Ad, size, stream); - H2DAsync(Cd, Ah, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Cd); - D2HAsync(Bh, Cd, size, stream); - H2HAsync(Ch, Bh, size, stream); - - D2H(Eh, Bd, size); - - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(Dh[10] == Eh[10]); - HIPASSERT(Ah[10] + T(1) == Ch[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test43512() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Ah, *Bh; - T *Ch, *Dh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, 
false, true); - initArrays(&Ch, N, false, false); - initArrays(&Dh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Dh, N, T(1)); - - H2D(Ad, Dh, size); - - D2DAsync(Bd, Ad, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Bd); - D2HAsync(Ah, Bd, size, stream); - H2HAsync(Bh, Ah, size, stream); - H2DAsync(Cd, Bh, size, stream); - - D2H(Ch, Cd, size); - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(Dh[10] + T(1) == Ch[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test51234() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Ah, *Bh; - T *Ch, *Dh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, false); - initArrays(&Dh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Dh, N, T(1)); - - H2D(Ad, Dh, size); - - D2HAsync(Ah, Ad, size, stream); - H2HAsync(Bh, Ah, size, stream); - H2DAsync(Bd, Bh, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Bd); - D2DAsync(Cd, Bd, size, stream); - - D2H(Ch, Cd, size); - - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Ch[10] == Dh[10] + T(1)); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test52341() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - const size_t size = N * sizeof(T); - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Eh, N, T(1)); - setArray(Bh, N, T(2)); - - H2D(Ad, Eh, size); - - 
D2HAsync(Ah, Ad, size, stream); - H2DAsync(Bd, Ah, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Bd); - D2DAsync(Cd, Bd, size, stream); - H2HAsync(Ch, Bh, size, stream); - - D2H(Dh, Cd, size); - - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Eh[10] + T(1) == Dh[10]); - HIPASSERT(Ch[10] == Bh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test53412() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch, *Dh; - T *Eh, *Fh, *Gh; - T *Ad, *Bd, *Cd, *Dd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, true); - initArrays(&Eh, N, false, false); - initArrays(&Fh, N, false, false); - initArrays(&Gh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - initArrays(&Dd, N, true, false); - - setArray(Dh, N, T(1)); - setArray(Eh, N, T(2)); - setArray(Bh, N, T(3)); - - H2D(Ad, Dh, size); - H2D(Bd, Eh, size); - - D2HAsync(Ah, Ad, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Bd); - D2DAsync(Cd, Bd, size, stream); - H2HAsync(Ch, Bh, size, stream); - H2DAsync(Dd, Ch, size, stream); - - D2H(Fh, Cd, size); - D2H(Gh, Dd, size); - - HIPASSERT(Ah[10] == Dh[10]); - HIPASSERT(Eh[10] + T(1) == Fh[10]); - HIPASSERT(Bh[10] == Gh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test54123() { - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t size = N * sizeof(T); - - T *Ah, *Bh, *Ch; - T *Dh, *Eh, *Fh, *Gh; - T *Ad, *Bd, *Cd, *Dd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Fh, N, false, false); - initArrays(&Gh, N, false, false); - initArrays(&Ad, N, true, 
false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - initArrays(&Dd, N, true, false); - - setArray(Dh, N, T(1)); - setArray(Eh, N, T(1)); - setArray(Bh, N, T(1)); - - H2D(Ad, Dh, size); - H2D(Bd, Eh, size); - - D2HAsync(Ah, Ad, size, stream); - D2DAsync(Cd, Bd, size, stream); - H2HAsync(Ch, Bh, size, stream); - H2DAsync(Dd, Ch, size, stream); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 512), dim3(512), 0, stream, Dd); - - D2H(Fh, Cd, size); - D2H(Gh, Dd, size); - - HIPCHECK(hipDeviceSynchronize()); - HIPASSERT(Dh[10] == Ah[10]); - HIPASSERT(Eh[10] == Fh[10]); - HIPASSERT(Bh[10] + T(1) == Gh[10]); - - HIPCHECK(hipStreamDestroy(stream)); -} - -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, true); - - test12345(); - test13452(); - test14523(); - test15234(); - - test23451(); - test24513(); - test25134(); - test21345(); - - test34512(); - test35124(); - test31245(); - test32451(); - - test45123(); - test41235(); - test42351(); - test43512(); - - test51234(); - test52341(); - test53412(); - test54123(); - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamSync2.cpp b/tests/src/runtimeApi/stream/hipStreamSync2.cpp deleted file mode 100644 index 463a6a3209..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamSync2.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp - * TEST: %t - * HIT_END - */ - - -#include "test_common.h" - -enum SyncMode { - syncNone, - syncNullStream, - syncOtherStream, - syncMarkerThenOtherStream, - syncMarkerThenOtherNonBlockingStream, - syncDevice -}; - - -const char* syncModeString(int syncMode) { - switch (syncMode) { - case syncNone: - return "syncNone"; - case syncNullStream: - return "syncNullStream"; - case syncOtherStream: - return "syncOtherStream"; - case syncMarkerThenOtherStream: - return "syncMarkerThenOtherStream"; - case syncMarkerThenOtherNonBlockingStream: - return "syncMarkerThenOtherNonBlockingStream"; - case syncDevice: - return "syncDevice"; - default: - return "unknown"; - }; -}; - - -void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, SyncMode syncMode, - bool expectMismatch) { - // This test sends a long-running kernel to the null stream, then tests to see if the - // specified synchronization technique is effective. - // - // Some syncMode are not expected to correctly sync (for example "syncNone"). in these - // cases the test sets expectMismatch and the check logic below will attempt to ensure that - // the undesired synchronization did not occur - ie ensure the kernel is still running and did - // not yet update the stop event. This can be tricky since if the kernel runs fast enough it - // may complete before the check. 
To prevent this, the addCountReverse has a count parameter - // which causes it to loop repeatedly, and the results are checked in reverse order. - // - // Tests with expectMismatch=true should ensure the kernel finishes correctly. This results - // are checked and we test to make sure stop event has completed. - - if (!(testMask & p_tests)) { - return; - } - printf("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n", testMask, syncModeString(syncMode), - expectMismatch); - - size_t sizeBytes = numElements * sizeof(int); - - int count = 100; - int init0 = 0; - HIPCHECK(hipMemset(C_d, init0, sizeBytes)); - for (int i = 0; i < numElements; i++) { - C_h[i] = -1; // initialize - } - - hipStream_t otherStream = 0; - unsigned flags = (syncMode == syncMarkerThenOtherNonBlockingStream) ? hipStreamNonBlocking - : hipStreamDefault; - HIPCHECK(hipStreamCreateWithFlags(&otherStream, flags)); - hipEvent_t stop, otherStreamEvent; - HIPCHECK(hipEventCreate(&stop)); - HIPCHECK(hipEventCreate(&otherStreamEvent)); - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - // Launch kernel into null stream, should result in C_h == count. - hipLaunchKernelGGL(HipTest::addCountReverse, dim3(blocks), dim3(threadsPerBlock), 0, - 0 /*stream*/, static_cast(C_d), C_h, numElements, count); - HIPCHECK(hipEventRecord(stop, 0 /*default*/)); - - switch (syncMode) { - case syncNone: - break; - case syncNullStream: - HIPCHECK(hipStreamSynchronize(0)); // wait on host for null stream: - break; - case syncOtherStream: - // Does this synchronize with the null stream? 
- HIPCHECK(hipStreamSynchronize(otherStream)); - break; - case syncMarkerThenOtherStream: - case syncMarkerThenOtherNonBlockingStream: - - // this may wait for NULL stream depending hipStreamNonBlocking flag above - HIPCHECK(hipEventRecord(otherStreamEvent, otherStream)); - - HIPCHECK(hipStreamSynchronize(otherStream)); - break; - case syncDevice: - HIPCHECK(hipDeviceSynchronize()); - break; - default: - assert(0); - }; - - hipError_t done = hipEventQuery(stop); - - if (expectMismatch) { - assert(done == hipErrorNotReady); - } else { - assert(done == hipSuccess); - } - - int mismatches = 0; - int expected = init0 + count; - for (int i = 0; i < numElements; i++) { - bool compareEqual = (C_h[i] == expected); - if (!compareEqual) { - mismatches++; - if (!expectMismatch) { - printf("C_h[%d] (%d) != %d\n", i, C_h[i], expected); - assert(C_h[i] == expected); - } - } - } - - if (expectMismatch) { - assert(mismatches > 0); - } - - - HIPCHECK(hipStreamDestroy(otherStream)); - HIPCHECK(hipEventDestroy(stop)); - HIPCHECK(hipEventDestroy(otherStreamEvent)); - - HIPCHECK(hipDeviceSynchronize()); - - printf("test: OK - %d mismatches (%6.2f%%)\n", mismatches, - ((double)(mismatches)*100.0) / numElements); -} - - -void runTests(int64_t numElements) { - size_t sizeBytes = numElements * sizeof(int); - - printf("\n\ntest: starting sequence with sizeBytes=%zu bytes, %6.2f MB\n", sizeBytes, - sizeBytes / 1024.0 / 1024.0); - - - int *C_h, *C_d; - HIPCHECK(hipMalloc(&C_d, sizeBytes)); - HIPCHECK(hipHostMalloc(&C_h, sizeBytes)); - - - { - test(0x01, C_d, C_h, numElements, syncNone, true /*expectMismatch*/); - test(0x02, C_d, C_h, numElements, syncNullStream, false /*expectMismatch*/); -#ifndef __HIP_CLANG_ONLY__ - test(0x04, C_d, C_h, numElements, syncOtherStream, true /*expectMismatch*/); -#endif - test(0x08, C_d, C_h, numElements, syncDevice, false /*expectMismatch*/); - - // Sending a marker to to null stream may synchronize the otherStream - // - other created with 
hipStreamNonBlocking=0 : synchronization, should match - // - other created with hipStreamNonBlocking=1 : no synchronization, may mismatch - test(0x10, C_d, C_h, numElements, syncMarkerThenOtherStream, false /*expectMismatch*/); - - // TODO - review why this test seems flaky - // test (0x20, C_d, C_h, numElements, syncMarkerThenOtherNonBlockingStream, true - // /*expectMismatch*/); - } - - - HIPCHECK(hipFree(C_d)); - HIPCHECK(hipHostFree(C_h)); -} - - -int main(int argc, char* argv[]) { - // Can' destroy the default stream:// TODO - move to another test - HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidHandle); - - HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/); - - runTests(40000000); - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp deleted file mode 100644 index a8fb419d80..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ /dev/null @@ -1,501 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * ZZZBUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * ZZZTEST: %t - * HIT_END - */ - -// Test under-development. Calls async mem-copy API, experiment with functionality. - -#include "hip/hip_runtime.h" -#include "test_common.h" -#include -#include -unsigned p_streams = 8; -unsigned p_db = 0; -unsigned p_count = 100; - - -//------ -// Structure for one stream; -template -class Streamer { -#define COMMAND_ADD_FORWARD 0 -#define COMMAND_ADD_REVERSE 1 -#define COMMAND_COPY 2 - - - public: - Streamer(int deviceId, T* input, size_t numElements, int commandType); - ~Streamer(); - void runAsyncAfter(Streamer* depStreamer, bool waitSameStream = false); - void runAsyncWaitSameStream(); - void queryUntilComplete(); - - size_t check(int streamerNum, T initValue, T expectedOffset, bool expectPass = true); - void copyToHost(hipStream_t copyStream); - - hipEvent_t event() { return _event; }; - - int deviceId() const { return _deviceId; }; - size_t mismatchCount() const { return _mismatchCount; }; - T* C_d() { return _C_d; }; - - // How much does this streamer add to A[i] after running runAsyncAfter - int expectedAdd() const { return (_commandType == COMMAND_COPY) ? 0 : p_count; }; - - - int _commandType; // 0=addReverse, 1=addFwd, 2=move - private: - T* _C_h; - - T* _preA_d; // if input is on another device, this is pointer to that memory. 
- T* _A_d; - T* _C_d; - - hipStream_t _stream; - hipEvent_t _event; - - int _deviceId; - size_t _numElements; - - size_t _mismatchCount; -}; - - -template -Streamer::Streamer(int deviceId, T* A_d, size_t numElements, int commandType) - : _preA_d(NULL), - _A_d(A_d), - _deviceId(deviceId), - _numElements(numElements), - _commandType(commandType) { - size_t sizeElements = numElements * sizeof(int); - - // if (commandType == 0) _commandType = 1; // TODO - remove me - - HIPCHECK(hipSetDevice(_deviceId)); - - - hipPointerAttribute_t attr; - HIPCHECK(hipPointerGetAttributes(&attr, A_d)); - if (attr.device != deviceId) { - // source is on another device, we will need to copy later. - // So save original source pointer and allocate local space. - printf("info: source for streamer on another device, will insert memcpy\n"); - _preA_d = A_d; - HIPCHECK(hipMalloc(&_A_d, sizeElements)); - HIPCHECK(hipMemset(_A_d, -3, sizeElements)); - } - - HIPCHECK(hipMalloc(&_C_d, sizeElements)); - HIPCHECK(hipHostMalloc(&_C_h, sizeElements)); - - HIPCHECK(hipMemset(_C_d, -1, sizeElements)); - HIPCHECK(hipMemset(_C_h, -2, sizeElements)); - - HIPCHECK(hipStreamCreate(&_stream)); - HIPCHECK(hipEventCreate(&_event)); -}; - - -template -Streamer::~Streamer() { - HIPCHECK(hipSetDevice(_deviceId)); - - printf("info: ~Streamer\n"); - if (_preA_d) { - HIPCHECK(hipFree(_preA_d)); - } - HIPCHECK(hipFree(_C_d)); - HIPCHECK(hipHostFree(_C_h)); - - HIPCHECK(hipStreamDestroy(_stream)); - HIPCHECK(hipEventDestroy(_event)); -} - - -template -void Streamer::runAsyncAfter(Streamer* depStreamer, bool waitSameStream) { - HIPCHECK(hipSetDevice(_deviceId)); - if (p_db) { - printf("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, - _numElements * sizeof(T) / 1024.0 / 1024.0); - } - - if (depStreamer) { - HIPCHECK(hipStreamWaitEvent(_stream, depStreamer->event(), 0)); - } - - if (_preA_d) { - // _preA_d is on another device, so copy to local device so kernel can access it: - 
HIPCHECK(hipMemcpyAsync(_A_d, _preA_d, _numElements * sizeof(T), hipMemcpyDeviceToDevice, - _stream)); - } - - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements); - if (_commandType == COMMAND_ADD_REVERSE) { - hipLaunchKernelGGL(HipTest::addCountReverse, dim3(blocks), dim3(threadsPerBlock), 0, - _stream, static_cast(_A_d), _C_d, - static_cast(_numElements), static_cast(p_count)); - } else if (_commandType == COMMAND_ADD_FORWARD) { - hipLaunchKernelGGL(HipTest::addCount, dim3(blocks), dim3(threadsPerBlock), 0, _stream, - static_cast(_A_d), _C_d, _numElements, - static_cast(p_count)); - } else if (_commandType == COMMAND_COPY) { - HIPCHECK( - hipMemcpyAsync(_C_d, _A_d, _numElements * sizeof(T), hipMemcpyDeviceToDevice, _stream)); - } else { - assert(0); // bad command type - } - HIPCHECK(hipEventRecord(_event, _stream)); - - if (waitSameStream) { - HIPCHECK(hipStreamWaitEvent( - _stream, _event, 0)); // this is essentially a no-op, but make sure it doesn't crash - } -} - - -template -void Streamer::queryUntilComplete() { - HIPCHECK(hipSetDevice(_deviceId)); - int numQueries = 0; - hipError_t e = hipSuccess; - do { - numQueries++; - e = hipStreamQuery(_stream); - } while (e != hipSuccess); - - printf("info: hipStreamQuery completed after %d queries\n", numQueries); -}; - - -// If copyStream is !nullptr it is used for the copy. -template -void Streamer::copyToHost(hipStream_t copyStream) { - if (p_db) { - printf("db: copy back to host\n"); - } - HIPCHECK(hipSetDevice(_deviceId)); - HIPCHECK(hipMemcpyAsync(_C_h, _C_d, _numElements * sizeof(T), hipMemcpyDeviceToHost, - copyStream ? copyStream : _stream)); - HIPCHECK(hipStreamSynchronize(copyStream ? 
copyStream : _stream)); -} - - -template -size_t Streamer::check(int streamerNum, T initValue, T expectedOffset, bool expectPass) { - T expected = initValue + expectedOffset; - if (p_db) { - printf("db: check\n"); - } - - _mismatchCount = 0; - for (size_t i = 0; i < _numElements; i++) { - if (_C_h[i] != expected) { - _mismatchCount++; - if (expectPass) { - fprintf(stderr, "for streamer:%d _C_h[%zu] (%d) != expected(%d)\n", streamerNum, - i, _C_h[i], expected); - if (_mismatchCount > 10) { - failed("for streamer:%d _C_h[%zu] (%d) != expected(%d)\n", streamerNum, i, - _C_h[i], expected); - } - } - } - } - - if (!expectPass && (_mismatchCount == 0)) { - // the test should run kernels long enough that if we don't correctly wait for them to - // finish then an error is reported. - // failed("for streamer:%d we expected inavalid synchronization to lead to mismatch but - // none was detected. Increase --N to sensitize sync.\n", streamerNum); - } - - return _mismatchCount; -} - - -//--- -// Parse arguments specific to this test. -void parseMyArguments(int argc, char* argv[]) { - N = 64 * 1024 * 1024; - - int more_argc = HipTest::parseStandardArguments(argc, argv, false); - - // parse args for this test: - for (int i = 1; i < more_argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, "--streams")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &p_streams)) { - failed("Bad streams argument"); - } - } else if (!strcmp(arg, "--count")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &p_count)) { - failed("Bad count argument"); - } - } else if (!strcmp(arg, "--db")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &p_db)) { - failed("Bad db argument"); - } - } else { - failed("Bad argument '%s'", arg); - } - }; -}; - - -typedef Streamer IntStreamer; - - -void runStreamerLoop(std::vector& streamers) { - for (int i = 0; i < streamers.size(); i++) { - streamers[i]->runAsyncAfter(i ? 
streamers[i - 1] : NULL); - } -} - - -void checkAll(int initValue, std::vector& streamers, - std::vector& sideStreams, bool expectPass = true) { - size_t mismatchCount = 0; - - // Copy in reverse order to catch anything not yet finished... - for (int i = streamers.size() - 1; i >= 0; i--) { - streamers[i]->copyToHost(sideStreams.empty() ? NULL - : sideStreams[streamers[i]->deviceId()]); - } - - - int expected = 0; - // Check in forward order so we can find first mismatch: - for (int i = 0; i < streamers.size(); i++) { - expected += streamers[i]->expectedAdd(); - - mismatchCount += streamers[i]->check(i + 1, initValue, expected, expectPass); - } - if (!expectPass && (mismatchCount == 0)) { - // the test should run kernels long enough that if we don't correctly wait for them to - // finish then an error is reported. - failed( - "we expected inavalid synchronization to lead to mismatch but none was detected. " - "Increase --count to sensitize sync.\n"); - } -} - - -#define RUN_SYNC_TEST(_enableBit, _streamers, _sync, _expectPass) \ - if (p_tests & (_enableBit)) { \ - printf("==> Test %02x runAsyncAfter sync=%s\n", (_enableBit), #_sync); \ - runStreamerLoop(_streamers); \ - (_sync); \ - checkAll(initValue, _streamers, sideStreams, _expectPass); \ - } - - -//--- -// A family of sync functions which somehow wait for inflight activity to finish: - - -void sync_none(void){}; - -void sync_allDevices(int numDevices) { - for (int d = 0; d < numDevices; d++) { - HIPCHECK(hipSetDevice(d)); - HIPCHECK(hipDeviceSynchronize()); - } -} - - -void sync_queryAllUntilComplete(std::vector streamers) { - for (int i = streamers.size() - 1; i >= 0; i--) { - streamers[i]->queryUntilComplete(); - }; -} - - -void sync_streamWaitEvent(hipEvent_t lastEvent, int sideDeviceId, hipStream_t sideStream, - bool waitHere) { - HIPCHECK(hipSetDevice(sideDeviceId)); - - // wait on the last event in the stream of chained streamers: - // This plants a marker which the subsquent copy for this device will 
wait on: - HIPCHECK(hipStreamWaitEvent(sideStream, lastEvent, 0)); - - if (waitHere) { - HIPCHECK(hipStreamSynchronize(sideStream)); - } -} - - -//--- -int main(int argc, char* argv[]) { - HipTest::parseStandardArguments(argc, argv, false); - parseMyArguments(argc, argv); - - - size_t numElements = N; - size_t sizeElements = numElements * sizeof(int); - - printf("info: sizeof arrays = %zu elements (%6.3f MB)\n", numElements, - sizeElements / 1024.0 / 1024.0); - printf("info: streams=%d count=%d\n", p_streams, p_count); - - assert(sizeElements <= std::numeric_limits::max()); - - - int initValue = 1000; - - int *initArray_d, *initArray_h; - HIPCHECK(hipMalloc(&initArray_d, sizeElements)); - HIPCHECK(hipHostMalloc(&initArray_h, sizeElements)); - for (size_t i = 0; i < numElements; i++) { - initArray_h[i] = initValue; - } - HIPCHECK(hipMemcpy(initArray_d, initArray_h, sizeElements, hipMemcpyHostToDevice)); - - - int numDevices; - HIPCHECK(hipGetDeviceCount(&numDevices)); - numDevices = min(2, numDevices); // multi-GPU to 2 device. - - std::vector streamers; - std::vector streamersDev0; // streamers for first device. - - for (int d = 0; d < numDevices /*TODO*/; d++) { - for (int i = 0; i < p_streams; i++) { - int command = (i % 2) ? COMMAND_ADD_FORWARD : COMMAND_ADD_REVERSE; - IntStreamer* s = - new IntStreamer(d, i ? streamers.back()->C_d() : initArray_d, numElements, command); - streamers.push_back(s); - if (d == 0) { - streamersDev0.push_back(s); - } - } - } - - - // A sideband stream channel that is independent from above. - // Used to check to ensure the WaitEvent or other synchronization is working correctly since by - // default sideStream is asynchronous wrt the other streams. 
- std::vector sideStreams; - for (int d = 0; d < numDevices; d++) { - hipStream_t s; - HIPCHECK(hipStreamCreate(&s)); - sideStreams.push_back(s); - } - - - // Tests on first GPU: - // - // This test has no synchronization - make sure it mismatches so we can ensure the other tests - // properyl prevent the mismatch: - RUN_SYNC_TEST(0x01, streamersDev0, sync_none(), false); - - RUN_SYNC_TEST(0x02, streamersDev0, sync_allDevices(numDevices), true); - RUN_SYNC_TEST(0x04, streamersDev0, sync_queryAllUntilComplete(streamersDev0), true); - RUN_SYNC_TEST(0x08, streamersDev0, - sync_streamWaitEvent(streamersDev0.back()->event(), 0, sideStreams[0], false), - true); - - if (numDevices > 1) { - // Sync on second device for activity running on device 0: - RUN_SYNC_TEST(0x10, streamersDev0, - sync_streamWaitEvent(streamersDev0.back()->event(), 1, sideStreams[1], true), - true); - } - - - // Tests on all GPUs: - // RUN_SYNC_TEST(0x100, streamers, sync_streamWaitEvent(streamers.back()->event(), 0, - // sideStreams[0], false), true); - - - if (p_tests & 0x1000) { - printf("==> Test 0x1000 simple null stream tests\n"); - - // try some null stream: - hipStreamQuery(0); - - - hipStream_t s1; - hipEvent_t e1; - - { - // stream null waits on event in s1 stream: - HIPCHECK(hipStreamCreate(&s1)); - HIPCHECK(hipEventCreate(&e1)); - - HIPCHECK(hipEventRecord(e1, s1)) - - HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0 /*flags*/)); - - HIPCHECK(hipStreamDestroy(s1)); - HIPCHECK(hipEventDestroy(e1)); - } - - { - // stream s1 waits on event in null stream: - HIPCHECK(hipStreamCreate(&s1)); - HIPCHECK(hipEventCreate(&e1)); - - HIPCHECK(hipEventRecord(e1, hipStream_t(0))) - - HIPCHECK(hipStreamWaitEvent(s1, e1, 0 /*flags*/)); - - HIPCHECK(hipStreamDestroy(s1)); - HIPCHECK(hipEventDestroy(e1)); - } - } - - - // Insert small wrinkle here, insert a wait on event just recorded, all in the same stream. 
- if (p_tests & 0x2000) { - printf("==> Test 0x2000 runAsyncWaitSameStream\n"); - for (int i = 0; i < streamersDev0.size(); i++) { - streamersDev0[i]->runAsyncAfter(i ? streamersDev0[i - 1] : NULL, - true /*waitSameStream*/); - } - - sync_streamWaitEvent(streamersDev0.back()->event(), 0, sideStreams[0], false); - checkAll(initValue, streamersDev0, sideStreams); - } - - - // Change Adds to copies to stimulate different case with event followign copy: - for (auto& s : streamers) { - if (s->_commandType == COMMAND_ADD_FORWARD) s->_commandType = COMMAND_COPY; - } - - - if (p_tests & 0x4000) { - printf("test: %x alternating memcpy/count-reverse followed by event\n", p_tests); - RUN_SYNC_TEST(0x4000, streamersDev0, sync_queryAllUntilComplete(streamersDev0), true); - RUN_SYNC_TEST(0x8000, streamersDev0, - sync_streamWaitEvent(streamersDev0.back()->event(), 0, sideStreams[0], false), - true); - } - - - passed(); -} diff --git a/tests/src/runtimeApi/stream/hipStreamWithCUMask.cpp b/tests/src/runtimeApi/stream/hipStreamWithCUMask.cpp deleted file mode 100644 index 07ac4c527b..0000000000 --- a/tests/src/runtimeApi/stream/hipStreamWithCUMask.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include -#include -#include -#include "test_common.h" - -using namespace std; - -__global__ void vector_square(float *C_d, float *A_d, size_t N) { - size_t idx = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x ; - - for (size_t i = idx; i < N; i += stride) { - C_d[i] = A_d[i] * A_d[i]; - } -} - -int main(int argc, char* argv[]) { - constexpr uint32_t numPartition = 4; - float *dA[numPartition], *dC[numPartition]; - float *hA, *hC; - size_t N = 25 * 1024 * 1024; - size_t Nbytes = N * sizeof(float); - vector streams(numPartition); - vector> cuMasks(numPartition); - stringstream ss[numPartition]; - - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - if (nGpu < 1) { - cout << "info: didn't find any GPU! skipping the test!\n"; - passed(); - return 0; - } - - static int device = 0; - HIPCHECK(hipSetDevice(device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device)); - cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name << - " with " << props.multiProcessorCount << " CUs" << endl; - - hA = new float[Nbytes]; - HIPCHECK(hA == 0 ? hipErrorOutOfMemory : hipSuccess); - hC = new float[Nbytes]; - HIPCHECK(hC == 0 ? 
hipErrorOutOfMemory : hipSuccess); - for (size_t i = 0; i < N; i++) { - hA[i] = 1.618f + i; - } - - int cuCountPerGroup = 1; - if (props.major >= 10) { - cuCountPerGroup = 2; // For gfx >= 10, one work group processor encompasses 2 CUs - } - - unsigned long mask = pow(2, cuCountPerGroup) - 1; - - for (int np = 0; np < numPartition; np++) { - - HIPCHECK(hipMalloc(&dA[np], Nbytes)); - HIPCHECK(hipMalloc(&dC[np], Nbytes)); - - // make unique CU masks in the multiple of dwords for each stream - uint32_t temp = 0; - uint32_t bit_index = cuCountPerGroup * np; - for (int i = np; i < props.multiProcessorCount; i = i + cuCountPerGroup * 4) { - temp |= mask << bit_index; - if (bit_index >= 32) { - cuMasks[np].push_back(temp); - temp = 0; - bit_index = cuCountPerGroup * np; - temp |= mask << bit_index; - } - bit_index += cuCountPerGroup * 4; - } - if (bit_index != 0) { - cuMasks[np].push_back(temp); - } - - HIPCHECK(hipExtStreamCreateWithCUMask(&streams[np], cuMasks[np].size(), cuMasks[np].data())); - - HIPCHECK(hipMemcpy(dA[np], hA, Nbytes, hipMemcpyHostToDevice)); - - ss[np] << std::hex << std::setfill('0'); - for (int i = cuMasks[np].size() - 1; i >= 0; i--) { - ss[np] << std::setw(8) << cuMasks[np][i]; - } - } - - const unsigned blocks = 512; - const unsigned threadsPerBlock = 256; - - auto single_start = chrono::steady_clock::now(); - cout << "info: launch 'vector_square' kernel on one stream " << streams[0] << " with CU mask: 0x" << ss[0].str().c_str() << endl; - - hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, streams[0], dC[0], dA[0], N); - hipDeviceSynchronize(); - - auto single_end = chrono::steady_clock::now(); - chrono::duration single_kernel_time = single_end - single_start; - - HIPCHECK(hipMemcpy(hC, dC[0], Nbytes, hipMemcpyDeviceToHost)); - - for (size_t i = 0; i < N; i++) { - if (hC[i] != hA[i] * hA[i]) { - cout << "info: validation failed for kernel launched at stream" << streams[0] << endl; - HIPCHECK(hipErrorUnknown); - } - } - - 
cout << "info: launch 'vector_square' kernel on " << numPartition << " streams:" << endl; - auto all_start = chrono::steady_clock::now(); - for (int np = 0; np < numPartition; np++) { - cout << "info: launch 'vector_square' kernel on the stream " << streams[np] << " with CU mask: 0x" << ss[np].str().c_str() << endl; - hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, - streams[np], dC[np], dA[np], N); - } - hipDeviceSynchronize(); - - auto all_end = chrono::steady_clock::now(); - chrono::duration all_kernel_time = all_end - all_start; - - for (int np = 0; np < numPartition; np++) { - HIPCHECK(hipMemcpy(hC, dC[np], Nbytes, hipMemcpyDeviceToHost)); - for (size_t i = 0; i < N; i++) { - if (hC[i] != hA[i] * hA[i]) { - cout << "info: validation failed for kernel launched at stream" << streams[np] << endl; - HIPCHECK(hipErrorUnknown); - } - } - } - - cout << "info: kernel launched on one stream took: " << single_kernel_time.count() << " seconds" << endl; - cout << "info: kernels launched on " << numPartition << " streams took: " << all_kernel_time.count() << " seconds" << endl; - cout << "info: launching kernels on " << numPartition << " streams asynchronously is " << single_kernel_time.count() / (all_kernel_time.count() / numPartition) - << " times faster per stream than launching on one stream alone" << endl; - - delete [] hA; - delete [] hC; - for (int np = 0; np < numPartition; np++) { - hipFree(dC[np]); - hipFree(dA[np]); - HIPCHECK(hipStreamDestroy(streams[np])); - } - - passed(); -} diff --git a/tests/src/runtimeApi/streamOperations/hipstream_operations.cpp b/tests/src/runtimeApi/streamOperations/hipstream_operations.cpp deleted file mode 100644 index 8cd3b7bec2..0000000000 --- a/tests/src/runtimeApi/streamOperations/hipstream_operations.cpp +++ /dev/null @@ -1,460 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test Description: -/* - This unit test is written to test Stream Write and Stream Wait API. - Stream Write: - Both 32 and 64 bit version of this APIs are tested by writing a specific value and checking - the correctness. Various mememory objects (host registered, device and Signal Memory) are tested. - Stream Wait: - Wait API is tested using two memory locations (DataPr and SignalPtr). Following - commands are executed for each type of wait operaitons (GEQ, EQ, AND and NOR) in the order - specified. - 1. CPU : An intial values are written to DataPtr and SignalPtr - 2. GPU : Wait operation (with false condition that blocks the stream) is enqued. - 3. GPU : Write operation on DataPtr to update its value is enqued. - 4. CPU : A query or CPU wait to make sure all commands are processed by GPU. - 5. CPU : streamQuery is performed to make sure it is not finshed executing the commands, - since step-2 is blocking. - 6. 
CPU : A new value is written to SignalPtr memory that make wait condition defined in - step-2 to be true. This causes step-3 to be executed. - 7. CPU : Synchronize the stream and read value at DataPtr, it should be equal to updated - value (step-3). -*/ - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 EXCLUDE_HIP_PLATFORM nvidia LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#ifdef __linux__ -#include -#endif -#include -#include "test_common.h" - -// Random predefiend 32 and 64 bit values -constexpr uint32_t value32 = 0x70F0F0FF; -constexpr uint64_t value64 = 0x7FFF0000FFFF0000; -constexpr unsigned int writeFlag = 0; - -constexpr float SLEEP_MS = 100; -constexpr uint32_t DATA_INIT = 0x1234; -constexpr uint32_t DATA_UPDATE = 0X4321; -constexpr unsigned int ARR_SIZE = 5; - -struct TEST_WAIT { - int compareOp; - uint64_t mask; - uint64_t waitValue; - int64_t signalValueFail; - int64_t signalValuePass; -}; - -TEST_WAIT testCases[] = { - { - // mask will ignore few MSB bits - hipStreamWaitValueGte, - 0x0000FFFFFFFFFFFF, - 0x000000007FFF0001, - 0x7FFF00007FFF0000, - 0x000000007FFF0001 - }, - { - hipStreamWaitValueGte, - 0xF, - 0x4, - 0x3, - 0x6 - }, - { - // mask will ignore few MSB bits - hipStreamWaitValueEq, - 0x0000FFFFFFFFFFFF, - 0x000000000FFF0001, - 0x7FFF00000FFF0000, - 0x7F0000000FFF0001 - }, - { - hipStreamWaitValueEq, - 0xFF, - 0x11, - 0x25, - 0x11 - }, - { - // mask will discard bits 8 to 11 - hipStreamWaitValueAnd, - 0xFF, - 0xF4A, - 0xF35, - 0X02 - }, - { - // mask is set to ignore the sign bit. - hipStreamWaitValueNor, - 0x7FFFFFFFFFFFFFFF, - 0x7FFFFFFFFFFFF247, - 0x7FFFFFFFFFFFFdbd, - 0x7FFFFFFFFFFFFdb5 - }, - { - // mask is set to apply NOR for bits 0 to 3. - hipStreamWaitValueNor, - 0xF, - 0x7E, - 0x7D, - 0x76 - } -}; - -struct TEST_WAIT32_NO_MASK { - int compareOp; - uint32_t waitValue; - int32_t signalValueFail; - int32_t signalValuePass; -}; - -// default mask 0xFFFFFFFF will be used. 
-TEST_WAIT32_NO_MASK testCasesNoMask32[] = { - { - hipStreamWaitValueGte, - 0x7FFF0001, - 0x7FFF0000, - 0x7FFF0010 - }, - { - hipStreamWaitValueEq, - 0x7FFFFFFF, - 0x7FFF0000, - 0x7FFFFFFF - }, - { - hipStreamWaitValueAnd, - 0x70F0F0F0, - 0x0F0F0F0F, - 0X1F0F0F0F - }, - { - hipStreamWaitValueNor, - 0x7AAAAAAA, - static_cast(0x85555555), - static_cast(0x9AAAAAAA) - } -}; - -struct TEST_WAIT64_NO_MASK { - int compareOp; - uint64_t waitValue; - int64_t signalValueFail; - int64_t signalValuePass; -}; - -// default mask 0xFFFFFFFFFFFFFFFF will be used. -TEST_WAIT64_NO_MASK testCasesNoMask64[] = { - { - hipStreamWaitValueGte, - 0x7FFFFFFFFFFF0001, - 0x7FFFFFFFFFFF0000, - 0x7FFFFFFFFFFF0001 - }, - { - hipStreamWaitValueEq, - 0x7FFFFFFFFFFFFFFF, - 0x7FFFFFFF0FFF0000, - 0x7FFFFFFFFFFFFFFF - }, - { - hipStreamWaitValueAnd, - 0x70F0F0F0F0F0F0F0, - 0x0F0F0F0F0F0F0F0F, - 0X1F0F0F0F0F0F0F0F - }, - { - hipStreamWaitValueNor, - 0x4724724747247247, - static_cast(0xbddbddbdbddbddbd), - static_cast(0xbddbddbdbddbddb3) - } -}; - -void testWrite() { - - int64_t* signalPtr; - - hipStream_t stream; - hipStreamCreate(&stream); - - uint64_t* host_ptr64 = (uint64_t *) malloc(sizeof(uint64_t)); - uint32_t* host_ptr32 = (uint32_t *) malloc(sizeof(uint32_t)); - uint64_t h_mem64[ARR_SIZE]; - uint32_t h_mem32[ARR_SIZE]; - uint64_t* h_mem64ptr = h_mem64; - uint32_t* h_mem32ptr = h_mem32; - std::cout << " hipStreamWriteValue: testing ... 
\n"; - - HIPCHECK(hipExtMallocWithFlags((void **)&signalPtr, 8, hipMallocSignalMemory)); - - void* device_ptr64; - void* device_ptr32; - uint64_t* d_mem64ptr; - uint32_t* d_mem32ptr; - *host_ptr64 = 0x0; - *host_ptr32 = 0x0; - *signalPtr = 0x0; - - hipHostRegister(host_ptr64, sizeof(uint64_t), 0); - hipHostRegister(host_ptr32, sizeof(uint32_t), 0); - hipHostRegister(h_mem64ptr, sizeof(uint64_t) * ARR_SIZE, 0); - hipHostRegister(h_mem32ptr, sizeof(uint32_t) * ARR_SIZE, 0); - - // Test writting registered host pointer - HIPCHECK(hipStreamWriteValue64(stream, host_ptr64, value64, writeFlag)); - HIPCHECK(hipStreamWriteValue32(stream, host_ptr32, value32, writeFlag)); - //test writting to an array - for (int indx = 0; indx < ARR_SIZE; indx++) { - HIPCHECK(hipStreamWriteValue64(stream, h_mem64ptr + indx, ARR_SIZE - indx, writeFlag)); - HIPCHECK(hipStreamWriteValue32(stream, h_mem32ptr + indx, ARR_SIZE - indx, writeFlag)); - } - - hipStreamSynchronize(stream); - - HIPASSERT(*host_ptr64 == value64); - HIPASSERT(*host_ptr32 == value32); - for (int indx = 0; indx < ARR_SIZE ; indx++) { - HIPASSERT(*(h_mem64ptr + indx) == (ARR_SIZE - indx)); - HIPASSERT(*(h_mem32ptr + indx) == (ARR_SIZE - indx)); - } - // Test writting device pointer - hipHostGetDevicePointer((void**)&device_ptr64, host_ptr64, 0); - hipHostGetDevicePointer((void**)&device_ptr32, host_ptr32, 0); - hipHostGetDevicePointer((void**)&d_mem64ptr, h_mem64ptr, 0); - hipHostGetDevicePointer((void**)&d_mem32ptr, h_mem32ptr, 0); - - // Reset values - *host_ptr64 = 0x0; - *host_ptr32 = 0x0; - - HIPCHECK(hipStreamWriteValue64(stream, device_ptr64, value64, writeFlag)); - HIPCHECK(hipStreamWriteValue32(stream, device_ptr32, value32, writeFlag)); - for (int indx = 0; indx < ARR_SIZE; indx++) { - HIPCHECK(hipStreamWriteValue64(stream, d_mem64ptr + indx, indx, writeFlag)); - HIPCHECK(hipStreamWriteValue32(stream, d_mem32ptr + indx, indx, writeFlag)); - } - - hipStreamSynchronize(stream); - - HIPASSERT(*host_ptr64 == value64); 
- HIPASSERT(*host_ptr32 == value32); - for (int indx = 0; indx < ARR_SIZE ; indx++) { - HIPASSERT(*(h_mem64ptr + indx) == indx); - HIPASSERT(*(h_mem32ptr + indx) == indx); - } - - // Test Writing to Signal Memory - HIPCHECK(hipStreamWriteValue64(stream, signalPtr, value64, writeFlag)); - hipStreamSynchronize(stream); - - HIPASSERT(*signalPtr == value64); - - // Cleanup - hipStreamDestroy(stream); - hipHostUnregister(host_ptr64); - hipHostUnregister(host_ptr32); - HIPCHECK(hipFree(signalPtr)); - free(host_ptr32); - free(host_ptr64); -} - -bool streamWaitValueSupported() { - int device_num = 0; - HIPCHECK(hipGetDeviceCount(&device_num)); - int waitValueSupport; - for (int device_id = 0; device_id < device_num; ++device_id) { - HIPCHECK(hipSetDevice(device_id)); - waitValueSupport = 0; - HIPCHECK(hipDeviceGetAttribute(&waitValueSupport, hipDeviceAttributeCanUseStreamWaitValue, - device_id)); - if (waitValueSupport == 1) return true; - } - return false; -} - -void waitAndWrite64(hipStream_t stream, int64_t* signalPtr, TEST_WAIT tc, int64_t* dataPtr64) { - HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, tc.compareOp, tc.mask)); - HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE, writeFlag)); -} -void waitAndWrite32(hipStream_t stream, int64_t* signalPtr, TEST_WAIT tc, int32_t* dataPtr32) { - HIPCHECK(hipStreamWaitValue32(stream, signalPtr, static_cast(tc.waitValue), tc.compareOp, - static_cast(tc.mask))); - HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE, writeFlag)); -} -void waitAndWrite32NoMask(hipStream_t stream, int64_t* signalPtr, TEST_WAIT32_NO_MASK tc, - int32_t* dataPtr32) { - HIPCHECK(hipStreamWaitValue32(stream, signalPtr, tc.waitValue, tc.compareOp)); - HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE, writeFlag)); -} -void waitAndWrite64NoMask(hipStream_t stream, int64_t* signalPtr, TEST_WAIT64_NO_MASK tc, - int64_t* dataPtr64) { - HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, 
tc.compareOp)); - HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE, writeFlag)); -} - -void testWait() { - int64_t* signalPtr; - - if (!streamWaitValueSupported()) { - std::cout << " hipStreamWaitValue: not supported on this device , skipping ... \n"; - return; - } - std::cout << " hipStreamWaitValue32: testing ... \n"; - std::cout << " hipStreamWaitValue64: testing ... \n"; - hipStream_t stream; - hipStreamCreate(&stream); - - HIPCHECK(hipExtMallocWithFlags((void **)&signalPtr, 8, hipMallocSignalMemory)); - int64_t* dataPtr64 = (int64_t *) malloc(sizeof(int64_t)); - int32_t* dataPtr32 = (int32_t *) malloc(sizeof(int32_t)); - hipHostRegister(dataPtr64, sizeof(int64_t), 0); - hipHostRegister(dataPtr32, sizeof(int32_t), 0); - - // We run all test cases twice - - // Run-1: streamWait is blocking (wait conditions is false) - // Run-2: streamWait is non-blocking (wait condition is true) - for (int run = 0; run < 2; run++) { - bool isBlocking = run == 0; - - for (const auto& tc : testCases) { - *signalPtr = isBlocking ? tc.signalValueFail : tc.signalValuePass; - *dataPtr64 = DATA_INIT; - - std::thread waitThenUpdate64(waitAndWrite64, stream, signalPtr, tc, dataPtr64); - - if (isBlocking) { - // For DEBUG only - // usleep(500); - // HIPASSERT(*dataPtr32 == DATA_INIT); - - // NOTE: Any HIP API call here that takes device execution lock can lead to a deadlock - // since above write command waits for waitValue command if constant memeory filled up. - - // update signal to unblock the wait. - *signalPtr = tc.signalValuePass; - } - waitThenUpdate64.join(); - hipStreamSynchronize(stream); - HIPASSERT(*dataPtr64 == DATA_UPDATE); - - // 32-bit API - *signalPtr = isBlocking ? 
tc.signalValueFail : tc.signalValuePass; - *dataPtr32 = DATA_INIT; - - std::thread waitThenUpdate32(waitAndWrite32, stream, signalPtr, tc, dataPtr32); - - if (isBlocking) { - // For DEBUG only - // usleep(500); - // HIPASSERT(*dataPtr32 == DATA_INIT); - - // NOTE: Any HIP API call here that takes device execution lock can lead to a deadlock - // since above write command waits for waitValue command if constant memeory filled up. - - // update signal to unblock the wait. - *signalPtr = static_cast(tc.signalValuePass); - } - waitThenUpdate32.join(); - hipStreamSynchronize(stream); - HIPASSERT(*dataPtr32 == DATA_UPDATE); - } - } - - std::cout << " hipStreamWaitValue32 with default mask: testing ... \n"; - // Run-1: streamWait is blocking (wait conditions is false) - // Run-2: streamWait is non-blocking (wait condition is true) - for (int run = 0; run < 2; run++) { - bool isBlocking = run == 0; - - for (const auto& tc : testCasesNoMask32) { - *signalPtr = isBlocking ? tc.signalValueFail : tc.signalValuePass; - *dataPtr32 = DATA_INIT; - - std::thread waitThenUpdate32(waitAndWrite32NoMask, stream, signalPtr, tc, dataPtr32); - - if (isBlocking) { - // For DEBUG only - // usleep(500); - // HIPASSERT(*dataPtr32 == DATA_INIT); - - // NOTE: Any HIP API call here that takes device execution lock can lead to a deadlock - // since above write command waits for waitValue command if constant memeory filled up. - - // update signal to unblock the wait. - *signalPtr = tc.signalValuePass; - } - waitThenUpdate32.join(); - hipStreamSynchronize(stream); - HIPASSERT(*dataPtr32 == DATA_UPDATE); - } - } - - std::cout << " hipStreamWaitValue64 with default mask: testing ... \n"; - // Run-1: streamWait is blocking (wait conditions is false) - // Run-2: streamWait is non-blocking (wait condition is true) - for (int run = 0; run < 2; run++) { - bool isBlocking = run == 0; - - for (const auto& tc : testCasesNoMask64) { - *signalPtr = isBlocking ? 
tc.signalValueFail : tc.signalValuePass; - *dataPtr64 = DATA_INIT; - - std::thread waitThenUpdate64(waitAndWrite64NoMask, stream, signalPtr, tc, dataPtr64); - - if (isBlocking) { - // For DEBUG only - // usleep(500); - // HIPASSERT(*dataPtr64 == DATA_INIT); - - // NOTE: Any HIP API call here that takes device execution lock can lead to a deadlock - // since above write command waits for waitValue command if constant memeory filled up. - - // update signal to unblock the wait. - *signalPtr = tc.signalValuePass; - } - waitThenUpdate64.join(); - hipStreamSynchronize(stream); - HIPASSERT(*dataPtr64 == DATA_UPDATE); - } - } - - // Cleanup - HIPCHECK(hipFree(signalPtr)); - hipHostUnregister(dataPtr64); - hipHostUnregister(dataPtr32); - free(dataPtr64); - free(dataPtr32); - hipStreamDestroy(stream); -} - -int main() { - testWrite(); - testWait(); - passed(); -} diff --git a/tests/src/runtimeApi/synchronization/cache_coherency_cpu_gpu.cpp b/tests/src/runtimeApi/synchronization/cache_coherency_cpu_gpu.cpp deleted file mode 100644 index dae95f1264..0000000000 --- a/tests/src/runtimeApi/synchronization/cache_coherency_cpu_gpu.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for Fine Grained CPU-GPU coherency. - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia LINK_OPTIONS -lpthread - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" -#include -#include - -typedef _Atomic(unsigned int) atomic_uint; - -// Helper function to spin on address until address equals value. -// If the address holds the value of -1, abort because the other thread failed. -__device__ void -gpu_spin_loop_or_abort_on_negative_one(unsigned int* address, - unsigned int value) { - unsigned int compare; - bool check = false; - do { - compare = value; - check = __opencl_atomic_compare_exchange_strong( - (atomic_uint*)address, /*expected=*/ &compare, /*desired=*/ value, - __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, - /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - if (compare == -1) - abort(); - } while(!check); -} - -// This kernel requires a single block, single thread dispatch. -__global__ void -gpu_kernel(int *A, int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2) { - for (size_t i = 0; i < N; i++) { - // Store data into A, system fence, and atomically mark flag. - // This guarantees this global write is visible by device 1. - A[i] = X[i]; - __opencl_atomic_fetch_add((atomic_uint*)AA1, 1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on device 1's global write to B. 
- gpu_spin_loop_or_abort_on_negative_one(BA1, i+1); - - // Check device 1 properly stored Y into B. - bool stored_data_matches = (B[i] == Y[i]); - if(!stored_data_matches) { - // If the data does not match, alert other thread and abort. - printf("FAIL: at i=%lu, B[i]=%d, which does not match Y[i]=%d.\n", - i, B[i], Y[i]); - __opencl_atomic_exchange((atomic_uint*)AA2, -1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - abort(); - } - // Otherwise tell the other thread to continue. - __opencl_atomic_fetch_add((atomic_uint*)AA2, 1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on kernel gpu_cache1 to finish checking X is stored in A. - gpu_spin_loop_or_abort_on_negative_one(BA2, i+1); - } -} - -__host__ void -cpu_spin_loop_or_abort_on_negative_one(unsigned int* address, - unsigned int value) { - unsigned int compare; - bool check = false; - do { - compare = value; - check = __atomic_compare_exchange_n( - address, /*expected=*/ &compare, /*desired=*/ value, - /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); - if (compare == -1) - abort(); - } while(!check); -} - -// This host thread runs only on a single CPU thread. -__host__ void -cpu_thread(int *A, int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2) { - for (size_t i = 0; i < N; i++) { - B[i] = Y[i]; - __atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE); - cpu_spin_loop_or_abort_on_negative_one(AA1, i+1); - - bool stored_data_matches = (A[i] == X[i]); - if(!stored_data_matches) { - printf("FAIL: at i=%lu, A[i]=%d, which does not match X[i]=%d.\n", - i, A[i], X[i]); - __atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE); - abort(); - } - __atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE); - cpu_spin_loop_or_abort_on_negative_one(AA2, i+1); - } -} - -// This test runs on devices where XGMI enables fine-grained communication -// between GPUs. This performs a message passing test. 
-// Array A is allocated on Device 0, and remotely on host. -// Device 0 also increments atomic ints AA1 and AA2. -// Array B is allocated on host, and remotely on Device 0. -// Host also increments atomic ints BA1 and BA2. -// Kernel will launch on Device 0, and store array X into array A. -// Host Thread will store array Y into array B. -// Kernel will validate that the correct values of array Y are stored in B. -// Host Thread will validate that the correct values of array X are stored in A. - -bool cpu_to_gpu_coherency() { - int *A_d, *B_d, *X_d, *Y_d; - int *A_res, *A_h, *B_h, *X_h, *Y_h; - size_t N = 1024; - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices < 1) { - printf("info: no GPU detected.\n"); - return false; - } - - // Skip this test if feature is not supported. - static int device0 = 0; - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device0)); - if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 && - strncmp(props.gcnArchName, "gfx940", 6) != 0) { - printf("info: skipping test on devices other than gfx90a and gfx940.\n"); - return true; - } - - // Allocate Host Side Memory. Coherent Fine-grained Memory for array B. - printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - HIPCHECK(hipHostMalloc(&B_h, Nbytes, (hipHostMallocCoherent | hipHostMallocMapped))); - HIPCHECK(hipHostGetDevicePointer((void**)&B_d, B_h, 0)); - X_h = (int*)malloc(Nbytes); HIPCHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - Y_h = (int*)malloc(Nbytes); HIPCHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - - // Initialize the arrays and atomic variables. - for (size_t i = 0; i < N; i++) - { - X_h[i] = 100000000 + i; - Y_h[i] = 300000000 + i; - } - - // Initialize shared atomic flags between CPU and GPU. 
- unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h; - unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d; - HIPCHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&AA1_d, AA1_h, 0)); *AA1_h = 0; - HIPCHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&AA2_d, AA2_h, 0)); *AA2_h = 0; - HIPCHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&BA1_d, BA1_h, 0)); *BA1_h = 0; - HIPCHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&BA2_d, BA2_h, 0)); *BA2_h = 0; - - // Skip the first stream, ensure stream is non-blocking. - hipStream_t stream[2]; - HIPCHECK(hipStreamCreate(&stream[0])); - HIPCHECK(hipSetDevice(0)); - HIPCHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); - - // Allocate Device Side Memory. Coherent Fine-grained Memory for array A. - printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - hipError_t status = hipExtMallocWithFlags((void**)&A_d, Nbytes, hipDeviceMallocFinegrained); - if (status == hipErrorOutOfMemory || A_d == 0 ){ - printf("info: device fine-grained memory not supported on this config\n"); - printf("info: skipping this CPU-GPU coherency test\n"); - return true; - } else if (status != hipSuccess) { - printf("error: failed to allocate device fine-grain memory\n"); - return false; - } - // SVM memory - host pointer is the same as device pointer to array A. - A_h = A_d; - HIPCHECK(hipMalloc(&X_d, Nbytes)); - HIPCHECK(hipMalloc(&Y_d, Nbytes)); - - printf("info: copy Host2Device\n"); - HIPCHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice)); - - // Launch the GPU kernel. 
- printf("info: prepare args and launch params for device 0\n"); - const unsigned blocks = 1; - const unsigned threadsPerBlock = 1; - printf("info: launch gpu_kernel\n"); - hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock), - 0, stream[1], - A_d, B_d, X_d, Y_d, N, - AA1_d, AA2_d, BA1_d, BA2_d); - // Check if launch failed. - HIPCHECK(hipGetLastError()); - - // Do not sync the launched stream, instead run the cpu_thread. - printf("info: start cpu_thread\n"); - std::thread host_thread(cpu_thread, - A_h, B_h, X_h, Y_h, N, - AA1_h, AA2_h, BA1_h, BA2_h); - host_thread.detach(); - - // Wait for Device side to finish. - HIPCHECK(hipStreamSynchronize(stream[1])); - - // Evaluate the resultant arrays A and B. - printf("info: copy Device2Host\n"); - A_res = (int*)malloc(Nbytes); HIPCHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess ); - HIPCHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost)); - printf("info: check result\n"); - for (size_t i = 0; i < N; i++) { - assert(A_res[i] == (100000000 + i)); - assert(B_h[i] == (300000000 + i)); - } - - // Free all the device and host memory allocated. - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(X_d)); - HIPCHECK(hipFree(Y_d)); - HIPCHECK(hipHostFree(AA1_h)); - HIPCHECK(hipHostFree(AA2_h)); - HIPCHECK(hipHostFree(BA1_h)); - HIPCHECK(hipHostFree(BA2_h)); - HIPCHECK(hipHostFree(B_h)); - free(X_h); - free(Y_h); - free(A_res); - - printf("info: finished CPU-GPU test!\n"); - return true; -} - -int main(int argc, char *argv[]) { - bool passed = true; - - // Coherency between CPU and GPU sharing host and device memory. 
- passed = passed & cpu_to_gpu_coherency(); - - if (passed) - passed(); - return passed; -} diff --git a/tests/src/runtimeApi/synchronization/cache_coherency_gpu_gpu.cpp b/tests/src/runtimeApi/synchronization/cache_coherency_gpu_gpu.cpp deleted file mode 100644 index 78863b5351..0000000000 --- a/tests/src/runtimeApi/synchronization/cache_coherency_gpu_gpu.cpp +++ /dev/null @@ -1,284 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for Fine Grained GPU-GPU coherency. - -/* HIT_START - * BUILD: %t %s ../../test_common.cpp HIPCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" -#include - -typedef _Atomic(unsigned int) atomic_uint; - -// Helper function to spin on address until address equals value. -// If the address holds the value of -1, abort because the other thread failed. 
-__device__ void -gpu_spin_loop_or_abort_on_negative_one(unsigned int* address, - unsigned int value) { - unsigned int compare; - bool check = false; - do { - compare = value; - check = __opencl_atomic_compare_exchange_strong( - (atomic_uint*)address, /*expected=*/ &compare, /*desired=*/ value, - __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, - /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - if (compare == -1) - abort(); - } while(!check); -} - -// This kernel requires a single block, single thread dispatch. -__global__ void -gpu_cache0(int *A, int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2) { - for (size_t i = 0; i < N; i++) { - // Store data into A, system fence, and atomically mark flag. - // This guarantees this global write is visible by device 1. - A[i] = X[i]; - __opencl_atomic_fetch_add((atomic_uint*)AA1, 1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on device 1's global write to B. - gpu_spin_loop_or_abort_on_negative_one(BA1, i+1); - - // Check device 1 properly stored Y into B. - bool stored_data_matches = (B[i] == Y[i]); - if(!stored_data_matches) { - // If the data does not match, alert other thread and abort. - printf("FAIL: at i=%lu, B[i]=%d, which does not match Y[i]=%d.\n", - i, B[i], Y[i]); - __opencl_atomic_exchange((atomic_uint*)AA2, -1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - abort(); - } - // Otherwise tell the other thread to continue. - __opencl_atomic_fetch_add((atomic_uint*)AA2, 1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on kernel gpu_cache1 to finish checking X is stored in A. - gpu_spin_loop_or_abort_on_negative_one(BA2, i+1); - } -} - -// This kernel requires a single block, single thread dispatch. 
-__global__ void -gpu_cache1(int *A,int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2) { - for (size_t i = 0; i < N; i++) { - B[i] = Y[i]; - __opencl_atomic_fetch_add((atomic_uint*)BA1, 1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - gpu_spin_loop_or_abort_on_negative_one(AA1, i+1); - - bool stored_data_matches = (A[i] == X[i]); - if(!stored_data_matches) { - printf("FAIL: at i=%lu, A[i]=%d, which does not match X[i]=%d.\n", - i, A[i], X[i]); - __opencl_atomic_exchange((atomic_uint*)BA2, -1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - abort(); - } - __opencl_atomic_fetch_add((atomic_uint*)BA2, 1, __ATOMIC_RELEASE, - __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - gpu_spin_loop_or_abort_on_negative_one(AA2, i+1); - } -} - -// This test runs on devices where XGMI enables fine-grained communication -// between GPUs. This performs a message passing test. -// Array A is allocated on Device 0, and remotely on Device 1. -// Device 0 also increments atomic ints AA1 and AA2. -// Array B is allocated on Device 1, and remotely on Device 0. -// Device 1 also increments atomic ints BA1 and BA2. -// Kernel 0 will launch on Device 0, and store array X into array A. -// Kernel 1 will launch on Device 1, and store array Y into array B. -// Kernel 0 will validate that the correct values of array Y are stored in B. -// Kernel 1 will validate that the correct values of array X are stored in A. - -bool gpu_to_gpu_coherency() { - int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1; - int *A_h, *B_h, *X_h, *Y_h; - size_t N = 1024; - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int numTestDevices = 2; - - HIPCHECK(hipGetDeviceCount(&numDevices)); - if (numDevices < numTestDevices) { - printf("info: less than 2 GPUs. 
skipping multi-GPU test!\n"); - return true; - } - printf("info: performing this test only on first two GPUs.\n"); - - // Skip this test if either device does not support this feature. - hipDeviceProp_t props0, props1; - HIPCHECK(hipGetDeviceProperties(&props0, 0)); - HIPCHECK(hipGetDeviceProperties(&props1, 1)); - if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 || - strncmp(props1.gcnArchName, "gfx90a", 6) != 0) && - (strncmp(props0.gcnArchName, "gfx940", 6) != 0 || - strncmp(props1.gcnArchName, "gfx940", 6) != 0)) { - printf("info: skipping test on devices other than gfx90a and gfx940.\n"); - return true; - } - - // Allocate Host Side Memory. - printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - A_h = (int*)malloc(Nbytes); HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - B_h = (int*)malloc(Nbytes); HIPCHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - X_h = (int*)malloc(Nbytes); HIPCHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - Y_h = (int*)malloc(Nbytes); HIPCHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess ); - - // Initialize the arrays and atomic variables. - for (size_t i = 0; i < N; i++) - { - X_h[i] = 100000000 + i; - Y_h[i] = 300000000 + i; - } - - // Initialize shared atomic flags on host coherent memory. 
- unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h; - unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d; - HIPCHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&AA1_d, AA1_h, 0)); *AA1_h = 0; - HIPCHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&AA2_d, AA2_h, 0)); *AA2_h = 0; - HIPCHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&BA1_d, BA1_h, 0)); *BA1_h = 0; - HIPCHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIPCHECK(hipHostGetDevicePointer((void**)&BA2_d, BA2_h, 0)); *BA2_h = 0; - - // Skip the first stream. - hipStream_t stream[numTestDevices + 1]; - HIPCHECK(hipStreamCreate(&stream[0])); - - // Set-up Device 0. - HIPCHECK(hipSetDevice(0)); - // Enable P2P access to Device 1. - HIPCHECK(hipDeviceEnablePeerAccess(1,0)); - HIPCHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); - // Allocating Coherent Memory for Array A_d on Device 0. - printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - hipError_t status = hipExtMallocWithFlags((void**)&A_d, Nbytes, hipDeviceMallocFinegrained); - if (status == hipErrorOutOfMemory || A_d == 0 ) { - printf("info: device fine-grained memory not supported on this config\n"); - printf("info: skipping this GPU-GPU coherency test\n"); - return true; - } else if (status != hipSuccess) { - printf("error: failed to allocate device 0 fine-grain memory\n"); - return false; - } - HIPCHECK(hipMalloc(&X_d0, Nbytes)); - HIPCHECK(hipMalloc(&Y_d0, Nbytes)); - - // Set-up Device 1. - HIPCHECK(hipSetDevice(1)); - // Enable P2P access to Device 0. - HIPCHECK(hipDeviceEnablePeerAccess(0,0)); - HIPCHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking)); - // Allocating Coherent Memory for Array B_d on Device 1. 
- printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - status = hipExtMallocWithFlags((void**)&B_d, Nbytes, hipDeviceMallocFinegrained); - if (status == hipErrorOutOfMemory || B_d == 0 ) { - printf("info: device fine-grained memory not supported on this config\n"); - printf("info: skipping this GPU-GPU coherency test\n"); - return true; - } else if (status != hipSuccess) { - printf("error: failed to allocate device 1 fine-grain memory\n"); - return false; - } - HIPCHECK(hipMalloc(&X_d1, Nbytes)); - HIPCHECK(hipMalloc(&Y_d1, Nbytes)); - - // Transfer initialized data onto the device arrays. - printf("info: copy Host2Device\n"); - HIPCHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice)); - HIPCHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice)); - - // Prepare and launch the device kernels. - const unsigned blocks = 1; - const unsigned threadsPerBlock = 1; - HIPCHECK(hipSetDevice(0)); - printf("info: launch gpu kernel 0\n"); - hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock), - 0, stream[1], - A_d, B_d, X_d0, Y_d0, N, - AA1_d, AA2_d, BA1_d, BA2_d); - // Check if launch failed. - HIPCHECK(hipGetLastError()); - - HIPCHECK(hipSetDevice(1)); - printf("info: launch gpu kernel 1\n"); - hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock), - 0, stream[2], - A_d, B_d, X_d1, Y_d1, N, - AA1_d, AA2_d, BA1_d, BA2_d); - HIPCHECK(hipGetLastError()); - - // Wait for kernels on both devices. - HIPCHECK(hipStreamSynchronize(stream[1])); - HIPCHECK(hipStreamSynchronize(stream[2])); - - // Evaluate the resultant arrays A and B. 
- printf("info: copy Device2Host\n"); - HIPCHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - HIPCHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost)); - printf("info: check result\n"); - for (size_t i = 0; i < N; i++) { - assert(A_h[i] == (100000000 + i)); - assert(B_h[i] == (300000000 + i)); - } - - // Free all the device and host memory allocated. - HIPCHECK(hipFree(A_d)); - HIPCHECK(hipFree(B_d)); - HIPCHECK(hipFree(X_d0)); - HIPCHECK(hipFree(Y_d0)); - HIPCHECK(hipFree(X_d1)); - HIPCHECK(hipFree(Y_d1)); - HIPCHECK(hipHostFree(AA1_h)); - HIPCHECK(hipHostFree(AA2_h)); - HIPCHECK(hipHostFree(BA1_h)); - HIPCHECK(hipHostFree(BA2_h)); - free(A_h); - free(B_h); - free(X_h); - free(Y_h); - - printf("info: finished GPU-GPU coherency test!\n"); - return true; -} - -int main(int argc, char *argv[]) { - bool passed = true; - - // Coherency between GPUs accessing local or remote FB. - passed = passed & gpu_to_gpu_coherency(); - - if (passed) - passed(); - return passed; -} diff --git a/tests/src/runtimeApi/synchronization/copy_coherency.cpp b/tests/src/runtimeApi/synchronization/copy_coherency.cpp deleted file mode 100644 index 7452dbf34f..0000000000 --- a/tests/src/runtimeApi/synchronization/copy_coherency.cpp +++ /dev/null @@ -1,370 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// ROCM_TARGET=gfx900 hipcc --genco memcpyInt.device.cpp -o memcpyInt.hsaco -// hipcc copy_coherency.cpp -I ~/X/HIP/tests/src/ ~/X/HIP/tests/src/test_common.cpp - - -// TODO - add code object support here. -/* HIT_START - * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * TEST: %t - * HIT_END - */ - - -// Test cache management (fences) and synchronization between kernel and copy commands. -// Exhaustively tests 3 command types (copy, kernel, module kernel), -// many sync types (see SyncType), followed by another command, across a sweep -// of data sizes designed to stress various levels of the memory hierarchy. - -#include "hip/hip_runtime.h" -#include "test_common.h" - -// TODO - turn this back on when test infra can copy the module files to use as test inputs. 
-#define SKIP_MODULE_KERNEL 1 - - -class MemcpyFunction { - public: - MemcpyFunction(const char* fileName, const char* functionName) { - load(fileName, functionName); - }; - void load(const char* fileName, const char* functionName); - void launch(int* dst, const int* src, size_t numElements, hipStream_t s); - - private: - hipFunction_t _function; - hipModule_t _module; -}; - - -void MemcpyFunction::load(const char* fileName, const char* functionName) { -#if SKIP_MODULE_KERNEL != 1 - HIPCHECK(hipModuleLoad(&_module, fileName)); - HIPCHECK(hipModuleGetFunction(&_function, _module, functionName)); -#endif -}; - - -void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { - struct { - int* _dst; - const int* _src; - size_t _numElements; - } args; - - args._dst = dst; - args._src = src; - args._numElements = numElements; - - size_t size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - HIPCHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock, 1, 1, - 0 /*dynamicShared*/, s, NULL, (void**)&config)); -}; - -bool g_warnOnFail = true; -// int g_elementSizes[] = {1, 16, 1024, 524288, 16*1000*1000}; // TODO -int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000}; - -MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel"); - - -// Set value of array to specified 32-bit integer: -__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x; - for (size_t i = gid; i < numElements; i += stride) { - ptr[i] = val; - } -}; - -__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x; - for (size_t i = gid; i < 
numElements; i += stride) { - dst[i] = src[i]; - } -}; - - -// CHeck arrays in reverse order, to more easily detect cases where -// the copy is "partially" done. -void checkReverse(const int* ptr, int numElements, int expected) { - int mismatchCnt = 0; - for (int i = numElements - 1; i >= 0; i--) { - if (ptr[i] != expected) { - fprintf(stderr, "%s**error: i=%d, ptr[i] == (%x) , does not equal expected (%x)\n%s", - KRED, i, ptr[i], expected, KNRM); - if (!g_warnOnFail) { - assert(ptr[i] == expected); - } - if (++mismatchCnt >= 10) { - break; - } - } - } - - fprintf(stderr, "test: OK\n"); -} - -#define ENUM_CASE_STR(x) \ - case x: \ - return #x - -enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType }; - - -const char* CmdTypeStr(CmdType c) { - switch (c) { - ENUM_CASE_STR(COPY); - ENUM_CASE_STR(KERNEL); - ENUM_CASE_STR(MODULE_KERNEL); - default: - return "UNKNOWN"; - }; -} - - -enum SyncType { - NONE, - EVENT_QUERY, - EVENT_SYNC, - STREAM_WAIT_EVENT, - STREAM_QUERY, - STREAM_SYNC, - DEVICE_SYNC, - MAX_SyncType -}; - - -const char* SyncTypeStr(SyncType s) { - switch (s) { - ENUM_CASE_STR(NONE); - ENUM_CASE_STR(EVENT_QUERY); - ENUM_CASE_STR(EVENT_SYNC); - ENUM_CASE_STR(STREAM_WAIT_EVENT); - ENUM_CASE_STR(STREAM_QUERY); - ENUM_CASE_STR(STREAM_SYNC); - ENUM_CASE_STR(DEVICE_SYNC); - default: - return "UNKNOWN"; - }; -}; - - -void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s, size_t numElements) { - switch (cmd) { - case COPY: - HIPCHECK( - hipMemcpyAsync(dst, src, numElements * sizeof(int), hipMemcpyDeviceToDevice, s)); - break; - case KERNEL: { - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, s, dst, src, - numElements); - } break; - case MODULE_KERNEL: - g_moduleMemcpy.launch(dst, src, numElements, s); - break; - default: - failed("unknown cmd=%d type", cmd); - }; -} - -void resetInputs(int* Ad, int* Bd, int* Cd, int* Ch, size_t 
numElements, int expected) { - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, hipStream_t(0), Ad, - expected, numElements); - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, hipStream_t(0), Bd, - 0xDEADBEEF, - numElements); // poison with bad value to ensure is overwritten correctly - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), 0, hipStream_t(0), Bd, - 0xF000BA55, - numElements); // poison with bad value to ensure is overwritten correctly - memset(Ch, 13, - numElements * sizeof(int)); // poison with bad value to ensure is overwritten correctly - HIPCHECK(hipDeviceSynchronize()); -} - -// Intended to test proper synchronization and cache flushing between CMDA and CMDB. -// CMD are of type CmdType. All command copy memory, using either hipMemcpyAsync or kernel -// implementations. CmdA copies from Ad to Bd, Some form of synchronization is applied. Then cmdB -// copies from Bd to Cd. -// -// Cd is then copied to host Ch using a memory copy. 
-// -// Correct result at the end is that Ch contains the contents originally in Ad (integer 0x42) -void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType, hipStream_t stream1, - hipStream_t stream2, int numElements, int* Ad, int* Bd, int* Cd, int* Ch, - int expected) { - hipEvent_t e; - HIPCHECK(hipEventCreateWithFlags(&e, 0)); - - resetInputs(Ad, Bd, Cd, Ch, numElements, expected); - - const size_t sizeElements = numElements * sizeof(int); - fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", - sizeElements, (double)(sizeElements / 1024.0), CmdTypeStr(cmdAType), - SyncTypeStr(syncType), CmdTypeStr(cmdBType)); - - if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { - fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); - return; - } - - - // Step A: - runCmd(cmdAType, Bd, Ad, stream1, numElements); - - - // Sync in-between? - switch (syncType) { - case NONE: - break; - case EVENT_QUERY: { - hipError_t st = hipErrorNotReady; - HIPCHECK(hipEventRecord(e, stream1)); - do { - st = hipEventQuery(e); - } while (st == hipErrorNotReady); - HIPCHECK(st); - } break; - case EVENT_SYNC: - HIPCHECK(hipEventRecord(e, stream1)); - HIPCHECK(hipEventSynchronize(e)); - break; - case STREAM_WAIT_EVENT: - HIPCHECK(hipEventRecord(e, stream1)); - HIPCHECK(hipStreamWaitEvent(stream2, e, 0)); - break; - case STREAM_QUERY: { - hipError_t st = hipErrorNotReady; - do { - st = hipStreamQuery(stream1); - } while (st == hipErrorNotReady); - HIPCHECK(st); - } break; - case STREAM_SYNC: - HIPCHECK(hipStreamSynchronize(stream1)); - break; - case DEVICE_SYNC: - HIPCHECK(hipDeviceSynchronize()); - break; - default: - fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType)); - return; // FIXME, this doesn't clean up - // failed("unknown sync type=%s", SyncTypeStr(syncType)); - }; - - - runCmd(cmdBType, Cd, Bd, stream2, numElements); - - - // Copy back to host, use 
async copy to avoid any extra synchronization that might mask issues. - HIPCHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost, stream2)); - HIPCHECK(hipStreamSynchronize(stream2)); - - checkReverse(Ch, numElements, expected); - - HIPCHECK(hipEventDestroy(e)); -}; - - -void testWrapper(size_t numElements) { - const size_t sizeElements = numElements * sizeof(int); - const int expected = 0x42; - int *Ad, *Bd, *Cd, *Ch; - - HIPCHECK(hipMalloc(&Ad, sizeElements)); - HIPCHECK(hipMalloc(&Bd, sizeElements)); - HIPCHECK(hipMalloc(&Cd, sizeElements)); - HIPCHECK(hipHostMalloc(&Ch, sizeElements)); // Ch is the end array - - - hipStream_t stream1, stream2; - - HIPCHECK(hipStreamCreate(&stream1)); - HIPCHECK(hipStreamCreate(&stream2)); - - - HIPCHECK(hipDeviceSynchronize()); - fprintf(stderr, "test: init complete, start running tests\n"); - - - runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); - - for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) { - for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) { - for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) { - switch (syncMode) { - // case NONE:: - case EVENT_QUERY: - case EVENT_SYNC: - case STREAM_WAIT_EVENT: - // case STREAM_QUERY: - case STREAM_SYNC: - case DEVICE_SYNC: - runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB), stream1, - stream2, numElements, Ad, Bd, Cd, Ch, expected); - break; - default: - break; - } - } - } - } - -#if 0 - runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); - runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); - runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); - - runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); -#endif - - HIPCHECK(hipFree(Ad)); - HIPCHECK(hipFree(Bd)); - HIPCHECK(hipFree(Cd)); - 
HIPCHECK(hipHostFree(Ch)); - - HIPCHECK(hipStreamDestroy(stream1)); - HIPCHECK(hipStreamDestroy(stream2)); -} - - -int main(int argc, char* argv[]) { - for (int index = 0; index < sizeof(g_elementSizes) / sizeof(int); index++) { - size_t numElements = g_elementSizes[index]; - testWrapper(numElements); - } - - passed(); -} - - -// TODO -// - test environment variables diff --git a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp b/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp deleted file mode 100644 index 49fc9a18a7..0000000000 --- a/tests/src/runtimeApi/synchronization/memcpyInt.device.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include - - -extern "C" __global__ void memcpyIntKernel(hipLaunchParm lp, int* dst, const int* src, - size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x; - for (size_t i = gid; i < numElements; i += stride) { - dst[i] = src[i]; - } -}; diff --git a/tests/src/specialFunc.cu b/tests/src/specialFunc.cu deleted file mode 100644 index 17f9f8d5d8..0000000000 --- a/tests/src/specialFunc.cu +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -//Test to ensure hipify runs correctly. -// Hipify may report warnings for some missing/unsupported functions - -void __global__ -test_kernel(float *A) -{ - int tid = blockIdx.x * blockDim.x + threadIdx.x; - - float a = __ballot(tid < 16); - float b = __shfl(tid < 16); -} diff --git a/tests/src/stress/README.md b/tests/src/stress/README.md deleted file mode 100644 index fc8afedd21..0000000000 --- a/tests/src/stress/README.md +++ /dev/null @@ -1,2 +0,0 @@ -## Stress Tests -This directory consists of stress tests for HIP diff --git a/tests/src/stress/hipStressAsync.cpp b/tests/src/stress/hipStressAsync.cpp deleted file mode 100644 index 502c9948ef..0000000000 --- a/tests/src/stress/hipStressAsync.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Test for checking the functionality of - * hipError_t hipDeviceSynchronize(); - */ - -#include "hip/hip_runtime.h" -#include - -#define _SIZE sizeof(int) * 1024 * 1024 -#define NUM_STREAMS 20 -#define ITER 1 << 10 - -__global__ void Iter(hipLaunchParm lp, int* Ad, int num) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if (tx == 0) { - for (int i = 0; i < num; i++) { - Ad[tx] += 1; - } - } -} - -int main() { - int* A[NUM_STREAMS]; - int* Ad[NUM_STREAMS]; - hipStream_t stream[NUM_STREAMS]; - for (int i = 0; i < NUM_STREAMS; i++) { - hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault); - A[i][0] = 1; - hipMalloc((void**)&Ad[i], _SIZE); - hipStreamCreate(&stream[i]); - } - for (int i = 0; i < NUM_STREAMS; i++) { - for (int j = 0; j < ITER; j++) { - std::cout << "Iter: " << j << std::endl; - hipMemcpyAsync(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice, stream[i]); - hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, stream[i], Ad[i], 1 << 30); - hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost, stream[i]); - } - } - - std::cout << "Waitin..." << std::endl; - - hipDeviceSynchronize(); -} diff --git a/tests/src/stress/hipStressChain.cpp b/tests/src/stress/hipStressChain.cpp deleted file mode 100644 index 20d0777b4a..0000000000 --- a/tests/src/stress/hipStressChain.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "hip/hip_runtime.h" -#include -#include - -#define NUM_SIZE 8 -#define NUM_ITER 1 << 30 -static size_t size[NUM_SIZE]; - -__global__ void Add(hipLaunchParm lp, int* Ad) { - int tx = threadIdx.x; - Ad[tx] = Ad[tx] + tx; -} - -void setup() { - for (int i = 0; i < NUM_SIZE; i++) { - size[i] = 1 << (i + 6); // start at 8 bytes - } -} - -void valSet(int* A, int val, size_t size) { - size_t len = size / sizeof(int); - for (int i = 0; i < len; i++) { - A[i] = val; - } -} - -int main() { - setup(); - int *A, *Ad; - for (int i = 0; i < NUM_SIZE; i++) { - A = (int*)malloc(size[i]); - valSet(A, 1, size[i]); - hipMalloc(&Ad, size[i]); - std::cout << "Malloc success at size: " << size[i] << std::endl; - - for (int j = 0; j < NUM_ITER; j++) { - std::cout << "Iter: " << j << std::endl; - hipMemcpy(Ad, A, size[i], hipMemcpyHostToDevice); - hipLaunchKernel(Add, dim3(1), dim3(size[i] / sizeof(int)), 0, 0, Ad); - hipMemcpy(A, Ad, size[i], hipMemcpyDeviceToHost); - } - - hipDeviceSynchronize(); - } -} diff --git a/tests/src/stress/hipStressKernel.cpp b/tests/src/stress/hipStressKernel.cpp deleted file mode 100644 index 40df687556..0000000000 --- a/tests/src/stress/hipStressKernel.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "hip/hip_runtime.h" -#include -#include - -#define NUM_SIZE 8 -#define NUM_ITER 1 << 30 -static size_t size[NUM_SIZE]; - -__global__ void Add(hipLaunchParm lp, int* Ad) { - int tx = threadIdx.x; - Ad[tx] = Ad[tx] + tx; -} - -void setup() { - for (int i = 0; i < NUM_SIZE; i++) { - size[i] = 1 << (i + 6); // start at 8 bytes - } -} - -void valSet(int* A, int val, size_t size) { - size_t len = size / sizeof(int); - for (int i = 0; i < len; i++) { - A[i] = val; - } -} - -int main() { - setup(); - int *A, *Ad; - for (int i = 0; i < NUM_SIZE; i++) { - A = (int*)malloc(size[i]); - valSet(A, 1, size[i]); - hipMalloc(&Ad, size[i]); - std::cout << "Malloc success at size: " << size[i] << std::endl; - for (int j = 0; j < NUM_ITER; j++) { - std::cout << "\r" - << "Iter: " << j; - hipLaunchKernel(Add, dim3(1), dim3(size[i] / sizeof(int)), 0, 0, Ad); - } - std::cout << std::endl; - hipDeviceSynchronize(); - - free(A); - hipFree(Ad); - } -} diff --git a/tests/src/stress/hipStressMemcpy.cpp b/tests/src/stress/hipStressMemcpy.cpp deleted file mode 100644 index 21d1e306f5..0000000000 --- a/tests/src/stress/hipStressMemcpy.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "hip/hip_runtime.h" -#include -#include - -#define NUM_SIZE 8 -#define NUM_ITER 1 << 30 -static size_t size[NUM_SIZE]; - -void setup() { - for (int i = 0; i < NUM_SIZE; i++) { - size[i] = 1 << (i + 6); // start at 8 bytes - } -} - -void valSet(int* A, int val, size_t size) { - size_t len = size / sizeof(int); - for (int i = 0; i < len; i++) { - A[i] = val; - } -} - -int main() { - setup(); - int *A, *Ad; - for (int i = 0; i < NUM_SIZE; i++) { - A = (int*)malloc(size[i]); - valSet(A, 1, size[i]); - hipMalloc(&Ad, size[i]); - std::cout << "Malloc success at size: " << size[i] << std::endl; - for (int j = 0; j < NUM_ITER; j++) { - std::cout << "\r" - << "Iter: " << j; - hipMemcpy(Ad, A, size[i], hipMemcpyHostToDevice); - } - std::cout << std::endl; - hipDeviceSynchronize(); - } -} diff --git a/tests/src/stress/hipStressSync.cpp b/tests/src/stress/hipStressSync.cpp deleted file mode 100644 index 8c0ff9bf4f..0000000000 --- a/tests/src/stress/hipStressSync.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* - * Test for checking the functionality of - * hipError_t hipDeviceSynchronize(); - */ - -#include "hip/hip_runtime.h" -#include - -#define _SIZE sizeof(int) * 1024 * 1024 -#define NUM_STREAMS 20 -#define ITER 1 << 10 - -__global__ void Iter(hipLaunchParm lp, int* Ad, int num) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if (tx == 0) { - for (int i = 0; i < num; i++) { - Ad[tx] += 1; - } - } -} - -int main() { - int* A[NUM_STREAMS]; - int* Ad[NUM_STREAMS]; - hipStream_t stream[NUM_STREAMS]; - for (int i = 0; i < NUM_STREAMS; i++) { - hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault); - A[i][0] = 1; - hipMalloc((void**)&Ad[i], _SIZE); - } - for (int i = 0; i < NUM_STREAMS; i++) { - for (int j = 0; j < ITER; j++) { - std::cout << "Iter: " << j << std::endl; - hipMemcpy(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, 0, Ad[i], 1 << 30); - hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost); - } - } - - std::cout << "Waitin..." 
<< std::endl; - - hipDeviceSynchronize(); -} diff --git a/tests/src/surface/hipSurfaceObj2D.cpp b/tests/src/surface/hipSurfaceObj2D.cpp deleted file mode 100644 index 9f4c24e492..0000000000 --- a/tests/src/surface/hipSurfaceObj2D.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_RUNTIME rocclr - * TEST: %t - * HIT_END - */ -#include - -#include -#include "test_common.h" - -__global__ void tex2DKernel(hipSurfaceObject_t surfaceObject, hipSurfaceObject_t outputSurfObj, - int width, int height) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - float data; - surf2Dread(&data, surfaceObject, x * 4, y, hipBoundaryModeZero); - surf2Dwrite(data, outputSurfObj, x * 4, y, hipBoundaryModeZero); -} - -int runTest(int argc, char** argv); - -int main(int argc, char** argv) { - int testResult = runTest(argc, argv); - - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -int runTest(int argc, char** argv) { - int testResult = 1; - unsigned int width = 256; - unsigned int height = 256; - unsigned int size = width * height * sizeof(float); - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - printf("hData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hData[i]); - } - printf("\n"); - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - hipArray *hipArray, *hipOutArray; - hipMallocArray(&hipArray, &channelDesc, width, height); - - hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice); - - hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = hipArray; - // Create surface object - hipSurfaceObject_t surfaceObject = 0; - hipCreateSurfaceObject(&surfaceObject, &resDesc); - - 
hipMallocArray(&hipOutArray, &channelDesc, width, height); - hipResourceDesc resOutDesc; - memset(&resOutDesc, 0, sizeof(resOutDesc)); - resOutDesc.resType = hipResourceTypeArray; - resOutDesc.res.array.array = hipOutArray; - hipSurfaceObject_t outSurfaceObject = 0; - hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc); - - float* dData = NULL; - hipMalloc((void**)&dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - - hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, surfaceObject, - outSurfaceObject, width, height); - - hipDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - hipMemcpyFromArray(hOutputData, hipOutArray, 0, 0, size, hipMemcpyDeviceToHost); - - printf("dData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hOutputData[i]); - } - printf("\n"); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = 0; - break; - } - } - } - hipDestroySurfaceObject(surfaceObject); - hipDestroySurfaceObject(outSurfaceObject); - hipFree(dData); - hipFreeArray(hipArray); - hipFreeArray(hipOutArray); - return testResult; -} diff --git a/tests/src/testAPIStream.sh b/tests/src/testAPIStream.sh deleted file mode 100755 index aac7010d2b..0000000000 --- a/tests/src/testAPIStream.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -rm $HIP_PATH/src/hip_hcc.o -mkdir build -$HIP_PATH/bin/hipcc hipAPIStreamDisable.cpp test_common.cpp -o ./build/hipAPIStreamDisable -rm $HIP_PATH/src/hip_hcc.o -$HIP_PATH/bin/hipcc hipAPIStreamEnable.cpp test_common.cpp -o ./build/hipAPIStreamEnable -rm $HIP_PATH/src/hip_hcc.o diff --git a/tests/src/test_common.cpp b/tests/src/test_common.cpp deleted file mode 100644 index ec76df0427..0000000000 --- a/tests/src/test_common.cpp +++ 
/dev/null @@ -1,261 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include "test_common.h" - -#include -#ifdef __linux__ -#include -#elif defined(_WIN32) -#include -#endif - -// standard global variables that can be set on command line -size_t N = 4 * 1024 * 1024; -char memsetval = 0x42; -int memsetD32val = 0xDEADBEEF; -short memsetD16val = 0xDEAD; -char memsetD8val = 0xDE; -int iterations = 1; -unsigned blocksPerCU = 6; // to hide latency -unsigned threadsPerBlock = 256; -int textureFilterMode = 0; // 0: hipFilterModePoint; 1: hipFilterModeLinear -int p_gpuDevice = 0; -unsigned p_verbose = 0; -int p_tests = -1; /*which tests to run. Interpretation is left to each test. 
default:all*/ -int debug_test = 0; -#ifdef _WIN64 -const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES="; -const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES="; -const char* PATH_SEPERATOR_STR = "\\"; -const char* NULL_DEVICE = "NUL:"; -#else -const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES"; -const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES"; -const char* PATH_SEPERATOR_STR = "/"; -const char* NULL_DEVICE = "/dev/null"; -#endif - -#ifdef _WIN64 -// Windows does not have rand_r, use srand and rand instead. -int rand_r(unsigned int* s) { - srand(*s); - return rand(); -} -#endif - -// Get Free Memory from the system -static size_t getMemoryAmount() { -#if __linux__ - struct sysinfo info; - int _ = sysinfo(&info); - return info.freeram / (1024 * 1024); // MB -#elif defined(_WIN32) - MEMORYSTATUSEX statex; - statex.dwLength = sizeof(statex); - GlobalMemoryStatusEx(&statex); - return (statex.ullAvailPhys / (1024 * 1024)); // MB -#endif -} - -size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) { - if (memPerThread == 0) return 0; - auto memAmount = getMemoryAmount(); - const auto processor_count = std::thread::hardware_concurrency(); - if (processor_count == 0 || memAmount == 0) return 0; - size_t thread_count = 0; - if ((processor_count * memPerThread) < memAmount) - thread_count = processor_count; - else - thread_count = reinterpret_cast(memAmount / memPerThread); - if (maxThreads > 0) { - return (thread_count > maxThreads) ? 
maxThreads : thread_count; - } - return thread_count; -} - -// Function to determine if the device is of gfx11 architecture -bool IsGfx11() { -#if defined(__HIP_PLATFORM_NVIDIA__) - return false; -#elif defined(__HIP_PLATFORM_AMD__) - int device = -1; - hipDeviceProp_t props{}; - HIPCHECK(hipGetDevice(&device)); - HIPCHECK(hipGetDeviceProperties(&props, device)); - - // Get GCN Arch Name and compare to check if it is gfx11 - std::string arch = std::string(props.gcnArchName); - auto pos = arch.find(":"); - if (pos != std::string::npos) - arch = arch.substr(0, pos); - - if(arch.size() >= 5) - arch = arch.substr(0,5); - - return (arch == std::string("gfx11")) ? true : false; -#else - std::cout<<"Have to be either Nvidia or AMD platform, asserting"<= argc || !HipTest::parseSize(argv[i], &N)) { - failed("Bad N size argument"); - } - } else if (!strcmp(arg, "--threadsPerBlock")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &threadsPerBlock)) { - failed("Bad threadsPerBlock argument"); - } - } else if (!strcmp(arg, "--blocksPerCU")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &blocksPerCU)) { - failed("Bad blocksPerCU argument"); - } - } else if (!strcmp(arg, "--memsetval")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetval argument"); - } - memsetval = ex; - } else if (!strcmp(arg, "--memsetD32val")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetD32val argument"); - } - memsetD32val = ex; - } else if (!strcmp(arg, "--memsetD16val")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetD16val argument"); - } - memsetD16val = ex; - } else if (!strcmp(arg, "--memsetD8val")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetD8val argument"); - } - memsetD8val = ex; - } else if (!strcmp(arg, "--textureFilterMode")) { - int mode; - if (++i >= argc || !HipTest::parseInt(argv[i], &mode)) { - failed("Bad 
textureFilterMode argument"); - } - textureFilterMode = mode; - } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) { - failed("Bad iterations argument"); - } - } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-gpuDevice")) || (!strcmp(arg, "-g"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { - failed("Bad gpuDevice argument"); - } - - } else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &p_verbose)) { - failed("Bad verbose argument"); - } - } else if (!strcmp(arg, "--tests") || (!strcmp(arg, "-t"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &p_tests)) { - failed("Bad tests argument"); - } - - } else if (!strcmp(arg, "--debug") || (!strcmp(arg, "-d"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &debug_test)) { - failed("Bad tests argument"); - } - } else { - if (failOnUndefinedArg) { - failed("Bad argument '%s'", arg); - } else { - argv[extraArgs++] = argv[i]; - } - } - }; - - return extraArgs; -} - - -unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) { - int device; - HIPCHECK(hipGetDevice(&device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device)); - - unsigned blocks = props.multiProcessorCount * blocksPerCU; - if (blocks * threadsPerBlock > N) { - blocks = (N + threadsPerBlock - 1) / threadsPerBlock; - } - - return blocks; -} - -} // namespace HipTest diff --git a/tests/src/test_common.h b/tests/src/test_common.h deleted file mode 100644 index 853ab1f0da..0000000000 --- a/tests/src/test_common.h +++ /dev/null @@ -1,586 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * File is intended to C and CPP compliant hence any CPP specic changes - * should be added into CPP section - * - */ -#pragma once - -#ifdef __cplusplus - #include - #include - #if __CUDACC__ - #include - #else - #include - #endif -#endif - -// ************************ GCC section ************************** -#include - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" - -#define HC __attribute__((hc)) - -#define KNRM "\x1B[0m" -#define KRED "\x1B[31m" -#define KGRN "\x1B[32m" -#define KYEL "\x1B[33m" -#define KBLU "\x1B[34m" -#define KMAG "\x1B[35m" -#define KCYN "\x1B[36m" -#define KWHT "\x1B[37m" - -// HIP Skip Return code set at cmake -#define HIP_SKIP_RETURN_CODE 127 -#define HIP_ENABLE_SKIP_TESTS 0 - -// Recommended thresholds for Tests -#define MAX_THREADS 100 - -inline bool hip_skip_tests_enabled() { - return HIP_ENABLE_SKIP_TESTS; -} - -inline int hip_skip_retcode() { - // HIP Skip Return code set at cmake - return HIP_SKIP_RETURN_CODE; -} - -// This must be called in the end of main() to indicate test passed with success. -// If it's called somewhere else, compiling issues or unexpected result will arise. -#define passed() \ - printf("%sPASSED!%s\n", KGRN, KNRM); \ - return 0; - -// The real "assert" would have written to stderr. But it is -// sufficient to just fflush here without getting pedantic. This also -// ensures that we don't lose any earlier writes to stdout. -#define failed(...) \ - printf("%serror: ", KRED); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - printf("error: TEST FAILED\n%s", KNRM); \ - fflush(NULL); \ - abort(); - -#define warn(...) 
\ - printf("%swarn: ", KYEL); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - printf("warn: TEST WARNING\n%s", KNRM); - -#define HIP_PRINT_STATUS(status) \ - std::cout << hipGetErrorName(status) << " at line: " << __LINE__ << std::endl; - -#define HIPCHECK(error) \ - { \ - hipError_t localError = error; \ - if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \ - printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \ - localError, #error, __FILE__, __LINE__, KNRM); \ - failed("API returned error code."); \ - } \ - } - -#define HIPASSERT(condition) \ - if (!(condition)) { \ - failed("%sassertion %s at %s:%d%s \n", KRED, #condition, __FILE__, __LINE__, KNRM); \ - } - - -#define HIPCHECK_API(API_CALL, EXPECTED_ERROR) \ - { \ - hipError_t _e = (API_CALL); \ - if (_e != (EXPECTED_ERROR)) { \ - failed("%sAPI '%s' returned %d(%s) but test expected %d(%s) at %s:%d%s \n", KRED, \ - #API_CALL, _e, hipGetErrorName(_e), EXPECTED_ERROR, \ - hipGetErrorName(EXPECTED_ERROR), __FILE__, __LINE__, KNRM); \ - } \ - } - -#define HIPCHECK_RETURN_ONFAIL(func) \ - do { \ - hipError_t herror = (func); \ - if (herror != hipSuccess) { \ - return herror; \ - } \ - } while (0); - -#ifdef _WIN64 -#include -#define aligned_alloc(x,y) _aligned_malloc(y,x) -#define aligned_free(x) _aligned_free(x) -#define popen(x,y) _popen(x,y) -#define pclose(x) _pclose(x) -#define setenv(x,y,z) _putenv_s(x,y) -#define unsetenv _putenv -#define fileno(x) _fileno(x) -#define dup(x) _dup(x) -#define dup2(x,y) _dup2(x,y) -#define pipe(x,y,z) _pipe(x,y,z) -#define sleep(x) _sleep(x) -#else -#define aligned_free(x) free(x) -#endif - -// standard command-line variables: -extern size_t N; -extern char memsetval; -extern int memsetD32val; -extern short memsetD16val; -extern char memsetD8val; -extern int iterations; -extern unsigned blocksPerCU; -extern unsigned threadsPerBlock; -extern int textureFilterMode; -extern int p_gpuDevice; -extern unsigned 
p_verbose; -extern int p_tests; -extern int debug_test; -extern const char* HIP_VISIBLE_DEVICES_STR; -extern const char* CUDA_VISIBLE_DEVICES_STR; -extern const char* PATH_SEPERATOR_STR; -extern const char* NULL_DEVICE; - -// ********************* CPP section ********************* -#ifdef __cplusplus - -#ifdef __HIP_PLATFORM_HCC -#define TYPENAME(T) typeid(T).name() -#else -#define TYPENAME(T) "?" -#endif - -#ifdef _WIN64 -int rand_r(unsigned int* s); -#endif - -// Get Optimal Thread count size -size_t getHostThreadCount(const size_t memPerThread = 200 /* MB */, const size_t maxThreads = 0); - -namespace HipTest { - -// Returns the current system time in microseconds -inline long long get_time() { -#if __CUDACC__ - struct timeval tv; - gettimeofday(&tv, 0); - return (tv.tv_sec * 1000000) + tv.tv_usec; -#else - return std::chrono::high_resolution_clock::now().time_since_epoch() - /std::chrono::microseconds(1); -#endif -} - -double elapsed_time(long long startTimeUs, long long stopTimeUs); - -int parseSize(const char* str, size_t* output); -int parseUInt(const char* str, unsigned int* output); -int parseInt(const char* str, int* output); -int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg); - -unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N); - -template // pointer type -void checkArray(T hData, T hOutputData, size_t width, size_t height,size_t depth) { - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - int offset = i*width*height + j*width + k; - if (hData[offset] != hOutputData[offset]) { - std::cerr << '[' << i << ',' << j << ',' << k << "]:" << hData[offset] << "----" - << hOutputData[offset]<<" "; - failed("mistmatch at:%d %d %d",i,j,k); - } - } - } - } -} - -template -void checkArray(T input, T output, size_t height, size_t width) { - for(int i=0; i -__global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) { - size_t offset = 
(blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < NELEM; i += stride) { - C_d[i] = A_d[i] + B_d[i]; - } -} - - -template -__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, - size_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - C_d[i] = A_d[i] + B_d[i]; - } -} - - -template -__global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - // Deliberately do this in an inefficient way to increase kernel runtime - for (int i = 0; i < count; i++) { - for (size_t i = offset; i < NELEM; i += stride) { - C_d[i] = A_d[i] + (T)count; - } - } -} - - -template -__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - // Deliberately do this in an inefficient way to increase kernel runtime - for (int i = 0; i < count; i++) { - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - C_d[i] = A_d[i] + (T)count; - } - } -} - - -template -__global__ void memsetReverse(T* C_d, T val, int64_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - C_d[i] = val; - } -} - - -template -void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) { - // Initialize the host data: - for (size_t i = 0; i < numElements; i++) { - if (A_h) (A_h)[i] = 3.146f + i; // Pi - if (B_h) (B_h)[i] = 1.618f + i; // Phi - if (C_h) (C_h)[i] = 0.0f + i; - } -} - - -template -void initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) { - size_t Nbytes = N * sizeof(T); - - if 
(usePinnedHost) { - if (A_h) { - HIPCHECK(hipHostMalloc(reinterpret_cast(A_h), Nbytes)); - } - if (B_h) { - HIPCHECK(hipHostMalloc(reinterpret_cast(B_h), Nbytes)); - } - if (C_h) { - HIPCHECK(hipHostMalloc(reinterpret_cast(C_h), Nbytes)); - } - } else { - if (A_h) { - *A_h = (T*)malloc(Nbytes); - HIPASSERT(*A_h != NULL); - } - - if (B_h) { - *B_h = (T*)malloc(Nbytes); - HIPASSERT(*B_h != NULL); - } - - if (C_h) { - *C_h = (T*)malloc(Nbytes); - HIPASSERT(*C_h != NULL); - } - } - - setDefaultData(N, A_h ? *A_h : NULL, B_h ? *B_h : NULL, C_h ? *C_h : NULL); -} - - -template -void initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N, - bool usePinnedHost = false) { - size_t Nbytes = N * sizeof(T); - - if (A_d) { - HIPCHECK(hipMalloc(A_d, Nbytes)); - } - if (B_d) { - HIPCHECK(hipMalloc(B_d, Nbytes)); - } - if (C_d) { - HIPCHECK(hipMalloc(C_d, Nbytes)); - } - - initArraysForHost(A_h, B_h, C_h, N, usePinnedHost); -} - - -template -void freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) { - if (usePinnedHost) { - if (A_h) { - HIPCHECK(hipHostFree(A_h)); - } - if (B_h) { - HIPCHECK(hipHostFree(B_h)); - } - if (C_h) { - HIPCHECK(hipHostFree(C_h)); - } - } else { - if (A_h) { - free(A_h); - } - if (B_h) { - free(B_h); - } - if (C_h) { - free(C_h); - } - } -} - -template -void freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) { - if (A_d) { - HIPCHECK(hipFree(A_d)); - } - if (B_d) { - HIPCHECK(hipFree(B_d)); - } - if (C_d) { - HIPCHECK(hipFree(C_d)); - } - - freeArraysForHost(A_h, B_h, C_h, usePinnedHost); -} - -#if defined(__HIP_PLATFORM_AMD__) -template -void initArrays2DPitch(T** A_d, T** B_d, T** C_d, size_t* pitch_A, size_t* pitch_B, size_t* pitch_C, - size_t numW, size_t numH) { - if (A_d) { - HIPCHECK(hipMallocPitch((void**)A_d, pitch_A, numW * sizeof(T), numH)); - } - if (B_d) { - HIPCHECK(hipMallocPitch((void**)B_d, pitch_B, numW * sizeof(T), numH)); - } - if (C_d) { - HIPCHECK(hipMallocPitch((void**)C_d, 
pitch_C, numW * sizeof(T), numH)); - } - - HIPASSERT(*pitch_A == *pitch_B); - HIPASSERT(*pitch_A == *pitch_C) -} - -inline void initHIPArrays(hipArray** A_d, hipArray** B_d, hipArray** C_d, - const hipChannelFormatDesc* desc, const size_t numW, const size_t numH, - const unsigned int flags) { - if (A_d) { - HIPCHECK(hipMallocArray(A_d, desc, numW, numH, flags)); - } - if (B_d) { - HIPCHECK(hipMallocArray(B_d, desc, numW, numH, flags)); - } - if (C_d) { - HIPCHECK(hipMallocArray(C_d, desc, numW, numH, flags)); - } -} -#endif - -// Assumes C_h contains vector add of A_h + B_h -// Calls the test "failed" macro if a mismatch is detected. -template -size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true, - bool reportMismatch = true) { - size_t mismatchCount = 0; - size_t firstMismatch = 0; - size_t mismatchesToPrint = 10; - for (size_t i = 0; i < N; i++) { - T expected = A_h[i] + B_h[i]; - if (result_H[i] != expected) { - if (mismatchCount == 0) { - firstMismatch = i; - } - mismatchCount++; - if ((mismatchCount <= mismatchesToPrint) && expectMatch) { - std::cout << std::fixed << std::setprecision(32); - std::cout << "At " << i << std::endl; - std::cout << " Computed:" << result_H[i] << std::endl; - std::cout << " Expected:" << expected << std::endl; - } - } - } - - if (reportMismatch) { - if (expectMatch) { - if (mismatchCount) { - failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); - } - } else { - if (mismatchCount == 0) { - failed("expected mismatches but did not detect any!"); - } - } - } - - return mismatchCount; -} - - -// Assumes C_h contains vector add of A_h + B_h -// Calls the test "failed" macro if a mismatch is detected. 
-template -void checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) { - size_t mismatchCount = 0; - size_t firstMismatch = 0; - size_t mismatchesToPrint = 10; - for (size_t i = 0; i < N; i++) { - if (result_H[i] != expected_H[i]) { - if (mismatchCount == 0) { - firstMismatch = i; - } - mismatchCount++; - if ((mismatchCount <= mismatchesToPrint) && expectMatch) { - std::cout << std::fixed << std::setprecision(32); - std::cout << "At " << i << std::endl; - std::cout << " Computed:" << result_H[i] << std::endl; - std::cout << " Expected:" << expected_H[i] << std::endl; - } - } - } - - if (expectMatch) { - if (mismatchCount) { - fprintf(stderr, "%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); - // failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); - } - } else { - if (mismatchCount == 0) { - failed("expected mismatches but did not detect any!"); - } - } -} - - -//--- -struct Pinned { - static const bool isPinned = true; - static const char* str() { return "Pinned"; }; - - static void* Alloc(size_t sizeBytes) { - void* p; - HIPCHECK(hipHostMalloc((void**)&p, sizeBytes)); - return p; - }; -}; - - -//--- -struct Unpinned { - static const bool isPinned = false; - static const char* str() { return "Unpinned"; }; - - static void* Alloc(size_t sizeBytes) { - void* p = malloc(sizeBytes); - HIPASSERT(p); - return p; - }; -}; - - -struct Memcpy { - static const char* str() { return "Memcpy"; }; -}; - -struct MemcpyAsync { - static const char* str() { return "MemcpyAsync"; }; -}; - - -template -struct MemTraits; - - -template <> -struct MemTraits { - static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind, - hipStream_t stream) { - HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind)); - } -}; - - -template <> -struct MemTraits { - static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind, - hipStream_t stream) { - HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, 
kind, stream)); - } -}; - -inline bool isImageSupported() { - int imageSupport = 1; -#ifdef __HIP_PLATFORM_AMD__ - HIPCHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport, - p_gpuDevice)); -#endif - return imageSupport != 0; -} - -}; // namespace HipTest - -// This must be called in the beginning of image test app's main() to indicate whether image -// is supported. -#define checkImageSupport() \ - if (!HipTest::isImageSupported()) \ - { printf("Texture is not support on the device. Skipped.\n"); passed(); } -#endif //__cplusplus - -// Function to determine if the device is of gfx11 architecture -bool IsGfx11(); \ No newline at end of file diff --git a/tests/src/texture/hipBindTex2DPitch.cpp b/tests/src/texture/hipBindTex2DPitch.cpp deleted file mode 100644 index 3fb8d21f8b..0000000000 --- a/tests/src/texture/hipBindTex2DPitch.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/*HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include "test_common.h" - -#define SIZE_H 8 -#define SIZE_W 12 -#define TYPE_t float - -texture tex; - -// texture object is a kernel argument -__global__ void texture2dCopyKernel( TYPE_t* dst) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - if ( (x< SIZE_W) && (y< SIZE_H) ){ - dst[SIZE_W*y+x] = tex2D(tex, x, y); - } -#endif -} - -int main (void) -{ - checkImageSupport(); - TYPE_t* B; - TYPE_t* A; - TYPE_t* devPtrB; - TYPE_t* devPtrA; - - B = new TYPE_t[SIZE_H*SIZE_W]; - A = new TYPE_t[SIZE_H*SIZE_W]; - for (size_t i = 0; i < (SIZE_H * SIZE_W); i++) { - A[i] = i + 1; - } - - size_t devPitchA, tex_ofs; - HIPCHECK(hipMallocPitch((void**)&devPtrA, &devPitchA ,SIZE_W*sizeof(TYPE_t), SIZE_H)) ; - HIPCHECK(hipMemcpy2D(devPtrA, devPitchA, A, SIZE_W*sizeof(TYPE_t), - SIZE_W*sizeof(TYPE_t), SIZE_H, hipMemcpyHostToDevice)); - - tex.addressMode[0] = hipAddressModeClamp; - tex.addressMode[1] = hipAddressModeClamp; - tex.normalized = false; - HIPCHECK(hipBindTexture2D(&tex_ofs, &tex, devPtrA, &tex.channelDesc, - SIZE_W, SIZE_H, devPitchA)); - HIPCHECK(hipMalloc((void**)&devPtrB, SIZE_W*sizeof(TYPE_t)*SIZE_H)) ; - - hipLaunchKernelGGL(texture2dCopyKernel, dim3(3, 2, 1), dim3(4, 4, 1), 0, 0, devPtrB); - hipDeviceSynchronize(); - HIPCHECK(hipMemcpy2D(B, SIZE_W*sizeof(TYPE_t), devPtrB, SIZE_W*sizeof(TYPE_t), - SIZE_W*sizeof(TYPE_t), SIZE_H, hipMemcpyDeviceToHost)); - - HipTest::checkArray(A, B, SIZE_H, SIZE_W); - delete []A; - delete []B; - hipFree(devPtrA); - hipFree(devPtrB); - passed(); -} diff --git 
a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp deleted file mode 100644 index 29510ac161..0000000000 --- a/tests/src/texture/hipBindTexRef1DFetch.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define N 512 - -texture tex; - -__global__ void kernel(float *out) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - if(x -#include "test_common.h" - -using namespace std; -#define R 8 //rows, height -#define C 8 //columns, width - -bool runTest(void); - -int main(int argc, char** argv) { - checkImageSupport(); - - bool testResult=runTest(); - - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -bool runTest() -{ -bool testResult=true; -hipChannelFormatDesc chan_test,chan_desc=hipCreateChannelDesc(32,0,0,0,hipChannelFormatKindSigned); -hipArray *hipArray; -HIPCHECK(hipMallocArray(&hipArray, &chan_desc,C,R,0)); -HIPCHECK(hipGetChannelDesc(&chan_test,hipArray)); - -if((chan_test.x == 32)&&(chan_test.y == 0)&&(chan_test.z == 0)&&(chan_test.f == hipChannelFormatKindSigned)) - testResult=true; -else - testResult=false; - -HIPCHECK(hipFreeArray(hipArray)); -return testResult; -} diff --git a/tests/src/texture/hipNormalizedFloatValueTex.cpp b/tests/src/texture/hipNormalizedFloatValueTex.cpp deleted file mode 100644 index 0447c6aae2..0000000000 --- a/tests/src/texture/hipNormalizedFloatValueTex.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * // Test hipFilterModePoint - * TEST: %t --textureFilterMode 0 - * // Test hipFilterModeLinear - * TEST: %t --textureFilterMode 1 - * HIT_END - */ - -#include "test_common.h" -#include -#define SIZE 10 -#include "hipTextureHelper.hpp" - -static float getNormalizedValue(const float value, - const hipChannelFormatDesc& desc) { - if ((desc.x == 8) && (desc.f == hipChannelFormatKindSigned)) - return (value / SCHAR_MAX); - if ((desc.x == 8) && (desc.f == hipChannelFormatKindUnsigned)) - return (value / UCHAR_MAX); - if ((desc.x == 16) && (desc.f == hipChannelFormatKindSigned)) - return (value / SHRT_MAX); - if ((desc.x == 16) && (desc.f == hipChannelFormatKindUnsigned)) - return (value / USHRT_MAX); - return value; -} - -texture texc; - -texture texuc; - -texture texs; - -texture texus; - - -template -__global__ void normalizedValTextureTest(unsigned int numElements, float* pDst) -{ -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - unsigned int elementID = threadIdx.x; - if(elementID >= numElements) - return; - float coord =(float) elementID/numElements; - if(std::is_same::value) - pDst[elementID] = tex1D(texc, coord); - else if(std::is_same::value) - pDst[elementID] = tex1D(texuc, coord); - else if(std::is_same::value) - pDst[elementID] = tex1D(texs, coord); - else if(std::is_same::value) - pDst[elementID] = tex1D(texus, coord); -#endif -} - -bool textureVerifyFilterModePoint(float *hOutputData, float *expected, size_t size) { - bool testResult = true; - for (int i = 0; i < size; i++) { - if ((hOutputData[i] == expected[i]) - || (i >= 1 && hOutputData[i] == expected[i - 1]) || // round down - (i < (size - 1) && hOutputData[i] == expected[i + 1])) // round up - { - continue; - } - printf("mismatch at output[%d]:%f expected[%d]:%f", i, hOutputData[i], i, - expected[i]); - if (i >= 1) { - printf(", expected[%d]:%f", i - 1, expected[i - 1]); - } - if (i < (size - 1)) { - printf(", expected[%d]:%f", 
i + 1, expected[i + 1]); - } - printf("\n"); - testResult = false; - break; - } - return testResult; -} - -bool textureVerifyFilterModeLinear(float *hOutputData, float *expected, size_t size) { - bool testResult = true; - for (int i = 0; i < size; i++) { - float mean = (fabs(expected[i]) + fabs(hOutputData[i])) / 2; - float ratio = fabs(expected[i] - hOutputData[i]) / (mean + HIP_SAMPLING_VERIFY_EPSILON); - if (ratio > HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD) { - printf("mismatch at output[%d]:%f expected[%d]:%f, ratio:%f\n", i, - hOutputData[i], i, expected[i], ratio); - testResult = false; - break; - } - } - return testResult; -} - -template -bool textureVerify(float *hOutputData, float *expected, size_t size) { - bool testResult = true; - if (fMode == hipFilterModePoint) { - testResult = textureVerifyFilterModePoint(hOutputData, expected, size); - } else if (fMode == hipFilterModeLinear) { - testResult = textureVerifyFilterModeLinear(hOutputData, expected, size); - } - return testResult; -} - -template -bool textureTest(texture *tex) -{ - hipChannelFormatDesc desc = hipCreateChannelDesc(); - hipArray_t dData; - HIPCHECK(hipMallocArray(&dData, &desc, SIZE)); - - T hData[] = {65, 66, 67, 68, 69, 70, 71, 72, 73, 74}; - HIPCHECK(hipMemcpy2DToArray(dData, 0, 0, hData, sizeof(T)*SIZE, sizeof(T)*SIZE, 1, hipMemcpyHostToDevice)); - - tex->addressMode[0] = hipAddressModeClamp; - tex->normalized = true; - tex->channelDesc = desc; - tex->filterMode = fMode; - HIPCHECK(hipBindTextureToArray(tex, dData, &desc)); - - float *dOutputData = NULL; - HIPCHECK(hipMalloc((void **) &dOutputData, sizeof(float)*SIZE)); - - hipLaunchKernelGGL(normalizedValTextureTest, dim3(1,1,1), dim3(SIZE,1,1), 0, 0, SIZE, dOutputData); - - float *hOutputData = new float[SIZE]; - HIPCHECK(hipMemcpy(hOutputData, dOutputData, (sizeof(float)*SIZE), hipMemcpyDeviceToHost)); - - float expected[SIZE]; - for(int i = 0; i < SIZE; i++) { - expected[i] = getNormalizedValue(float(hData[i]), desc); - } - bool 
testResult = textureVerify(hOutputData, expected, SIZE); - - HIPCHECK(hipFreeArray(dData)); - HIPCHECK(hipFree(dOutputData)); - delete [] hOutputData; - return testResult; -} - -template -bool runTest() { - bool status = true; - status &= textureTest(&texc); - status &= textureTest(&texuc); - status &= textureTest(&texs); - status &= textureTest(&texus); - return status; -} - -int main(int argc, char** argv) -{ - HipTest::parseStandardArguments(argc, argv, true); - checkImageSupport(); - - int device = p_gpuDevice; - bool status = false; - HIPCHECK(hipSetDevice(device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device)); - std::cout << "Device :: " << props.name << std::endl; - #ifdef __HIP_PLATFORM_AMD__ - std::cout << "Arch - AMD GPU :: " << props.gcnArch << std::endl; - #endif - - if(textureFilterMode == 0) { - printf("Test hipFilterModePoint\n"); - status = runTest(); - } else if(textureFilterMode == 1) { - printf("Test hipFilterModeLinear\n"); - printf("THRESH_HOLD:%f, EPSILON:%f\n", HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD, - HIP_SAMPLING_VERIFY_EPSILON); - status = runTest(); - } else { - printf("Wrong argument!\n"); - printf("hipNormalizedFloatValueTex --textureFilterMode 0 for hipFilterModePoint\n"); - printf("hipNormalizedFloatValueTex --textureFilterMode 1 for hipFilterModeLinear\n"); - } - - if(status){ - passed(); - } - else{ - failed("checks failed!"); - } -} diff --git a/tests/src/texture/hipTex1DFetchCheckModes.cpp b/tests/src/texture/hipTex1DFetchCheckModes.cpp deleted file mode 100644 index a7fbee744a..0000000000 --- a/tests/src/texture/hipTex1DFetchCheckModes.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "../test_common.h" - -#define N 16 -#define offset 3 -__global__ void tex1dKernel(float *val, hipTextureObject_t obj) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int k = blockIdx.x * blockDim.x + threadIdx.x; - if (k < N) - val[k] = tex1Dfetch(obj, k+offset); -#endif -} - -int runTest(hipTextureAddressMode, hipTextureFilterMode); - -int main(int argc, char **argv) { - checkImageSupport(); - - int testResult = runTest(hipAddressModeClamp,hipFilterModePoint); - testResult = testResult & runTest(hipAddressModeClamp,hipFilterModeLinear); - testResult = testResult & runTest(hipAddressModeBorder,hipFilterModePoint); - testResult = testResult & runTest(hipAddressModeBorder,hipFilterModeLinear); - if(testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -int runTest(hipTextureAddressMode addressMode, hipTextureFilterMode filterMode) { - - int testResult = 1; - - hipCtx_t HipContext; - hipDevice_t HipDevice; - int deviceID = 0; - hipDeviceGet(&HipDevice, deviceID); - hipCtxCreate(&HipContext, 0, HipDevice); - - // Allocating the required buffer on gpu device - float *texBuf, *texBufOut; - float val[N], output[N]; - - for (int i = 0; i < N; i++) { - val[i] = i+1; - output[i] = 0.0; - } - - HIPCHECK(hipMalloc(&texBuf, N * sizeof(float))); - HIPCHECK(hipMalloc(&texBufOut, N * sizeof(float))); - HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice)); - HIPCHECK(hipMemset(texBufOut, 0, N * sizeof(float))); - hipResourceDesc resDescLinear; - - memset(&resDescLinear, 0, sizeof(resDescLinear)); - resDescLinear.resType = hipResourceTypeLinear; - resDescLinear.res.linear.devPtr = texBuf; - resDescLinear.res.linear.desc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - resDescLinear.res.linear.sizeInBytes = N * sizeof(float); - - hipTextureDesc texDesc; - memset(&texDesc, 0, 
sizeof(texDesc)); - texDesc.readMode = hipReadModeElementType; - - texDesc.addressMode[0] = addressMode; - texDesc.filterMode = filterMode; - texDesc.normalizedCoords = false; - - // Creating texture object - hipTextureObject_t texObj = 0; - HIPCHECK(hipCreateTextureObject(&texObj, &resDescLinear, &texDesc, NULL)); - - dim3 dimBlock(1, 1, 1); - dim3 dimGrid(N , 1, 1); - - hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, - texBufOut, texObj); - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipMemcpy(output, texBufOut, N * sizeof(float), hipMemcpyDeviceToHost)); - - for (int i = offset; i < N; i++) { - if (output[i-offset] != val[i]) { - testResult = 0; - break; - } - } - // For hipResourceTypeLinear, reading of out-of-boundary address is undefined! - // So we won't verify those data - - HIPCHECK(hipDestroyTextureObject(texObj)); - HIPCHECK(hipFree(texBuf)); - HIPCHECK(hipFree(texBufOut)); - printf("%s(addressMode %d, filterMode %d) %s\n", __FUNCTION__, addressMode, filterMode, testResult ? "succeed" : "failed"); - return testResult; -} diff --git a/tests/src/texture/hipTexObjPitch.cpp b/tests/src/texture/hipTexObjPitch.cpp deleted file mode 100644 index 948556bf38..0000000000 --- a/tests/src/texture/hipTexObjPitch.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/*HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include "test_common.h" - -#define SIZE_H 20 -#define SIZE_W 179 -// texture object is a kernel argument -template -__global__ void texture2dCopyKernel( hipTextureObject_t texObj, TYPE_t* dst,TYPE_t* A) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - for(int i =0;i(texObj, j, i); - __syncthreads(); -#endif -} - -template -void texture2Dtest() -{ - TYPE_t* B; - TYPE_t* A; - TYPE_t* devPtrB; - TYPE_t* devPtrA; - - B = new TYPE_t[SIZE_H*SIZE_W]; - A = new TYPE_t[SIZE_H*SIZE_W]; - for(size_t i=1; i <= (SIZE_H*SIZE_W); i++){ - A[i-1] = i; - } - - size_t devPitchA; - HIPCHECK(hipMallocPitch((void**)&devPtrA, &devPitchA ,SIZE_W*sizeof(TYPE_t), SIZE_H)) ; - HIPCHECK(hipMemcpy2D(devPtrA, devPitchA, A, SIZE_W*sizeof(TYPE_t), - SIZE_W*sizeof(TYPE_t), SIZE_H, hipMemcpyHostToDevice)); - - // Use the texture object - hipResourceDesc texRes; - memset(&texRes, 0, sizeof(texRes)); - texRes.resType = hipResourceTypePitch2D; - texRes.res.pitch2D.devPtr = devPtrA; - texRes.res.pitch2D.height = SIZE_H; - texRes.res.pitch2D.width = SIZE_W; - texRes.res.pitch2D.pitchInBytes = devPitchA; - texRes.res.pitch2D.desc = hipCreateChannelDesc(); - - hipTextureDesc texDescr; - memset(&texDescr, 0, sizeof(texDescr)); - texDescr.normalizedCoords = false; - texDescr.filterMode = hipFilterModePoint; - texDescr.mipmapFilterMode = hipFilterModePoint; - texDescr.addressMode[0] = hipAddressModeClamp; - texDescr.addressMode[1] = 
hipAddressModeClamp; - texDescr.addressMode[2] = hipAddressModeClamp; - texDescr.readMode = hipReadModeElementType; - - hipTextureObject_t texObj; - HIPCHECK(hipCreateTextureObject(&texObj, &texRes, &texDescr, NULL)); - - HIPCHECK(hipMalloc((void**)&devPtrB, SIZE_W*sizeof(TYPE_t)*SIZE_H)) ; - - hipLaunchKernelGGL(texture2dCopyKernel, dim3(1,1,1), dim3(1,1,1), 0, 0, - texObj, devPtrB, devPtrA); - - HIPCHECK(hipMemcpy2D(B, SIZE_W*sizeof(TYPE_t), devPtrB, SIZE_W*sizeof(TYPE_t), - SIZE_W*sizeof(TYPE_t), SIZE_H, hipMemcpyDeviceToHost)); - - HipTest::checkArray(A, B, SIZE_H, SIZE_W); - delete []A; - delete []B; - hipFree(devPtrA); - hipFree(devPtrB); -} - -int main() -{ - checkImageSupport(); - - texture2Dtest(); - texture2Dtest(); - texture2Dtest(); - texture2Dtest(); - texture2Dtest(); - texture2Dtest(); - passed(); -} diff --git a/tests/src/texture/hipTextureHelper.hpp b/tests/src/texture/hipTextureHelper.hpp deleted file mode 100644 index 26f3675834..0000000000 --- a/tests/src/texture/hipTextureHelper.hpp +++ /dev/null @@ -1,227 +0,0 @@ -#pragma once - -#define HIP_SAMPLING_VERIFY_EPSILON 0.00001 -// The internal precision varies by the GPU family and sometimes within the family. -// Thus the following threshold is subject to change. 
-#define HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD 0.05 // 5% for filter mode -#define HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD 0.1 - -template -bool hipTextureSamplingVerify(const type outputData, const type expected) { - bool testResult = false; - if (fMode == hipFilterModePoint) { - testResult = outputData == expected; - } else if (fMode == hipFilterModeLinear) { - const type mean = (fabs(outputData) + fabs(expected)) / 2; - const type diff = fabs(outputData - expected); - const type ratio = diff / (mean + HIP_SAMPLING_VERIFY_EPSILON); - if (ratio <= HIP_SAMPLING_VERIFY_RELATIVE_THRESHOLD) { - testResult = true; - } else if (diff <= HIP_SAMPLING_VERIFY_ABSOLUTE_THRESHOLD) { - // Some small outputs have big ratio due to float operation difference of ALU and GPU - testResult = true; - } - } - return testResult; -} - -// Simulate CTS static AddressingTable sAddressingTable -template -void hipTextureGetAddress(int &value, const int maxValue) -{ - switch(addressMode) - { - case hipAddressModeClamp: - value = value < 0 ? 0 - : (value > maxValue - 1 ? maxValue - 1 : value); - break; - case hipAddressModeBorder: - value = value < -1 ? -1 - : (value > maxValue ? maxValue : value); - break; - default: - break; - } -} - -// Simlate logics in CTS read_image_pixel_float(). -// x, y and z must be returned by hipTextureGetAddress() -template -float hipTextureGetValue(const float *data, const int x, const int width, - const int y = 0, const int height = 0,const int z = 0, const int depth = 0) { - float result = std::numeric_limits::lowest(); - switch (addressMode) { - case hipAddressModeClamp: - if (width > 0) { - if (height == 0 && depth == 0) { - result = data[x]; // 1D - } else if (depth == 0) { - result = data[y * width + x]; // 2D - } else { - result = data[z * width * height + y * width + x]; // 3D - } - } - break; - case hipAddressModeBorder: - if (width > 0) { - if (height == 0 && depth == 0) { - result = (x >= 0 && x < width) ? 
data[x] : 0; // 1D - } else if (depth == 0) { - result = (x >= 0 && x < width && y >= 0 && y < height) ? - data[y * width + x] : 0; // 2D - } else { - result = (x >= 0 && x < width && y >= 0 && y < height && z >= 0 && z < depth) ? - data[z * width * height + y * width + x] : 0; // 3D - } - } - break; - default: - break; - } - return result; -} - -template -float getExpectedValue(const int width, float x, const float *data) { - float result = std::numeric_limits::lowest(); - switch (filterMode) { - case hipFilterModePoint: { - int i1 = static_cast(floor(x)); - hipTextureGetAddress < addressMode > (i1, width); - result = hipTextureGetValue < addressMode > (data, i1, width); - } - break; - case hipFilterModeLinear: { - x -= 0.5; - int i1 = static_cast(floor(x)); - int i2 = i1 + 1; - float a = x - i1; - hipTextureGetAddress < addressMode > (i1, width); - hipTextureGetAddress < addressMode > (i2, width); - - float t1 = hipTextureGetValue < addressMode > (data, i1, width); - float t2 = hipTextureGetValue < addressMode > (data, i2, width); - - return (1 - a) * t1 + a * t2; - } - break; - } - return result; -} - -template -float getExpectedValue(const int width, const int height, float x, float y, const float *data) { - float result = std::numeric_limits::lowest(); - switch (filterMode) { - case hipFilterModePoint: { - int i1 = static_cast(floor(x)); - int j1 = static_cast(floor(y)); - hipTextureGetAddress < addressMode > (i1, width); - hipTextureGetAddress < addressMode > (j1, height); - result = hipTextureGetValue < addressMode > (data, i1, width, j1, height); - } - break; - case hipFilterModeLinear: { - x -= 0.5; - y -= 0.5; - - int i1 = static_cast(floor(x)); - int j1 = static_cast(floor(y)); - - int i2 = i1 + 1; - int j2 = j1 + 1; - - float a = x - i1; - float b = y - j1; - - hipTextureGetAddress < addressMode > (i1, width); - hipTextureGetAddress < addressMode > (i2, width); - hipTextureGetAddress < addressMode > (j1, height); - hipTextureGetAddress < addressMode > 
(j2, height); - - float t11 = hipTextureGetValue < addressMode - > (data, i1, width, j1, height); - float t21 = hipTextureGetValue < addressMode - > (data, i2, width, j1, height); - float t12 = hipTextureGetValue < addressMode - > (data, i1, width, j2, height); - float t22 = hipTextureGetValue < addressMode - > (data, i2, width, j2, height); - - result = (1 - a) * (1 - b) * t11 + a * (1 - b) * t21 + (1 - a) * b * t12 - + a * b * t22; - } - break; - } - return result; -} - -template -float getExpectedValue(const int width, const int height, const int depth, - float x, float y, float z, const float *data) { - float result = std::numeric_limits::lowest(); - switch (filterMode) { - case hipFilterModePoint: { - int i1 = static_cast(floor(x)); - int j1 = static_cast(floor(y)); - int k1 = static_cast(floor(z)); - - hipTextureGetAddress < addressMode > (i1, width); - hipTextureGetAddress < addressMode > (j1, height); - hipTextureGetAddress < addressMode > (k1, depth); - - result = hipTextureGetValue < addressMode > (data, i1, width, j1, height, k1, depth); - } - break; - case hipFilterModeLinear: { - x -= 0.5; - y -= 0.5; - z -= 0.5; - - int i1 = static_cast(floor(x)); - int j1 = static_cast(floor(y)); - int k1 = static_cast(floor(z)); - - int i2 = i1 + 1; - int j2 = j1 + 1; - int k2 = k1 + 1; - - float a = x - i1; - float b = y - j1; - float c = z - k1; - - hipTextureGetAddress < addressMode > (i1, width); - hipTextureGetAddress < addressMode > (i2, width); - hipTextureGetAddress < addressMode > (j1, height); - hipTextureGetAddress < addressMode > (j2, height); - hipTextureGetAddress < addressMode > (k1, depth); - hipTextureGetAddress < addressMode > (k2, depth); - - float t111 = hipTextureGetValue < addressMode - > (data, i1, width, j1, height, k1, depth); - float t211 = hipTextureGetValue < addressMode - > (data, i2, width, j1, height, k1, depth); - float t121 = hipTextureGetValue < addressMode - > (data, i1, width, j2, height, k1, depth); - float t112 = 
hipTextureGetValue < addressMode - > (data, i1, width, j1, height, k2, depth); - float t122 = hipTextureGetValue < addressMode - > (data, i1, width, j2, height, k2, depth); - float t212 = hipTextureGetValue < addressMode - > (data, i2, width, j1, height, k2, depth); - float t221 = hipTextureGetValue < addressMode - > (data, i2, width, j2, height, k1, depth); - float t222 = hipTextureGetValue < addressMode - > (data, i2, width, j2, height, k2, depth); - - result = - (1 - a) * (1 - b) * (1 - c) * t111 + a * (1 - b) * (1 - c) * t211 + - (1 - a) * b * (1 - c) * t121 + a * b * (1 - c) * t221 + - (1 - a) * (1 - b) * c * t112 + a * (1 - b) * c * t212 + - (1 - a) * b * c * t122 + a * b * c * t222; - - } - break; - } - return result; -} \ No newline at end of file diff --git a/tests/src/texture/hipTextureMipmapObj2D.cpp b/tests/src/texture/hipTextureMipmapObj2D.cpp deleted file mode 100644 index d3b174be52..0000000000 --- a/tests/src/texture/hipTextureMipmapObj2D.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia - * TEST: %t - * HIT_END - */ -#include -#include -#include -#include - -#include -#include "test_common.h" - -// Height Width Vector -std::vector hw_vector = {2048, 1024, 512, 256, 64}; -std::vector mip_vector = {8, 4, 2, 1}; - -__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width, - int height, float level) { -#ifndef __gfx90a__ -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - outputData[y * width + x] = tex2DLod(textureObject, x, y, level); -#endif -#endif -} - -bool runMipMapTest(unsigned int width, unsigned int height, unsigned int mipmap_level) { - bool testResult = true; - - printf("Width: %u Height: %u mip: %u \n", width, height, mipmap_level); - - // Create new width & height to be tested - unsigned int orig_width = width; - unsigned int orig_height = height; - width /= pow(2, mipmap_level); - height /= pow(2, mipmap_level); - unsigned int size = width * height * sizeof(float); - - - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - printf("hData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hData[i]); - if (i % width == 0) { - printf("\n"); - } - } - printf("\n"); - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - HIP_ARRAY3D_DESCRIPTOR mipmapped_array_desc; - memset(&mipmapped_array_desc, 0x00, sizeof(HIP_ARRAY3D_DESCRIPTOR)); - mipmapped_array_desc.Width = orig_width; - 
mipmapped_array_desc.Height = orig_height; - mipmapped_array_desc.Depth = 0; - mipmapped_array_desc.Format = HIP_AD_FORMAT_FLOAT; - mipmapped_array_desc.NumChannels = ((channelDesc.x != 0) + (channelDesc.y != 0) - + (channelDesc.z != 0) + (channelDesc.w != 0)); - mipmapped_array_desc.Flags = 0; - - - hipMipmappedArray* mip_array_ptr; - hipMipmappedArrayCreate(&mip_array_ptr, &mipmapped_array_desc, 2 * mipmap_level); - - hipArray *hipArray = nullptr; - HIPCHECK(hipMipmappedArrayGetLevel(&hipArray, mip_array_ptr, mipmap_level)); - HIPCHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); - - hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = hipArray; - - // Specify texture object parameters - hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = hipAddressModeWrap; - texDesc.addressMode[1] = hipAddressModeWrap; - texDesc.filterMode = hipFilterModePoint; - texDesc.readMode = hipReadModeElementType; - texDesc.normalizedCoords = 0; - - // Create texture object - hipTextureObject_t textureObject = 0; - hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); - - float* dData = NULL; - hipMalloc((void**)&dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - - hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, - width, height, (2 * mipmap_level)); - - hipDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - printf("dData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hOutputData[i]); - if (i % width == 0) { - printf("\n"); - } - } - printf("\n"); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f 
----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = false; - break; - } - } - } - hipDestroyTextureObject(textureObject); - hipFree(dData); - hipFreeArray(hipArray); - free(hData); - return testResult; -} - - -bool runTest(int argc, char** argv) { - bool testResult = true; - - for (auto& hw: hw_vector) { - for (auto& mip: mip_vector) { - if ((hw / static_cast(pow (2,(mip * 2)))) > 0) { - testResult |= runMipMapTest(hw, hw, mip); - } - } - } - - printf("\n"); - return testResult; -} - -int main(int argc, char** argv) { - checkImageSupport(); - - bool testResult = true; -#ifdef _WIN32 - testResult = runTest(argc, argv); -#else - std::cout<<"Mipmaps are Supported only on windows, skipping the test"< -#include -#include - -#include -#include "test_common.h" -#include "hipTextureHelper.hpp" - -template -__global__ void tex1DKernel(float *outputData, hipTextureObject_t textureObject, - int width, float offsetX) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - outputData[x] = tex1D(textureObject, normalizedCoords ? 
(x + offsetX) / width : x + offsetX); -#endif -} - -template -bool runTest(const int width, const float offsetX) { - printf("%s(addressMode=%d, filterMode=%d, normalizedCoords=%d, width=%d, offsetX=%f)\n", __FUNCTION__, - addressMode, filterMode, normalizedCoords, width, offsetX); - bool testResult = true; - unsigned int size = width * sizeof(float); - float *hData = (float*) malloc(size); - memset(hData, 0, size); - for (int j = 0; j < width; j++) { - hData[j] = j; - } - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc( - 32, 0, 0, 0, hipChannelFormatKindFloat); - hipArray *hipArray; - hipMallocArray(&hipArray, &channelDesc, width); - - HIPCHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, width * sizeof(float), width * sizeof(float), 1, hipMemcpyHostToDevice)); - - hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = hipArray; - - // Specify texture object parameters - hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = addressMode; - texDesc.filterMode = filterMode; - texDesc.readMode = hipReadModeElementType; - texDesc.normalizedCoords = normalizedCoords; - - // Create texture object - hipTextureObject_t textureObject = 0; - hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); - - float *dData = NULL; - hipMalloc((void**) &dData, size); - - dim3 dimBlock(16, 1, 1); - dim3 dimGrid((width + dimBlock.x - 1)/ dimBlock.x, 1, 1); - - hipLaunchKernelGGL(tex1DKernel, dimGrid, dimBlock, 0, 0, dData, - textureObject, width, offsetX); - - hipDeviceSynchronize(); - - float *hOutputData = (float*) malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - for (int j = 0; j < width; j++) { - float expectedValue = getExpectedValue(width, offsetX + j, hData); - if (!hipTextureSamplingVerify(hOutputData[j], expectedValue)) { - printf("mismatched [ %d ]:%f ----%f\n", j, hOutputData[j], 
expectedValue); - testResult = false; - break; - } - } - - hipDestroyTextureObject(textureObject); - hipFree(dData); - hipFreeArray(hipArray); - free(hData); - free(hOutputData); - printf("%s %s\n", __FUNCTION__, testResult ? "succeeded":"failed"); - return testResult; -} - -int main(int argc, char **argv) { - checkImageSupport(); - - bool testResult = true; - testResult = testResult && runTest(256, -3); - testResult = testResult && runTest(256, 4); - - testResult = testResult && runTest(256, -8.5); - testResult = testResult && runTest(256, 12.5); - - testResult = testResult && runTest(256, -3); - testResult = testResult && runTest(256, 4); - - testResult = testResult && runTest(256, -8.5); - testResult = testResult && runTest(256, 12.5); - - testResult = testResult && runTest(256, -3); - testResult = testResult && runTest(256, 4); - - testResult = testResult && runTest(256, -8.5); - testResult = testResult && runTest(256, 12.5); - - testResult = testResult && runTest(256, -3); - testResult = testResult && runTest(256, 4); - - testResult = testResult && runTest(256, -8.5); - testResult = testResult && runTest(256, 12.5); - - if (testResult) { - passed(); - } else { - exit (EXIT_FAILURE); - } -} diff --git a/tests/src/texture/hipTextureObj1DFetch.cpp b/tests/src/texture/hipTextureObj1DFetch.cpp deleted file mode 100644 index 95f05a0a1e..0000000000 --- a/tests/src/texture/hipTextureObj1DFetch.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/*HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "test_common.h" - -#define N 512 - -__global__ void tex1dKernel(float *val, hipTextureObject_t obj) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int k = blockIdx.x * blockDim.x + threadIdx.x; - if (k < N) - val[k] = tex1Dfetch(obj, k); -#endif -} - -int runTest(void); - -int main(int argc, char **argv) { - checkImageSupport(); - - int testResult = runTest(); - if(testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -int runTest() { - int testResult = 1; - // Allocating the required buffer on gpu device - float *texBuf, *texBufOut; - float val[N], output[N]; - for (int i = 0; i < N; i++) { - val[i] = (i + 1) * (i + 1); - output[i] = 0.0; - } - HIPCHECK(hipMalloc(&texBuf, N * sizeof(float))); - HIPCHECK(hipMalloc(&texBufOut, N * sizeof(float))); - HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice)); - HIPCHECK(hipMemset(texBufOut, 0, N * sizeof(float))); - hipResourceDesc resDescLinear; - - memset(&resDescLinear, 0, sizeof(resDescLinear)); - resDescLinear.resType = hipResourceTypeLinear; - resDescLinear.res.linear.devPtr = texBuf; - resDescLinear.res.linear.desc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - resDescLinear.res.linear.sizeInBytes = N * sizeof(float); - - hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.readMode = hipReadModeElementType; - texDesc.addressMode[0]= hipAddressModeClamp; - - // Creating texture object - hipTextureObject_t texObj = 0; - HIPCHECK(hipCreateTextureObject(&texObj, &resDescLinear, &texDesc, NULL)); - - dim3 dimBlock(64, 1, 1); - dim3 dimGrid(N / dimBlock.x, 1, 1); - - hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, - texBufOut, texObj); - HIPCHECK(hipDeviceSynchronize()); - - HIPCHECK(hipMemcpy(output, texBufOut, N * sizeof(float), hipMemcpyDeviceToHost)); - - for(int i = 0; i < 
N; i++) - if (output[i] != val[i]) { - testResult = 0; - break; - } - - HIPCHECK(hipDestroyTextureObject(texObj)); - HIPCHECK(hipFree(texBuf)); - HIPCHECK(hipFree(texBufOut)); - return testResult; -} diff --git a/tests/src/texture/hipTextureObj2D.cpp b/tests/src/texture/hipTextureObj2D.cpp deleted file mode 100644 index 2641c28e17..0000000000 --- a/tests/src/texture/hipTextureObj2D.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include -#include -#include - -#include -#include "test_common.h" - -__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width, - int height) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - outputData[y * width + x] = tex2D(textureObject, x, y); -#endif -} - -int runTest(int argc, char** argv); - -int main(int argc, char** argv) { - checkImageSupport(); - - int testResult = runTest(argc, argv); - - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -int runTest(int argc, char** argv) { - int testResult = 1; - unsigned int width = 256; - unsigned int height = 256; - unsigned int size = width * height * sizeof(float); - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - printf("hData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hData[i]); - } - printf("\n"); - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - hipArray* hipArray; - hipMallocArray(&hipArray, &channelDesc, width, height); - - hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice); - - hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = hipArray; - - // 
Specify texture object parameters - hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = hipAddressModeClamp; - texDesc.addressMode[1] = hipAddressModeClamp; - texDesc.filterMode = hipFilterModePoint; - texDesc.readMode = hipReadModeElementType; - texDesc.normalizedCoords = 0; - - // Create texture object - hipTextureObject_t textureObject = 0; - hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); - float* dData = NULL; - hipMalloc((void**)&dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - - hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, - width, height); - - hipDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - printf("dData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hOutputData[i]); - } - printf("\n"); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = 0; - break; - } - } - } - hipDestroyTextureObject(textureObject); - hipFree(dData); - hipFreeArray(hipArray); - free(hData); - return testResult; -} diff --git a/tests/src/texture/hipTextureObj2DCheckModes.cpp b/tests/src/texture/hipTextureObj2DCheckModes.cpp deleted file mode 100644 index f8418d3828..0000000000 --- a/tests/src/texture/hipTextureObj2DCheckModes.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include -#include -#include - -#include -#include "test_common.h" -#include "hipTextureHelper.hpp" - -template -__global__ void tex2DKernel(float *outputData, hipTextureObject_t textureObject, - int width, int height, float offsetX, - float offsetY) { -#if 
!defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - outputData[y * width + x] = tex2D(textureObject, - normalizedCoords ? (x + offsetX) / width : x + offsetX, - normalizedCoords ? (y + offsetY) / height : y + offsetY); -#endif -} - -template -bool runTest(const int width, const int height, const float offsetX, const float offsetY) { - printf("%s(addressMode=%d, filterMode=%d, normalizedCoords=%d, width=%d, height=%d, offsetX=%f, offsetY=%f)\n", - __FUNCTION__, addressMode, filterMode, normalizedCoords, width, height, offsetX, offsetY); - bool testResult = true; - unsigned int size = width * height * sizeof(float); - float *hData = (float*) malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - int index = i * width + j; - hData[index] = index; - } - } - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc( - 32, 0, 0, 0, hipChannelFormatKindFloat); - hipArray *hipArray; - hipMallocArray(&hipArray, &channelDesc, width, height); - - HIPCHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, width * sizeof(float), width * sizeof(float), height, hipMemcpyHostToDevice)); - - hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = hipArray; - - // Specify texture object parameters - hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = addressMode; - texDesc.addressMode[1] = addressMode; - texDesc.filterMode = filterMode; - texDesc.readMode = hipReadModeElementType; - texDesc.normalizedCoords = normalizedCoords; - - // Create texture object - hipTextureObject_t textureObject = 0; - hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); - - float *dData = NULL; - hipMalloc((void**) &dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 
(height + dimBlock.y -1)/ dimBlock.y, 1); - - hipLaunchKernelGGL(tex2DKernel, dimGrid, dimBlock, 0, 0, dData, - textureObject, width, height, offsetX, offsetY); - - hipDeviceSynchronize(); - - float *hOutputData = (float*) malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - int index = i * width + j; - float expectedValue = getExpectedValue(width, height, - offsetX + j, offsetY + i, hData); - if (!hipTextureSamplingVerify(hOutputData[index], expectedValue)) { - printf("mismatched [ %d %d ]:%f ----%f\n", j, i, hOutputData[index], expectedValue); - testResult = false; - goto line1; - } - } - } -line1: - hipDestroyTextureObject(textureObject); - hipFree(dData); - hipFreeArray(hipArray); - free(hData); - free(hOutputData); - printf("%s %s\n", __FUNCTION__, testResult ? "succeeded":"failed"); - return testResult; -} - -int main(int argc, char **argv) { - checkImageSupport(); - - bool testResult = true; - - testResult = testResult && runTest(256, 256, -3.9, 6.1); - testResult = testResult && runTest(256, 256, 4.4, -7.0); - - testResult = testResult && runTest(256, 256, -8.5, 2.9); - testResult = testResult && runTest(256, 256, 12.5, 6.7); - - testResult = testResult && runTest(256, 256, -0.4, -0.4); - testResult = testResult && runTest(256, 256, 4, 14.6); - - // The following two cases have quite big deviation on Cpu and Gpu in 2D, so comment them out temporarily. 
- testResult = testResult && runTest(256, 256, -0.4, 0.4); - testResult = testResult && runTest(256, 256, 12.5, 23.7); - - testResult = testResult && runTest(256, 256, -3, 8.9); - testResult = testResult && runTest(256, 256, 4, -0.1); - - testResult = testResult && runTest(256, 256, -8.5, 15.9); - testResult = testResult && runTest(256, 256, 12.5, -17.9); - - testResult = testResult && runTest(256, 256, -3, 5.8); - testResult = testResult && runTest(256, 256, 4, 9.1); - - // The following two cases have quite big deviation on Cpu and Gpu in 2D, so comment them out temporarily. - testResult = testResult && runTest(256, 256, -8.5, 6.6); - testResult = testResult && runTest(256, 256, 12.5, 0.01); - - if (testResult) { - passed(); - } else { - exit (EXIT_FAILURE); - } -} diff --git a/tests/src/texture/hipTextureObj3DCheckModes.cpp b/tests/src/texture/hipTextureObj3DCheckModes.cpp deleted file mode 100644 index b96cf86717..0000000000 --- a/tests/src/texture/hipTextureObj3DCheckModes.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include -#include -#include - -#include -#include "test_common.h" -#include "hipTextureHelper.hpp" - -bool isGfx90a = false; - -template -__global__ void tex3DKernel(float *outputData, hipTextureObject_t textureObject, - int width, int height, int depth, float offsetX, - float offsetY, float offsetZ) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - int z = blockIdx.z * blockDim.z + threadIdx.z; - outputData[z * width * height + y * width + x] = tex3D(textureObject, - normalizedCoords ? (x + offsetX) / width : x + offsetX, - normalizedCoords ? (y + offsetY) / height : y + offsetY, - normalizedCoords ? 
(z + offsetZ) / depth : z + offsetZ); -#endif -} - -template -bool runTest(const int width, const int height, const int depth, const float offsetX, const float offsetY, const float offsetZ) { - printf("%s(addressMode=%d, filterMode=%d, normalizedCoords=%d, width=%d, height=%d, depth=%d, offsetX=%f, offsetY=%f, offsetZ=%f)\n", - __FUNCTION__, addressMode, filterMode, normalizedCoords, width, height, - depth, offsetX, offsetY, offsetZ); - bool testResult = true; - unsigned int size = width * height * depth * sizeof(float); - float *hData = (float*) malloc(size); - memset(hData, 0, size); - - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - int index = i * width * height + j * width + k; - hData[index] = index; - } - } - } - - // Allocate array and copy image data - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); - hipArray *arr; - - HIPCHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height, depth), hipArrayDefault)); - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(float), width, height); - myparms.dstArray = arr; - myparms.extent = make_hipExtent(width, height, depth); - myparms.kind = hipMemcpyHostToDevice; - - HIPCHECK(hipMemcpy3D(&myparms)); - - hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = arr; - - // Specify texture object parameters - hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = addressMode; - texDesc.addressMode[1] = addressMode; - texDesc.addressMode[2] = addressMode; - texDesc.filterMode = filterMode; - texDesc.readMode = hipReadModeElementType; - texDesc.normalizedCoords = normalizedCoords; - - // Create texture object - hipTextureObject_t textureObject = 0; - hipError_t res = hipCreateTextureObject(&textureObject, 
&resDesc, &texDesc, NULL); - if (res != hipSuccess) { - hipFreeArray(arr); - free(hData); - if (res == hipErrorNotSupported && isGfx90a) { - printf("gfx90a doesn't support 3D linear filter! Skipped!\n"); - } else { - testResult = false; - } - return testResult; - } - float *dData = NULL; - hipMalloc((void**) &dData, size); - hipMemset(dData, 0, size); - dim3 dimBlock(8, 8, 8); // 512 threads - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, - (depth + dimBlock.z - 1) / dimBlock.z); - - hipLaunchKernelGGL(tex3DKernel, dimGrid, dimBlock, 0, 0, dData, - textureObject, width, height, depth, offsetX, offsetY, offsetZ); - - hipDeviceSynchronize(); - - float *hOutputData = (float*) malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - int index = i * width * height + j * width + k; - float expectedValue = getExpectedValue( - width, height, depth, offsetX + k, offsetY + j, offsetZ + i, hData); - - if (!hipTextureSamplingVerify(hOutputData[index], expectedValue)) { - printf("mismatched [ %d %d %d]:%f ----%f\n", k, j, i, hOutputData[index], expectedValue); - testResult = false; - goto line1; - } - } - } - } -line1: - hipDestroyTextureObject(textureObject); - free(hOutputData); - hipFree(dData); - hipFreeArray(arr); - free(hData); - return testResult; -} - -int main(int argc, char **argv) { - checkImageSupport(); - - bool testResult = true; - - int device = 0; - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device)); - if (!strncmp(props.gcnArchName, "gfx90a", strlen("gfx90a"))) { - isGfx90a = true; - } - - testResult = testResult && runTest(256, 256, 256, -3.9, 6.1, 9.5); - testResult = testResult && runTest(256, 256, 256, 4.4, -7.0, 5.3); - - testResult = testResult && runTest(256, 256, 256, -8.5, 2.9, 5.8); - testResult = testResult && runTest(256, 
256, 256, 12.5, 6.7, 11.4); - - testResult = testResult && runTest(256, 256, 256, -0.4, -0.4, -0.4); - testResult = testResult && runTest(256, 256, 256, 4, 14.6, -0.3); - - testResult = testResult && runTest(256, 256, 256, 6.9, 7.4, 0.4); - testResult = testResult && runTest(256, 256, 256, 12.5, 23.7, 0.34); - - testResult = testResult && runTest(256, 256, 256, -3, 8.9, -4); - testResult = testResult && runTest(256, 256, 256, 4, -0.1, 8.2); - - testResult = testResult && runTest(256, 256, 256, -8.5, 15.9, 0.1); - testResult = testResult && runTest(256, 256, 256, 12.5, -17.9, -0.35); - - testResult = testResult && runTest(256, 256, 256, -3, 5.8, 0.89); - testResult = testResult && runTest(256, 256, 256, 4, 9.1, 2.08); - - testResult = testResult && runTest(256, 256, 256, -8.5, 6.6, 3.67); - testResult = testResult && runTest(256, 256, 256, 12.5, 0.01, -9.9); - - if (testResult) { - passed(); - } else { - exit (EXIT_FAILURE); - } -} diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp deleted file mode 100644 index 7fa5d3930d..0000000000 --- a/tests/src/texture/hipTextureRef2D.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include -#include -#include - -#include -#include "test_common.h" - -texture tex; - -__global__ void tex2DKernel(float* outputData, - int width, int height) { -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - outputData[y * width + x] = tex2D(tex, x, y); -#endif -} - -int runTest(int argc, char** argv); - -int main(int argc, char** argv) { - checkImageSupport(); - - int testResult = runTest(argc, argv); - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -int runTest(int argc, char** argv) { - int testResult = 1; - unsigned int width = 256; - unsigned int height = 256; - unsigned int size = width * height * 
sizeof(float); - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - printf("hData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hData[i]); - } - printf("\n"); - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - hipArray* hipArray; - hipMallocArray(&hipArray, &channelDesc, width, height); - - hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice); - - tex.addressMode[0] = hipAddressModeClamp; - tex.addressMode[1] = hipAddressModeClamp; - tex.filterMode = hipFilterModePoint; - tex.normalized = 0; - - hipBindTextureToArray(tex, hipArray, channelDesc); - - float* dData = NULL; - hipMalloc((void**)&dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, width, height); - hipDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - printf("dData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hOutputData[i]); - } - printf("\n"); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = 0; - break; - } - } - } - HIPCHECK(hipUnbindTexture(tex)); - hipFree(dData); - hipFreeArray(hipArray); - return testResult; -} diff --git a/tests/src/texture/simpleTexture2DLayered.cpp b/tests/src/texture/simpleTexture2DLayered.cpp deleted file mode 100644 index b73d909baf..0000000000 --- a/tests/src/texture/simpleTexture2DLayered.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include "test_common.h" - -typedef float T; - -// Texture reference for 2D Layered texture -texture tex2DL; - -__global__ void simpleKernelLayeredArray(T* outputData,int width,int height,int layer) -{ -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - outputData[layer*width*height + y*width + x] = tex2DLayered(tex2DL, x, y, layer); -#endif -} - -//////////////////////////////////////////////////////////////////////////////// -void runTest(int width,int height,int num_layers,texture *tex) -{ - unsigned int size = width * height * num_layers * sizeof(T); - T* hData = (T*) malloc(size); - memset(hData, 0, size); - - for (unsigned int layer = 0; layer < num_layers; layer++){ - for (int i = 0; i < (int)(width * height); i++){ - hData[layer*width*height + i] = i; - } - } - hipChannelFormatDesc channelDesc; - // Allocate array and copy image data - channelDesc = hipCreateChannelDesc(sizeof(T)*8, 0, 0, 0, hipChannelFormatKindFloat); - hipArray *arr; - - HIPCHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height, num_layers), hipArrayLayered)); - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); - myparms.dstArray = arr; - myparms.extent = make_hipExtent(width , height, num_layers); - //myparms.kind = hipMemcpyHostToDevice; - HIPCHECK(hipMemcpy3D(&myparms)); - - // set texture parameters - tex->addressMode[0] = hipAddressModeClamp; - tex->addressMode[1] = hipAddressModeClamp; - tex->filterMode = hipFilterModePoint; - tex->normalized = false; - - // Bind the array to the texture - HIPCHECK(hipBindTextureToArray(*tex, arr, channelDesc)); - - // Allocate device memory for result - T* dData = NULL; - 
hipMalloc((void **) &dData, size); - dim3 dimBlock(8, 8, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - for (unsigned int layer = 0; layer < num_layers; layer++) - hipLaunchKernelGGL(simpleKernelLayeredArray, dimGrid, dimBlock, 0, 0, dData, width, height, layer); - - HIPCHECK(hipDeviceSynchronize()); - // Allocate mem for the result on host side - T *hOutputData = (T*) malloc(size); - memset(hOutputData, 0, size); - - // copy result from device to host - HIPCHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost)); - HipTest::checkArray(hData,hOutputData,width,height,num_layers); - - hipFree(dData); - hipFreeArray(arr); - free(hData); - free(hOutputData); -} - -//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) -{ - checkImageSupport(); - - runTest(512,512,5,&tex2DL); - passed(); -} - diff --git a/tests/src/texture/simpleTexture3D.cpp b/tests/src/texture/simpleTexture3D.cpp deleted file mode 100644 index 8761caed14..0000000000 --- a/tests/src/texture/simpleTexture3D.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* -Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * TEST: %t - * HIT_END - */ -#include "test_common.h" - -//typedef char T; -const char *sampleName = "simpleTexture3D"; - -// Texture reference for 3D texture -texture texf; - -texture texi; - -texture texc; - -template -__global__ void simpleKernel3DArray(T* outputData, - int width, - int height,int depth) -{ -#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texf, k, j, i); - else if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texi, k, j, i); - else if(std::is_same::value) - outputData[i*width*height + j*width + k] = tex3D(texc, k, j, i); - } - } - } -#endif -} - -//////////////////////////////////////////////////////////////////////////////// -//! 
Run a simple test for tex3D -//////////////////////////////////////////////////////////////////////////////// -template -void runTest(int width,int height,int depth,texture *tex) -{ - unsigned int size = width * height * depth * sizeof(T); - T* hData = (T*) malloc(size); - memset(hData, 0, size); - - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width +k] = i*width*height + j*width + k; - } - } - } - - // Allocate array and copy image data - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); - hipArray *arr; - - HIPCHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height, depth), hipArrayDefault)); - hipMemcpy3DParms myparms = {0}; - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); - myparms.dstArray = arr; - myparms.extent = make_hipExtent(width, height, depth); - myparms.kind = hipMemcpyHostToDevice; - - HIPCHECK(hipMemcpy3D(&myparms)); - - // set texture parameters - tex->addressMode[0] = hipAddressModeClamp; - tex->addressMode[1] = hipAddressModeClamp; - tex->filterMode = hipFilterModePoint; - tex->normalized = false; - - // Bind the array to the texture - HIPCHECK(hipBindTextureToArray(*tex, arr, channelDesc)); - - // Allocate device memory for result - T* dData = NULL; - hipMalloc((void **) &dData, size); - - hipLaunchKernelGGL(simpleKernel3DArray, dim3(1,1,1), dim3(1,1,1), 0, 0, dData, width, height, depth); - HIPCHECK(hipDeviceSynchronize()); - - // Allocate mem for the result on host side - T *hOutputData = (T*) malloc(size); - memset(hOutputData, 0, size); - - // copy result from device to host - HIPCHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost)); - HipTest::checkArray(hData,hOutputData,width,height,depth); - - hipFree(dData); - hipFreeArray(arr); - free(hData); - free(hOutputData); -} - 
-//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) -{ - checkImageSupport(); - - printf("%s starting...\n", sampleName); - for(int i=1;i<25;i++) - { - runTest(i,i,i,&texf); - runTest(i+1,i,i,&texi); - runTest(i,i+1,i,&texc); - } - passed(); -} - diff --git a/tests/src/timer.cpp b/tests/src/timer.cpp deleted file mode 100644 index ea9c6ea1d9..0000000000 --- a/tests/src/timer.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "timer.h" - -#include - -#ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#define VC_EXTRALEAN -#include -#pragma comment(lib, "user32") -#endif - -#ifdef __linux__ -#include -#define NANOSECONDS_PER_SEC 1000000000 -#endif - -CPerfCounter::CPerfCounter() : _clocks(0), _start(0) -{ - -#ifdef _WIN32 - - QueryPerformanceFrequency((LARGE_INTEGER *)&_freq); - -#endif - -#ifdef __linux__ - _freq = NANOSECONDS_PER_SEC; -#endif - -} - -CPerfCounter::~CPerfCounter() -{ - // EMPTY! 
-} - -void -CPerfCounter::Start(void) -{ - -#ifdef _WIN32 - - if( _start ) - { - MessageBox(NULL, "Bad Perf Counter Start", "Error", MB_OK); - exit(0); - } - QueryPerformanceCounter((LARGE_INTEGER *)&_start); - -#endif -#ifdef __linux__ - - struct timespec s; - clock_gettime(CLOCK_MONOTONIC, &s); - _start = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; - -#endif - -} - -void -CPerfCounter::Stop(void) -{ - i64 n; - -#ifdef _WIN32 - - if( !_start ) - { - MessageBox(NULL, "Bad Perf Counter Stop", "Error", MB_OK); - exit(0); - } - - QueryPerformanceCounter((LARGE_INTEGER *)&n); - -#endif -#ifdef __linux__ - - struct timespec s; - clock_gettime(CLOCK_MONOTONIC, &s); - n = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; - -#endif - - n -= _start; - _start = 0; - _clocks += n; -} - -void -CPerfCounter::Reset(void) -{ - -#ifdef _WIN32 - if( _start ) - { - MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK); - exit(0); - } -#endif - _clocks = 0; -} - -double -CPerfCounter::GetElapsedTime(void) -{ -#ifdef _WIN32 - if( _start ) { - MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK); - exit(0); - } -#endif - - return (double)_clocks / (double)_freq; - -} diff --git a/tests/src/timer.h b/tests/src/timer.h deleted file mode 100644 index 28bfeff74b..0000000000 --- a/tests/src/timer.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _TIMER_H_ -#define _TIMER_H_ - -#ifdef _WIN32 -typedef __int64 i64 ; -#endif -#ifdef __linux__ -typedef long long i64; -#endif - -class CPerfCounter { - -public: - CPerfCounter(); - ~CPerfCounter(); - void Start(void); - void Stop(void); - void Reset(void); - double GetElapsedTime(void); - -private: - - i64 _freq; - i64 _clocks; - i64 _start; -}; - -#endif // _TIMER_H_ diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp deleted file mode 100644 index 1bf2ed2531..0000000000 --- a/tests/unit/test_common.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. 
All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include "test_common.h" - -// standard global variables that can be set on command line -size_t N = 4 * 1024 * 1024; -char memsetval = 0x42; -int memsetD32val = 0xDEADBEEF; -short memsetD16val = 0xDEAD; -char memsetD8val = 0xDE; -int iterations = 1; -unsigned blocksPerCU = 6; // to hide latency -unsigned threadsPerBlock = 256; -int p_gpuDevice = 0; -unsigned p_verbose = 0; -int p_tests = -1; /*which tests to run. Interpretation is left to each test. 
default:all*/ -#ifdef _WIN64 -const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES="; -const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES="; -const char* PATH_SEPERATOR_STR = "\\"; -const char* NULL_DEVICE = "NUL:"; -#else -const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES"; -const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES"; -const char* PATH_SEPERATOR_STR = "/"; -const char* NULL_DEVICE = "/dev/null"; -#endif - -namespace HipTest { - - -double elapsed_time(long long startTimeUs, long long stopTimeUs) { - return ((double)(stopTimeUs - startTimeUs)) / ((double)(1000)); -} - - -int parseSize(const char* str, size_t* output) { - char* next; - *output = strtoull(str, &next, 0); - int l = strlen(str); - if (l) { - char c = str[l - 1]; // last char. - if ((c == 'k') || (c == 'K')) { - *output *= 1024; - } - if ((c == 'm') || (c == 'M')) { - *output *= (1024 * 1024); - } - if ((c == 'g') || (c == 'G')) { - *output *= (1024 * 1024 * 1024); - } - } - return 1; -} - - -int parseUInt(const char* str, unsigned int* output) { - char* next; - *output = strtoul(str, &next, 0); - return !strlen(next); -} - - -int parseInt(const char* str, int* output) { - char* next; - *output = strtol(str, &next, 0); - return !strlen(next); -} - - -int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg) { - int extraArgs = 1; - for (int i = 1; i < argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, " ")) { - // skip NULL args. 
- } else if (!strcmp(arg, "--N") || (!strcmp(arg, "-N"))) { - if (++i >= argc || !HipTest::parseSize(argv[i], &N)) { - failed("Bad N size argument"); - } - } else if (!strcmp(arg, "--threadsPerBlock")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &threadsPerBlock)) { - failed("Bad threadsPerBlock argument"); - } - } else if (!strcmp(arg, "--blocksPerCU")) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &blocksPerCU)) { - failed("Bad blocksPerCU argument"); - } - } else if (!strcmp(arg, "--memsetval")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetval argument"); - } - memsetval = ex; - } else if (!strcmp(arg, "--memsetD32val")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetD32val argument"); - } - memsetD32val = ex; - } else if (!strcmp(arg, "--memsetD16val")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetD16val argument"); - } - memsetD16val = ex; - } else if (!strcmp(arg, "--memsetD8val")) { - int ex; - if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { - failed("Bad memsetD8val argument"); - } - memsetD8val = ex; - } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) { - failed("Bad iterations argument"); - } - - } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-gpuDevice")) || (!strcmp(arg, "-g"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { - failed("Bad gpuDevice argument"); - } - - } else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) { - if (++i >= argc || !HipTest::parseUInt(argv[i], &p_verbose)) { - failed("Bad verbose argument"); - } - } else if (!strcmp(arg, "--tests") || (!strcmp(arg, "-t"))) { - if (++i >= argc || !HipTest::parseInt(argv[i], &p_tests)) { - failed("Bad tests argument"); - } - - } else { - if (failOnUndefinedArg) { - failed("Bad argument '%s'", arg); - } else { - argv[extraArgs++] = 
argv[i]; - } - } - }; - - return extraArgs; -} - - -unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) { - int device; - HIPCHECK(hipGetDevice(&device)); - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, device)); - - unsigned blocks = props.multiProcessorCount * blocksPerCU; - if (blocks * threadsPerBlock > N) { - blocks = (N + threadsPerBlock - 1) / threadsPerBlock; - } - - return blocks; -} - - -} // namespace HipTest diff --git a/tests/unit/test_common.h b/tests/unit/test_common.h deleted file mode 100644 index 5ac686db2f..0000000000 --- a/tests/unit/test_common.h +++ /dev/null @@ -1,473 +0,0 @@ -/* -Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -/* - * File is intended to C and CPP compliant hence any CPP specic changes - * should be added into CPP section - * - */ - -#ifdef __cplusplus - #include - #include - #if __CUDACC__ - #include - #else - #include - #endif -#endif - -// ************************ GCC section ************************** -#include - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" - -#define HC __attribute__((hc)) - -#define KNRM "\x1B[0m" -#define KRED "\x1B[31m" -#define KGRN "\x1B[32m" -#define KYEL "\x1B[33m" -#define KBLU "\x1B[34m" -#define KMAG "\x1B[35m" -#define KCYN "\x1B[36m" -#define KWHT "\x1B[37m" - -#define passed() \ - printf("%sPASSED!%s\n", KGRN, KNRM); \ - -#define failed(...) \ - printf("%serror: ", KRED); \ - printf(__VA_ARGS__); \ - printf("%s\n",KNRM); \ - return false; - -#define warn(...) \ - printf("%swarn: ", KYEL); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - printf("warn: TEST WARNING\n%s", KNRM); - -#define skipped() printf("%sSkipped subtest %s%s\n",KYEL,__FUNCTION__,KNRM); - -#define HIPCHECK(error) \ - { \ - hipError_t localError = error; \ - if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \ - failed("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \ - localError, #error, __FUNCTION__, __LINE__, KNRM); \ - } \ - } - -#define HIPASSERT(condition) \ - if (!(condition)) { \ - failed("%sassertion %s at %s:%d%s \n", KRED, #condition, __FUNCTION__, __LINE__, KNRM); \ - } - - -#define HIPCHECK_API(API_CALL, EXPECTED_ERROR) \ - { \ - hipError_t _e = (API_CALL); \ - if (_e != (EXPECTED_ERROR)) { \ - failed("%sAPI '%s' returned %d(%s) but test expected %d(%s) at %s:%d%s \n", KRED, \ - #API_CALL, _e, hipGetErrorName(_e), EXPECTED_ERROR, \ - hipGetErrorName(EXPECTED_ERROR), __FILE__, __LINE__, KNRM); \ - } \ - } - -#ifdef _WIN64 -#include -#define aligned_alloc(x,y) _aligned_malloc(y,x) -#define aligned_free(x) _aligned_free(x) -#define popen(x,y) _popen(x,y) -#define 
pclose(x) _pclose(x) -#define setenv(x,y,z) _putenv_s(x,y) -#define unsetenv _putenv -#define fileno(x) _fileno(x) -#define dup(x) _dup(x) -#define dup2(x,y) _dup2(x,y) -#define close(x) _close(x) -#else -#define aligned_free(x) free(x) -#endif - -// standard command-line variables: -extern size_t N; -extern char memsetval; -extern int memsetD32val; -extern short memsetD16val; -extern char memsetD8val; -extern int iterations; -extern unsigned blocksPerCU; -extern unsigned threadsPerBlock; -extern int p_gpuDevice; -extern unsigned p_verbose; -extern int p_tests; -extern const char* HIP_VISIBLE_DEVICES_STR; -extern const char* CUDA_VISIBLE_DEVICES_STR; -extern const char* PATH_SEPERATOR_STR; -extern const char* NULL_DEVICE; - -// ********************* CPP section ********************* -#ifdef __cplusplus - -#ifdef __HIP_PLATFORM_HCC -#define TYPENAME(T) typeid(T).name() -#else -#define TYPENAME(T) "?" -#endif - -namespace HipTest { - -// Returns the current system time in microseconds -inline long long get_time() { -#if __CUDACC__ - struct timeval tv; - gettimeofday(&tv, 0); - return (tv.tv_sec * 1000000) + tv.tv_usec; -#else - return std::chrono::high_resolution_clock::now().time_since_epoch() - /std::chrono::microseconds(1); -#endif -} - -double elapsed_time(long long startTimeUs, long long stopTimeUs); - -int parseSize(const char* str, size_t* output); -int parseUInt(const char* str, unsigned int* output); -int parseInt(const char* str, int* output); -int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg); - -unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N); - -template // pointer type -bool checkArray(T hData, T hOutputData, size_t width, size_t height,size_t depth) -{ - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - int offset = i*width*height + j*width + k; - if (hData[offset] != hOutputData[offset]) { - std::cerr << '[' << i << ',' << j << ',' << k 
<< "]:" << hData[offset] << "----" << hOutputData[offset]<<" "; - failed("mistmatch at:%d %d %d",i,j,k); - } - } - } - } - return true; -} - -template -bool checkArray(T input, T output, size_t height, size_t width) -{ - for(int i=0; i -__global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < NELEM; i += stride) { - C_d[i] = A_d[i] + B_d[i]; - } -} - - -template -__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, - size_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - C_d[i] = A_d[i] + B_d[i]; - } -} - - -template -__global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - // Deliberately do this in an inefficient way to increase kernel runtime - for (int i = 0; i < count; i++) { - for (size_t i = offset; i < NELEM; i += stride) { - C_d[i] = A_d[i] + (T)count; - } - } -} - - -template -__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - // Deliberately do this in an inefficient way to increase kernel runtime - for (int i = 0; i < count; i++) { - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - C_d[i] = A_d[i] + (T)count; - } - } -} - - -template -__global__ void memsetReverse(T* C_d, T val, int64_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - C_d[i] = val; - } -} - - -template -void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) { - // Initialize the 
host data: - for (size_t i = 0; i < numElements; i++) { - if (A_h) (A_h)[i] = 3.146f + i; // Pi - if (B_h) (B_h)[i] = 1.618f + i; // Phi - if (C_h) (C_h)[i] = 0.0f + i; - } -} - - -template -bool initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) { - size_t Nbytes = N * sizeof(T); - - if (usePinnedHost) { - if (A_h) { - HIPCHECK(hipHostMalloc((void**)A_h, Nbytes)); - } - if (B_h) { - HIPCHECK(hipHostMalloc((void**)B_h, Nbytes)); - } - if (C_h) { - HIPCHECK(hipHostMalloc((void**)C_h, Nbytes)); - } - } else { - if (A_h) { - *A_h = (T*)malloc(Nbytes); - HIPASSERT(*A_h != NULL); - } - - if (B_h) { - *B_h = (T*)malloc(Nbytes); - HIPASSERT(*B_h != NULL); - } - - if (C_h) { - *C_h = (T*)malloc(Nbytes); - HIPASSERT(*C_h != NULL); - } - } - - setDefaultData(N, A_h ? *A_h : NULL, B_h ? *B_h : NULL, C_h ? *C_h : NULL); - return true; -} - - -template -bool initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N, - bool usePinnedHost = false) { - size_t Nbytes = N * sizeof(T); - - if (A_d) { - HIPCHECK(hipMalloc(A_d, Nbytes)); - } - if (B_d) { - HIPCHECK(hipMalloc(B_d, Nbytes)); - } - if (C_d) { - HIPCHECK(hipMalloc(C_d, Nbytes)); - } - - return initArraysForHost(A_h, B_h, C_h, N, usePinnedHost); -} - - -template -bool freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) { - if (usePinnedHost) { - if (A_h) { - HIPCHECK(hipHostFree(A_h)); - } - if (B_h) { - HIPCHECK(hipHostFree(B_h)); - } - if (C_h) { - HIPCHECK(hipHostFree(C_h)); - } - } else { - if (A_h) { - free(A_h); - } - if (B_h) { - free(B_h); - } - if (C_h) { - free(C_h); - } - } - return true; -} - -template -bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) { - if (A_d) { - HIPCHECK(hipFree(A_d)); - } - if (B_d) { - HIPCHECK(hipFree(B_d)); - } - if (C_d) { - HIPCHECK(hipFree(C_d)); - } - - return freeArraysForHost(A_h, B_h, C_h, usePinnedHost); -} - -#if defined(__HIP_PLATFORM_AMD__) -template -bool initArrays2DPitch(T** A_d, 
T** B_d, T** C_d, size_t* pitch_A, size_t* pitch_B, size_t* pitch_C, - size_t numW, size_t numH) { - if (A_d) { - HIPCHECK(hipMallocPitch((void**)A_d, pitch_A, numW * sizeof(T), numH)); - } - if (B_d) { - HIPCHECK(hipMallocPitch((void**)B_d, pitch_B, numW * sizeof(T), numH)); - } - if (C_d) { - HIPCHECK(hipMallocPitch((void**)C_d, pitch_C, numW * sizeof(T), numH)); - } - - HIPASSERT(*pitch_A == *pitch_B); - HIPASSERT(*pitch_A == *pitch_C) - return true; -} - -inline bool initHIPArrays(hipArray** A_d, hipArray** B_d, hipArray** C_d, - const hipChannelFormatDesc* desc, const size_t numW, const size_t numH, - const unsigned int flags) { - if (A_d) { - HIPCHECK(hipMallocArray(A_d, desc, numW, numH, flags)); - } - if (B_d) { - HIPCHECK(hipMallocArray(B_d, desc, numW, numH, flags)); - } - if (C_d) { - HIPCHECK(hipMallocArray(C_d, desc, numW, numH, flags)); - } - return true; -} -#endif - -// Assumes C_h contains vector add of A_h + B_h -// Calls the test "failed" macro if a mismatch is detected. 
-template -size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true, - bool reportMismatch = true) { - size_t mismatchCount = 0; - size_t firstMismatch = 0; - size_t mismatchesToPrint = 10; - for (size_t i = 0; i < N; i++) { - T expected = A_h[i] + B_h[i]; - if (result_H[i] != expected) { - if (mismatchCount == 0) { - firstMismatch = i; - } - mismatchCount++; - if ((mismatchCount <= mismatchesToPrint) && expectMatch) { - std::cout << std::fixed << std::setprecision(32); - std::cout << "At " << i << std::endl; - std::cout << " Computed:" << result_H[i] << std::endl; - std::cout << " Expected:" << expected << std::endl; - } - } - } - - if (reportMismatch) { - if (expectMatch) { - if (mismatchCount) { - failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); - } - } else { - if (mismatchCount == 0) { - failed("expected mismatches but did not detect any!"); - } - } - } - - return mismatchCount; -} - - -// Assumes C_h contains vector add of A_h + B_h -// Calls the test "failed" macro if a mismatch is detected. 
-template -bool checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) { - size_t mismatchCount = 0; - size_t firstMismatch = 0; - size_t mismatchesToPrint = 10; - for (size_t i = 0; i < N; i++) { - if (result_H[i] != expected_H[i]) { - if (mismatchCount == 0) { - firstMismatch = i; - } - mismatchCount++; - if ((mismatchCount <= mismatchesToPrint) && expectMatch) { - std::cout << std::fixed << std::setprecision(32); - std::cout << "At " << i << std::endl; - std::cout << " Computed:" << result_H[i] << std::endl; - std::cout << " Expected:" << expected_H[i] << std::endl; - } - } - } - - if (expectMatch) { - if (mismatchCount) { - fprintf(stderr, "%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); - // failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); - } - } else { - if (mismatchCount == 0) { - failed("expected mismatches but did not detect any!"); - } - } - return true; -} - -}; // namespace HipTest -#endif //__cplusplus