diff --git a/catch/ABM/AddKernels/add.cc b/catch/ABM/AddKernels/add.cc index 785d975945..1b7c56cdfa 100644 --- a/catch/ABM/AddKernels/add.cc +++ b/catch/ABM/AddKernels/add.cc @@ -29,6 +29,7 @@ TEMPLATE_TEST_CASE("ABM_AddKernel_MultiTypeMultiSize", "", int, long, float, lon REQUIRE(res == hipSuccess); hipLaunchKernelGGL(add, 1, size, 0, 0, d_a, d_b, d_c, size); + HIP_CHECK(hipGetLastError()); res = hipMemcpy(a.data(), d_c, sizeof(TestType) * size, hipMemcpyDeviceToHost); REQUIRE(res == hipSuccess); diff --git a/catch/TypeQualifiers/hipManagedKeyword.cc b/catch/TypeQualifiers/hipManagedKeyword.cc index 57462764a9..4d6a97b119 100644 --- a/catch/TypeQualifiers/hipManagedKeyword.cc +++ b/catch/TypeQualifiers/hipManagedKeyword.cc @@ -53,6 +53,7 @@ TEST_CASE("Unit_hipManagedKeyword_SingleGpu") { hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, static_cast(A), static_cast(B)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); float maxError = 0.0f; diff --git a/catch/include/hip_test_checkers.hh b/catch/include/hip_test_checkers.hh index 3e152f099b..bd1fa62610 100644 --- a/catch/include/hip_test_checkers.hh +++ b/catch/include/hip_test_checkers.hh @@ -26,7 +26,7 @@ THE SOFTWARE. #include #include #include - +#define TOL 0.001 #define guarantee(cond, str) \ { \ if (!(cond)) { \ @@ -45,7 +45,7 @@ size_t checkVectors(T* A, T* B, T* Out, size_t N, T (*F)(T a, T b), bool expectM size_t mismatchesToPrint = 10; for (size_t i = 0; i < N; i++) { T expected = F(A[i], B[i]); - if (Out[i] != expected) { + if (std::fabs(Out[i] - expected) > TOL) { if (mismatchCount == 0) { firstMismatch = i; } diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh index 3a8501f62c..ed8412b673 100644 --- a/catch/include/hip_test_common.hh +++ b/catch/include/hip_test_common.hh @@ -270,6 +270,7 @@ void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerB launchRTCKernel(kernel, numBlocks, numThreads, memPerBlock, stream, std::forward(packedArgs)...); #endif +HIP_CHECK(hipGetLastError()); } //--- diff --git a/catch/multiproc/hipIpcEventHandle.cc b/catch/multiproc/hipIpcEventHandle.cc index 4a4be2eb5a..78b12bdff3 100644 --- a/catch/multiproc/hipIpcEventHandle.cc +++ b/catch/multiproc/hipIpcEventHandle.cc @@ -221,6 +221,7 @@ void runMultiProcKernel(ipcEventInfo_t *shmEventInfo, int index) { const dim3 blocks(BUF_SIZE / threads.x, 1); hipLaunchKernelGGL(computeKernel, dim3(blocks), dim3(threads), 0, 0, d_ptr + index *BUF_SIZE, d_ptr, index + 1); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipEventRecord(event)); // Barrier 2 : Signals that event is recorded diff --git a/catch/multiproc/hipMallocConcurrencyMproc.cc b/catch/multiproc/hipMallocConcurrencyMproc.cc index d13a31949d..7134801dac 100644 --- a/catch/multiproc/hipMallocConcurrencyMproc.cc +++ b/catch/multiproc/hipMallocConcurrencyMproc.cc @@ -120,7 +120,7 @@ static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A_d), static_cast(B_d), C_d, N); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) { diff --git a/catch/stress/printf/Stress_printf_ComplexKernels.cc b/catch/stress/printf/Stress_printf_ComplexKernels.cc index eec8e162f7..46bb6f74b1 100644 --- a/catch/stress/printf/Stress_printf_ComplexKernels.cc +++ b/catch/stress/printf/Stress_printf_ComplexKernels.cc @@ -276,12 +276,15 @@ bool test_printf_multistream(uint32_t num_blocks, hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1), 0, stream[i], Ad, Bd, iterCount); + HIP_CHECK(hipGetLastError()); hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1), dim3(1, threads_per_block, 1), 0, stream[i], Ad, Bd, iterCount); + HIP_CHECK(hipGetLastError()); hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks), dim3(1, 1, threads_per_block), 0, stream[i], Ad, Bd, iterCount); + HIP_CHECK(hipGetLastError()); } HIP_CHECK(hipDeviceSynchronize()); for (int i = 0; i < NUM_STREAM; i++) { @@ -368,12 +371,15 @@ bool test_printf_multigpu(int gpu, hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1), 0, 0, Ad, Bd, iterCount); + HIP_CHECK(hipGetLastError()); hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1), dim3(1, threads_per_block, 1), 0, 0, Ad, Bd, iterCount); + HIP_CHECK(hipGetLastError()); hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks), dim3(1, 1, threads_per_block), 0, 0, Ad, Bd, iterCount); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); std::ifstream CapturedData = captured.getCapturedData(); char *buffer = new char[CHUNK_SIZE]; diff --git a/catch/stress/printf/Stress_printf_SimpleKernels.cc b/catch/stress/printf/Stress_printf_SimpleKernels.cc index 1ec20ec873..82dc309313 100644 --- a/catch/stress/printf/Stress_printf_SimpleKernels.cc +++ b/catch/stress/printf/Stress_printf_SimpleKernels.cc @@ -246,6 +246,7 @@ bool test_printf_conststr(uint32_t num_blocks, uint32_t threads_per_block, hipLaunchKernelGGL(kernel_printf_conststr, dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1), 0, 0, iterCount); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(0)); std::ifstream CapturedData = captured.getCapturedData(); char *buffer = new char[CHUNK_SIZE]; @@ -308,6 +309,7 @@ bool test_printf_two_conditionalstr(uint32_t num_blocks, dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1), 0, 0, iterCount); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(0)); std::ifstream CapturedData = captured.getCapturedData(); char *buffer = new char[CHUNK_SIZE]; @@ -370,6 +372,7 @@ bool test_printf_single_conditionalstr(uint32_t num_blocks, dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1), 0, 0, iterCount); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(0)); std::ifstream CapturedData = captured.getCapturedData(); char *buffer = new char[CHUNK_SIZE]; @@ -427,6 +430,7 @@ bool test_variable_str(uint32_t print_limit, hipLaunchKernelGGL(func, dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1), 0, 0, iterCount, Ad); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(0)); HIP_CHECK(hipMemcpy(Ah, Ad, buffsize*sizeof(int32_t), hipMemcpyDeviceToHost)); @@ -483,6 +487,7 @@ bool test_decimal_str(uint32_t num_blocks, uint32_t threads_per_block, hipLaunchKernelGGL(kernel_decimal_calculation, dim3(num_blocks, 1, 1), dim3(threads_per_block, 1, 1), 0, 0, iterCount, maxPrecision); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(0)); std::ifstream CapturedData = captured.getCapturedData(); char *buffer = new char[CHUNK_SIZE]; diff --git a/catch/unit/cooperativeGrps/coalesced_groups_shfl_down.cc b/catch/unit/cooperativeGrps/coalesced_groups_shfl_down.cc index f4220f02bb..63d569c0e5 100644 --- a/catch/unit/cooperativeGrps/coalesced_groups_shfl_down.cc +++ b/catch/unit/cooperativeGrps/coalesced_groups_shfl_down.cc @@ -157,6 +157,7 @@ static void test_group_partition(unsigned int tileSz) { // Launch Kernel hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock, threadsPerBlock * sizeof(int), 0, dResult, tileSz, i); + HIP_CHECK(hipGetLastError()); err = hipDeviceSynchronize(); if (err != hipSuccess) { fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); @@ -217,6 +218,7 @@ static void test_shfl_down() { // Launch Kernel hipLaunchKernelGGL(kernel_shfl_down, blockSize, threadsPerBlock, threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i); + HIP_CHECK(hipGetLastError()); hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost); err = hipDeviceSynchronize(); if (err != hipSuccess) { diff --git a/catch/unit/cooperativeGrps/coalesced_groups_shfl_up.cc b/catch/unit/cooperativeGrps/coalesced_groups_shfl_up.cc index c317452549..e3c9d3d9f4 100644 --- a/catch/unit/cooperativeGrps/coalesced_groups_shfl_up.cc +++ b/catch/unit/cooperativeGrps/coalesced_groups_shfl_up.cc @@ -138,6 +138,7 @@ static void test_group_partition(unsigned tileSz) { // Launch Kernel hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock, threadsPerBlock * sizeof(int), 0, dPtr, tileSz, i); + HIP_CHECK(hipGetLastError()); hipMemcpy(hPtr, dPtr, arrSize, hipMemcpyDeviceToHost); err = hipDeviceSynchronize(); if (err != hipSuccess) { @@ -207,6 +208,7 @@ static void test_shfl_up() { hipLaunchKernelGGL(kernel_shfl_up, blockSize, threadsPerBlock, threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i); hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost); + HIP_CHECK(hipGetLastError()); err = hipDeviceSynchronize(); if (err != hipSuccess) { fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err)); diff --git a/catch/unit/cooperativeGrps/simple_coalesced_groups.cc b/catch/unit/cooperativeGrps/simple_coalesced_groups.cc index 008595a04d..cef1d94e96 100644 --- a/catch/unit/cooperativeGrps/simple_coalesced_groups.cc +++ b/catch/unit/cooperativeGrps/simple_coalesced_groups.cc @@ -244,6 +244,7 @@ static void test_active_threads_grouping() { // Launch Kernel hipLaunchKernelGGL(kernel_coalesced_active_groups, blockSize, threadsPerBlock, 0, 0); + HIP_CHECK(hipGetLastError()); err = hipDeviceSynchronize(); if (err != hipSuccess) { @@ -308,6 +309,7 @@ static void test_group_partition(unsigned int tileSz, bool useGlobalMem) { if (useGlobalMem) { hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock, 0, 0, tileSz, dResult, useGlobalMem, globalMem, i); + HIP_CHECK(hipGetLastError()); err = hipDeviceSynchronize(); if (err != hipSuccess) { @@ -316,6 +318,7 @@ static void test_group_partition(unsigned int tileSz, bool useGlobalMem) { } else { hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock, threadsPerBlock * sizeof(int), 0, tileSz, dResult, useGlobalMem, globalMem, i); + HIP_CHECK(hipGetLastError()); err = hipDeviceSynchronize(); if (err != hipSuccess) { @@ -390,6 +393,7 @@ static void test_shfl_any_to_any() { // Launch Kernel hipLaunchKernelGGL(kernel_shfl_any_to_any, blockSize, threadsPerBlock, threadsPerBlock * sizeof(int), 0 , dPtr, dsrcArr, dResults, i); + HIP_CHECK(hipGetLastError()); hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost); err = hipDeviceSynchronize(); if (err != hipSuccess) { @@ -459,6 +463,7 @@ static void test_shfl_broadcast() { // Launch Kernel hipLaunchKernelGGL(kernel_shfl, blockSize, threadsPerBlock, threadsPerBlock * sizeof(int), 0, dPtr, dResults, srcLane, i); + HIP_CHECK(hipGetLastError()); hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost); err = hipDeviceSynchronize(); if (err != hipSuccess) { diff --git a/catch/unit/device/hipDeviceSynchronize.cc b/catch/unit/device/hipDeviceSynchronize.cc index a73cc2b65e..0b54463159 100644 --- a/catch/unit/device/hipDeviceSynchronize.cc +++ b/catch/unit/device/hipDeviceSynchronize.cc @@ -27,6 +27,7 @@ THE SOFTWARE. #define _SIZE sizeof(int) * 1024 * 1024 #define NUM_STREAMS 2 +#define NUM_ITERS 1 << 30 static __global__ void Iter(int* Ad, int num) { int tx = threadIdx.x + blockIdx.x * blockDim.x; @@ -57,7 +58,8 @@ TEST_CASE("Unit_hipDeviceSynchronize_Functional") { } for (int i = 0; i < NUM_STREAMS; i++) { hipLaunchKernelGGL(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, - stream[i], Ad[i], 1 << 30); + stream[i], Ad[i], NUM_ITERS); + HIP_CHECK(hipGetLastError()); } for (int i = 0; i < NUM_STREAMS; i++) { HIP_CHECK(hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost, @@ -71,7 +73,7 @@ TEST_CASE("Unit_hipDeviceSynchronize_Functional") { // Conservative implementations which synchronize the hipMemcpyAsync will // fail, ie if HIP_LAUNCH_BLOCKING=true. - CHECK(1 << 30 != A[NUM_STREAMS - 1][0] - 1); + CHECK(NUM_ITERS != A[NUM_STREAMS - 1][0] - 1); HIP_CHECK(hipDeviceSynchronize()); - CHECK(1 << 30 == A[NUM_STREAMS - 1][0] - 1); + CHECK(NUM_ITERS == A[NUM_STREAMS - 1][0] - 1); } diff --git a/catch/unit/device/hipGetDeviceProperties.cc b/catch/unit/device/hipGetDeviceProperties.cc index 0bb97af52c..ae8d7db439 100644 --- a/catch/unit/device/hipGetDeviceProperties.cc +++ b/catch/unit/device/hipGetDeviceProperties.cc @@ -163,6 +163,7 @@ TEST_CASE("Unit_hipGetDeviceProperties_ArchPropertiesTst") { hipMemcpyHostToDevice)); hipLaunchKernelGGL(mykernel, dim3(1), dim3(1), 0, 0, archProp_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(archProp_h, archProp_d, NUM_OF_ARCHPROP*sizeof(int), hipMemcpyDeviceToHost)); // Validate the host architecture property with device diff --git a/catch/unit/deviceLib/AtomicAdd_Coherent_withnoUnsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_Coherent_withnoUnsafeflag.cc index 3622300514..58e0d1d6a2 100644 --- a/catch/unit/deviceLib/AtomicAdd_Coherent_withnoUnsafeflag.cc +++ b/catch/unit/deviceLib/AtomicAdd_Coherent_withnoUnsafeflag.cc @@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithnounsafeflag", "", hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; testResult = HipTest::assemblyFile_Verification( diff --git a/catch/unit/deviceLib/AtomicAdd_Coherent_withoutflag.cc b/catch/unit/deviceLib/AtomicAdd_Coherent_withoutflag.cc index 4d12ea1f2c..300b84fed0 100644 --- a/catch/unit/deviceLib/AtomicAdd_Coherent_withoutflag.cc +++ b/catch/unit/deviceLib/AtomicAdd_Coherent_withoutflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithoutflag", "", hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; testResult = HipTest::assemblyFile_Verification( diff --git a/catch/unit/deviceLib/AtomicAdd_Coherent_withunsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_Coherent_withunsafeflag.cc index 27120d930e..5472b6225f 100644 --- a/catch/unit/deviceLib/AtomicAdd_Coherent_withunsafeflag.cc +++ b/catch/unit/deviceLib/AtomicAdd_Coherent_withunsafeflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_CoherentwithUnsafeflag", "", hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; diff --git a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withnoUnsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withnoUnsafeflag.cc index 01f6e6ec07..98df491998 100644 --- a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withnoUnsafeflag.cc +++ b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withnoUnsafeflag.cc @@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithnounsafeflag", "", dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); diff --git a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withoutflag.cc b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withoutflag.cc index a5d4c58e94..38ba5a5690 100644 --- a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withoutflag.cc +++ b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withoutflag.cc @@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithoutflag", "", dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); diff --git a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withunsafeflag.cc b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withunsafeflag.cc index 59e4ccfeb4..6bfff7262c 100644 --- a/catch/unit/deviceLib/AtomicAdd_NonCoherent_withunsafeflag.cc +++ b/catch/unit/deviceLib/AtomicAdd_NonCoherent_withunsafeflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithUnsafeflag", "", hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); diff --git a/catch/unit/deviceLib/BuiltIns_fadd.cc b/catch/unit/deviceLib/BuiltIns_fadd.cc index 472e89491c..2eaaa94baa 100644 --- a/catch/unit/deviceLib/BuiltIns_fadd.cc +++ b/catch/unit/deviceLib/BuiltIns_fadd.cc @@ -76,6 +76,7 @@ TEST_CASE("Unit_BuiltInAtomicAdd_CoherentGlobalMem") { hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1), 0, 0, A_d, result); + HIP_CHECK(hipGetLastError()); std::cout << "test 1" << std::endl; HIP_CHECK(hipDeviceSynchronize()); REQUIRE(A_h[0] == INITIAL_VAL); @@ -118,6 +119,7 @@ TEST_CASE("Unit_BuiltInAtomicAdd_NonCoherentGlobalMem") { hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1), 0, 0, static_cast(A_d), static_cast(result)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost)); REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); diff --git a/catch/unit/deviceLib/BuiltIns_fmax.cc b/catch/unit/deviceLib/BuiltIns_fmax.cc index 40f4c52397..5049348c61 100644 --- a/catch/unit/deviceLib/BuiltIns_fmax.cc +++ b/catch/unit/deviceLib/BuiltIns_fmax.cc @@ -112,6 +112,7 @@ TEST_CASE("Unit_BuiltinAtomics_fmaxCoherentGlobalMem") { HIP_CHECK(hipMalloc(reinterpret_cast(&result), sizeof(double))); hipLaunchKernelGGL(unsafeAtomicMax_GlobalMem, dim3(1), dim3(1), 0, 0, static_cast(A_d), result); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost)); REQUIRE(*B_h == 0); @@ -159,9 +160,11 @@ TEST_CASE("Unit_BuiltinAtomics_fmaxNonCoherentGlobalFlatMem") { if (mem_type) { hipLaunchKernelGGL(unsafeAtomicMax_GlobalMem, dim3(1), dim3(1), 0, 0, static_cast(A_d), result); + HIP_CHECK(hipGetLastError()); } else { hipLaunchKernelGGL(unsafeAtomicMax_FlatMem, dim3(1), dim3(1), 0, 0, static_cast(A_d), result); + HIP_CHECK(hipGetLastError()); } HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/BuiltIns_fmin.cc b/catch/unit/deviceLib/BuiltIns_fmin.cc index 8ff1814db3..25e52eff29 100644 --- a/catch/unit/deviceLib/BuiltIns_fmin.cc +++ b/catch/unit/deviceLib/BuiltIns_fmin.cc @@ -112,6 +112,7 @@ TEST_CASE("Unit_BuiltinAtomics_fminCoherentGlobalMem") { HIP_CHECK(hipMalloc(reinterpret_cast(&result), sizeof(double))); hipLaunchKernelGGL(unsafeAtomicMin_GlobalMem, dim3(1), dim3(1), 0, 0, static_cast(A_d), result); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost)); REQUIRE(*B_h == 0); @@ -159,9 +160,11 @@ TEST_CASE("Unit_BuiltinAtomics_fminNonCoherentGlobalFlatMem") { if (mem_type) { hipLaunchKernelGGL(unsafeAtomicMin_GlobalMem, dim3(1), dim3(1), 0, 0, static_cast(A_d), result); + HIP_CHECK(hipGetLastError()); } else { hipLaunchKernelGGL(unsafeAtomicMin_FlatMem, dim3(1), dim3(1), 0, 0, static_cast(A_d), result); + HIP_CHECK(hipGetLastError()); } HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/anyAll.cc b/catch/unit/deviceLib/anyAll.cc index f9e690941c..9b4cfca3ec 100644 --- a/catch/unit/deviceLib/anyAll.cc +++ b/catch/unit/deviceLib/anyAll.cc @@ -65,6 +65,7 @@ TEST_CASE("Unit_AnyAll_CompileTest") { hipLaunchKernelGGL(warpvote, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0, device_any, device_all, pshift); + HIP_CHECK(hipGetLastError()); HIP_CHECK( hipMemcpy(host_any, device_any, Num_Warps_per_Grid * sizeof(int), hipMemcpyDeviceToHost)); HIP_CHECK( diff --git a/catch/unit/deviceLib/ballot.cc b/catch/unit/deviceLib/ballot.cc index 09de1fa5b6..d7870d1b15 100644 --- a/catch/unit/deviceLib/ballot.cc +++ b/catch/unit/deviceLib/ballot.cc @@ -66,6 +66,7 @@ TEST_CASE("Unit_ballot") { hipLaunchKernelGGL(gpu_ballot, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0, device_ballot, Num_Warps_per_Block, pshift); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid * sizeof(unsigned int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/bitExtract.cc b/catch/unit/deviceLib/bitExtract.cc index b8997db021..9c97924572 100644 --- a/catch/unit/deviceLib/bitExtract.cc +++ b/catch/unit/deviceLib/bitExtract.cc @@ -146,6 +146,7 @@ TEST_CASE("Unit_bitExtract") { hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0, deviceOut32, deviceSrc032, deviceSrc132, deviceSrc232, deviceOut64, deviceSrc064, deviceSrc164, deviceSrc264); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/bitInsert.cc b/catch/unit/deviceLib/bitInsert.cc index 27a63663fd..57c1a66fd4 100644 --- a/catch/unit/deviceLib/bitInsert.cc +++ b/catch/unit/deviceLib/bitInsert.cc @@ -158,6 +158,7 @@ TEST_CASE("Unit_bitInsert") { hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0, deviceOut32, deviceSrc032, deviceSrc132, deviceSrc232, deviceSrc332, deviceOut64, deviceSrc064, deviceSrc164, deviceSrc264, deviceSrc364); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/brev.cc b/catch/unit/deviceLib/brev.cc index a28c0b688c..20d2395f2a 100644 --- a/catch/unit/deviceLib/brev.cc +++ b/catch/unit/deviceLib/brev.cc @@ -115,6 +115,7 @@ TEST_CASE("Unit_brev") { hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, deviceD, WIDTH, HEIGHT); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/clz.cc b/catch/unit/deviceLib/clz.cc index 82c7346f9c..f3edd42105 100644 --- a/catch/unit/deviceLib/clz.cc +++ b/catch/unit/deviceLib/clz.cc @@ -136,6 +136,7 @@ TEST_CASE("Unit_clz") { dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, deviceD, WIDTH, HEIGHT); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/ffs.cc b/catch/unit/deviceLib/ffs.cc index 7e3828e286..4cda0175f0 100644 --- a/catch/unit/deviceLib/ffs.cc +++ b/catch/unit/deviceLib/ffs.cc @@ -110,6 +110,7 @@ TEST_CASE("Unit_ffs") { dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, deviceD, WIDTH, HEIGHT); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/floatMath.cc b/catch/unit/deviceLib/floatMath.cc index 430df9a847..d13a251303 100644 --- a/catch/unit/deviceLib/floatMath.cc +++ b/catch/unit/deviceLib/floatMath.cc @@ -56,6 +56,7 @@ TEST_CASE("Unit_deviceFunctions_CompileTest") { res = hipMalloc((void**)&Outd, SIZE); REQUIRE(res == hipSuccess); hipLaunchKernelGGL(floatMath, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, Ind, Outd); + HIP_CHECK(hipGetLastError()); res = hipDeviceSynchronize(); REQUIRE(res == hipSuccess); res = hipGetLastError(); diff --git a/catch/unit/deviceLib/floatTM.cc b/catch/unit/deviceLib/floatTM.cc index df04904d59..2ed208e392 100644 --- a/catch/unit/deviceLib/floatTM.cc +++ b/catch/unit/deviceLib/floatTM.cc @@ -129,6 +129,7 @@ template void testType(int msize) { auto kernel = testOperationsGPU; hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/funnelshift.cc b/catch/unit/deviceLib/funnelshift.cc index 881f51947a..f5b5cfefab 100644 --- a/catch/unit/deviceLib/funnelshift.cc +++ b/catch/unit/deviceLib/funnelshift.cc @@ -145,7 +145,7 @@ TEST_CASE("Unit_funnelshift") { hipLaunchKernelGGL(funnelshift_kernel, dim3(1), dim3(1), 0, 0, device_l_output, device_lc_output, device_r_output, device_rc_output); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(host_l_output, device_l_output, NUM_TESTS * sizeof(unsigned int), hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpy(host_lc_output, device_lc_output, NUM_TESTS * sizeof(unsigned int), diff --git a/catch/unit/deviceLib/hipTestDeviceSymbol.cc b/catch/unit/deviceLib/hipTestDeviceSymbol.cc index d1c39b600a..c7bfa59283 100644 --- a/catch/unit/deviceLib/hipTestDeviceSymbol.cc +++ b/catch/unit/deviceLib/hipTestDeviceSymbol.cc @@ -69,6 +69,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") { hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream)); HIP_CHECK(hipStreamSynchronize(stream)); hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream)); @@ -83,6 +84,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") { SECTION("Calling hipMemcpyTo/FromSymbol - validate value in host memory") { HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice)); hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost)); @@ -99,6 +101,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") { hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream)); HIP_CHECK(hipStreamSynchronize(stream)); hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); HIP_CHECK( hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream)); @@ -116,6 +119,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") { hipStreamPerThread)); HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, hipStreamPerThread)); @@ -139,6 +143,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") { HIP_CHECK(hipGetSymbolAddress((void**)&symbolAddress, HIP_SYMBOL(globalConst))); HIP_CHECK(hipMalloc((void**)&checkOkD, sizeof(bool))); hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost)); HIP_CHECK(hipFree(checkOkD)); HIP_ASSERT(checkOk); @@ -171,6 +176,7 @@ TEST_CASE("Unit_hipGetSymbolAddressAndSize_Validation") { HIP_CHECK(hipMalloc(&checkOkD, sizeof(bool))); hipLaunchKernelGGL(checkGlobalConstAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolArrAddress, checkOkD); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost)); HIP_CHECK(hipFree(checkOkD)); HIP_ASSERT(checkOk); @@ -184,6 +190,7 @@ TEST_CASE("Unit_hipGetSymbolAddressAndSize_Validation") { HIP_CHECK(hipMalloc(&checkOkD, sizeof(bool))); hipLaunchKernelGGL(checkStaticConstVarAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolVarAddress, checkOkD); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost)); HIP_CHECK(hipFree(checkOkD)); HIP_ASSERT(checkOk); diff --git a/catch/unit/deviceLib/ldg.cc b/catch/unit/deviceLib/ldg.cc index a485740e8c..eec83dd710 100644 --- a/catch/unit/deviceLib/ldg.cc +++ b/catch/unit/deviceLib/ldg.cc @@ -100,6 +100,7 @@ template int dataTypesRun() { dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, static_cast(deviceB), WIDTH, HEIGHT); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); @@ -148,7 +149,7 @@ template int dataTypesRun2() { dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, static_cast(deviceB), WIDTH, HEIGHT); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); @@ -198,7 +199,7 @@ template int dataTypesRun4() { dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, static_cast(deviceB), WIDTH, HEIGHT); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/mbcnt.cc b/catch/unit/deviceLib/mbcnt.cc index f9ffa7ef72..1fc37ee1c3 100644 --- a/catch/unit/deviceLib/mbcnt.cc +++ b/catch/unit/deviceLib/mbcnt.cc @@ -63,7 +63,7 @@ TEST_CASE("Unit_mbcnt") { hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0, device_mbcnt_lo, device_mbcnt_hi, device_lane_id); - + HIP_CHECK(hipGetLastError()); unsigned int* host_mbcnt_lo = (unsigned int*)malloc(buffer_size); unsigned int* host_mbcnt_hi = (unsigned int*)malloc(buffer_size); unsigned int* host_lane_id = (unsigned int*)malloc(buffer_size); diff --git a/catch/unit/deviceLib/popc.cc b/catch/unit/deviceLib/popc.cc index 29d9764692..a86bb11320 100644 --- a/catch/unit/deviceLib/popc.cc +++ b/catch/unit/deviceLib/popc.cc @@ -100,7 +100,7 @@ TEST_CASE("Unit_popc") { hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB, deviceC, deviceD, WIDTH, HEIGHT); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpy(hostC, deviceC, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/syncthreadsand.cc b/catch/unit/deviceLib/syncthreadsand.cc index 04a364345b..4bbc2efcd2 100644 --- a/catch/unit/deviceLib/syncthreadsand.cc +++ b/catch/unit/deviceLib/syncthreadsand.cc @@ -88,7 +88,7 @@ static void test_syncthreads_and(int blockSize) { // Launch Kernel hipLaunchKernelGGL(kernel_syncthreads_and, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD, allThreadsOneD, oneThreadZeroD, allThreadsMinusOneD); - + HIP_CHECK(hipGetLastError()); // Copy result from device to host HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/syncthreadscount.cc b/catch/unit/deviceLib/syncthreadscount.cc index ee66810d7f..9a78dadfee 100644 --- a/catch/unit/deviceLib/syncthreadscount.cc +++ b/catch/unit/deviceLib/syncthreadscount.cc @@ -95,7 +95,7 @@ void test_syncthreads_count(int blockSize) { // Launch Kernel hipLaunchKernelGGL(kernel_syncthreads_count, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD, allThreadsOneD, oddThreadsOneD, allThreadsMinusOneD, allThreadsIdD); - + HIP_CHECK(hipGetLastError()); // Copy result from device to host HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/syncthreadsor.cc b/catch/unit/deviceLib/syncthreadsor.cc index 511c655ad3..a06bc32795 100644 --- a/catch/unit/deviceLib/syncthreadsor.cc +++ b/catch/unit/deviceLib/syncthreadsor.cc @@ -88,7 +88,7 @@ static void test_syncthreads_or(int blockSize) { // Launch Kernel hipLaunchKernelGGL(kernel_syncthreads_or, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD, allThreadsOneD, oneThreadOneD, allThreadsMinusOneD); - + HIP_CHECK(hipGetLastError()); // Copy result from device to host HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost)); HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost)); diff --git a/catch/unit/deviceLib/threadfence_system.cc b/catch/unit/deviceLib/threadfence_system.cc index 322d19507f..5d65366708 100644 --- a/catch/unit/deviceLib/threadfence_system.cc +++ b/catch/unit/deviceLib/threadfence_system.cc @@ -92,6 +92,7 @@ TEST_CASE("Unit_threadfence_system") { HIP_CHECK(hipSetDevice(next_id - 1)); hipLaunchKernelGGL(gpu_round_robin, dim_grid, dim_block, 0, 0x0, next_id, num_dev, num_iter, data, flag); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); })); } diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withnounsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withnounsafeflag.cc index 384347de77..596fb1ef04 100644 --- a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withnounsafeflag.cc +++ b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withnounsafeflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithnoUnsafeflag", "", hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withoutflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withoutflag.cc index 993705b764..685f91c6ca 100644 --- a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withoutflag.cc +++ b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withoutflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Coherentwithoutflag", "", hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withunsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withunsafeflag.cc index 868257384a..4263e412e1 100644 --- a/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withunsafeflag.cc +++ b/catch/unit/deviceLib/unsafeAtomicAdd_Coherent_withunsafeflag.cc @@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithUnsafeflag", "", hipLaunchKernelGGL(AtomicCheck, dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc index aff54abd14..b0f1d86421 100644 --- a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc +++ b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withnounsafeflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentnounsafeatomicsflag", "", dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withoutflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withoutflag.cc index d0b611d89c..fc298a8592 100644 --- a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withoutflag.cc +++ b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withoutflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithoutflag", "", dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); diff --git a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withunsafeflag.cc b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withunsafeflag.cc index ba4af3df96..bff5e00483 100644 --- a/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withunsafeflag.cc +++ b/catch/unit/deviceLib/unsafeAtomicAdd_NonCoherent_withunsafeflag.cc @@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithunsafeatomicsflag", "", dim3(1), dim3(1), 0, 0, A_d, result_d); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); bool testResult; REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL); diff --git a/catch/unit/deviceLib/vectorTypesDevice.cc b/catch/unit/deviceLib/vectorTypesDevice.cc index c4237e28ed..5a842809f4 100644 --- a/catch/unit/deviceLib/vectorTypesDevice.cc +++ b/catch/unit/deviceLib/vectorTypesDevice.cc @@ -224,6 +224,7 @@ template bool run_CheckSharedVectorType() { if (hipMalloc(&ptr, sizeof(bool)) != HIP_SUCCESS) return false; unique_ptr correct{ptr, hipFree}; hipLaunchKernelGGL((CheckSharedVectorType), dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get()); + HIP_CHECK(hipGetLastError()); bool passed = true; if (hipMemcpyDtoH(&passed, correct.get(), sizeof(bool)) != HIP_SUCCESS) { return false; @@ -252,7 +253,7 @@ TEST_CASE("Unit_vectorTypes_CompileTest") { unique_ptr correct{ptr, hipFree}; hipLaunchKernelGGL(CheckVectorTypes, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get()); - + HIP_CHECK(hipGetLastError()); bool passed = true; res = hipMemcpyDtoH(&passed, correct.get(), sizeof(bool)); REQUIRE(res == hipSuccess); diff --git a/catch/unit/event/Unit_hipEvent.cc b/catch/unit/event/Unit_hipEvent.cc index d0b2fd0a1e..6654c58d5b 100644 --- a/catch/unit/event/Unit_hipEvent.cc +++ b/catch/unit/event/Unit_hipEvent.cc @@ -80,6 +80,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_ HIP_CHECK(hipEventRecord(start, stream)); hipLaunchKernelGGL(HipTest::addCountReverse, dim3(blocks), dim3(threadsPerBlock), 0, stream, static_cast(C_d), C_h, numElements, count); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipEventRecord(stop, stream)); if (waitStart) { @@ -111,7 +112,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_ REQUIRE(false); } - if (e == hipSuccess) assert(t == 0.0f); + if (e == hipSuccess) HIP_ASSERT(t == 0.0f); // stop usually ready unless we skipped the synchronization (syncNone) e = hipEventElapsedTime(&t, stop, stop); @@ -137,7 +138,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_ HIP_ASSERT(hipEventElapsedTime(&t, start, neverCreated) == hipErrorInvalidHandle); HIP_ASSERT(hipEventElapsedTime(&t, neverRecorded, stop) == hipErrorInvalidHandle); - HIP_ASSERT(hipEventElapsedTime(&t, start, neverRecorded) == hipErrorInvalidHandle); + HIP_ASSERT(hipGetLastError() == hipErrorInvalidHandle); } HIP_CHECK(hipEventDestroy(neverRecorded)); diff --git a/catch/unit/event/Unit_hipEventIpc.cc b/catch/unit/event/Unit_hipEventIpc.cc index 1d59089bf4..cc0d70ae71 100644 --- a/catch/unit/event/Unit_hipEventIpc.cc +++ b/catch/unit/event/Unit_hipEventIpc.cc @@ -65,7 +65,7 @@ TEST_CASE("Unit_hipEventIpc") { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A_d), static_cast(B_d), C_d, N); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipEventRecord(stop, NULL)); HIP_CHECK(hipEventSynchronize(stop)); diff --git a/catch/unit/event/Unit_hipEventRecord.cc b/catch/unit/event/Unit_hipEventRecord.cc index 7bf5462637..70e5a684ec 100644 --- a/catch/unit/event/Unit_hipEventRecord.cc +++ b/catch/unit/event/Unit_hipEventRecord.cc @@ -88,7 +88,7 @@ TEST_CASE("Unit_hipEventRecord") { HipTest::launchKernel(HipTest::vectorADD, blocks, 1, 0, 0, static_cast(A_d), static_cast(B_d), C_d, N); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipEventRecord(stop, NULL)); HIP_CHECK(hipEventSynchronize(stop)); long long hostStop = HipTest::get_time(); diff --git a/catch/unit/memory/hipHostRegister.cc b/catch/unit/memory/hipHostRegister.cc index 8d30ae30f7..f6964db616 100644 --- a/catch/unit/memory/hipHostRegister.cc +++ b/catch/unit/memory/hipHostRegister.cc @@ -101,7 +101,8 @@ TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset", "", i // Reference the registered device pointer Ad from inside the kernel: for (int i = 0; i < num_devices; i++) { HIP_CHECK(hipSetDevice(i)); - hipLaunchKernelGGL(Inc, dim3(LEN / 512), dim3(512), 0, 0, Ad[i]); + hipLaunchKernelGGL(Inc, dim3(LEN / 32), dim3(32), 0, 0, Ad[i]); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); } REQUIRE(A[10] == 1 + static_cast(num_devices)); diff --git a/catch/unit/memory/hipMallocConcurrency.cc b/catch/unit/memory/hipMallocConcurrency.cc index 1283284022..885a13ee7e 100644 --- a/catch/unit/memory/hipMallocConcurrency.cc +++ b/catch/unit/memory/hipMallocConcurrency.cc @@ -111,7 +111,7 @@ static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A_d), static_cast(B_d), C_d, N); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) { @@ -219,7 +219,7 @@ static bool validateMemoryOnGpuMThread(int gpu, bool concurOnOneGPU = false) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A_d), static_cast(B_d), C_d, N); - + HIP_CHECK(hipGetLastError()); HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) { diff --git a/catch/unit/memory/hipMallocManaged.cc b/catch/unit/memory/hipMallocManaged.cc index 672db36485..5921ec122e 100644 --- a/catch/unit/memory/hipMallocManaged.cc +++ b/catch/unit/memory/hipMallocManaged.cc @@ -114,6 +114,7 @@ TEST_CASE("Unit_hipMallocManaged_Advanced") { HIP_CHECK(hipEventRecord(event0, 0)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A), static_cast(B), C, numElements); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipEventRecord(event1, 0)); HIP_CHECK(hipDeviceSynchronize()); float time = 0.0f; diff --git a/catch/unit/memory/hipMallocPitch.cc b/catch/unit/memory/hipMallocPitch.cc index 47afb66fff..5a20671e14 100644 --- a/catch/unit/memory/hipMallocPitch.cc +++ b/catch/unit/memory/hipMallocPitch.cc @@ -536,6 +536,7 @@ TEMPLATE_TEST_CASE("Unit_hipMallocPitch_KernelLaunch", "" hipLaunchKernelGGL(copy_var, dim3(1), dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), ROWS, pitch_A); + HIP_CHECK(hipGetLastError()); // hipMemcpy2D Device to Host diff --git a/catch/unit/memory/hipMemcpy.cc b/catch/unit/memory/hipMemcpy.cc index f6343e52fc..20860d2843 100644 --- a/catch/unit/memory/hipMemcpy.cc +++ b/catch/unit/memory/hipMemcpy.cc @@ -241,6 +241,7 @@ void memcpytest2(DeviceMemory* dmem, HostMemory* hmem, hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(dmem->A_d()), static_cast(dmem->B_d()), dmem->C_d(), numElements); + HIP_CHECK(hipGetLastError()); if (useDeviceToDevice) { // Do an extra device-to-device copy here to mix things up: @@ -402,7 +403,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_KernelLaunch", "", int, float, hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), C_d, NUM_ELM); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); @@ -567,7 +568,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_PinnedRegMemWithKernelLaunch", hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A_d), static_cast(B_d), C_d, NUM_ELM); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM); @@ -595,7 +596,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_PinnedRegMemWithKernelLaunch", hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, NUM_ELM); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM); diff --git a/catch/unit/memory/hipMemcpyAllApiNegative.cc b/catch/unit/memory/hipMemcpyAllApiNegative.cc index 76a5da166a..7ea39d30be 100644 --- a/catch/unit/memory/hipMemcpyAllApiNegative.cc +++ b/catch/unit/memory/hipMemcpyAllApiNegative.cc @@ -319,6 +319,7 @@ TEST_CASE("Unit_hipMemcpy_HalfMemCopy") { HIP_CHECK(hipMemcpyAsync(B_h, A_d, (NUM_ELM/2)*sizeof(float), hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipDeviceSynchronize()); HipTest::checkTest(A_h, B_h, NUM_ELM/2); } HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); diff --git a/catch/unit/memory/hipMemcpyAsync.cc b/catch/unit/memory/hipMemcpyAsync.cc index b9798f963e..503763aed8 100644 --- a/catch/unit/memory/hipMemcpyAsync.cc +++ b/catch/unit/memory/hipMemcpyAsync.cc @@ -50,6 +50,7 @@ void Thread_func(T *A_d, T *B_d, T* C_d, T* C_h, size_t Nbytes, hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, mystream, A_d, C_d, N_ELMTS); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); // The following two MemcpyAsync calls are for sole // purpose of loading stream with multiple async calls @@ -76,6 +77,7 @@ void Thread_func_MultiStream() { hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream, A_d, C_d, N_ELMTS); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); // The following hipMemcpyAsync() is called only to // load stream with multiple Async calls @@ -121,7 +123,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_KernelLaunch", "", int, float, hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), C_d, NUM_ELM); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); HIP_CHECK(hipStreamSynchronize(stream)); HIP_CHECK(hipStreamDestroy(stream)); @@ -348,7 +350,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch", hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A_d), static_cast(B_d), C_d, NUM_ELM); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM); @@ -379,7 +381,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch", hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, NUM_ELM); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost, gpu1Stream)); HIP_CHECK(hipStreamSynchronize(gpu1Stream)); diff --git a/catch/unit/memory/hipMemcpyDtoD.cc b/catch/unit/memory/hipMemcpyDtoD.cc index ab6dc99ebe..ff5eb2c90c 100644 --- a/catch/unit/memory/hipMemcpyDtoD.cc +++ b/catch/unit/memory/hipMemcpyDtoD.cc @@ -66,6 +66,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "", dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), C_d, NUM_ELM); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM); @@ -80,6 +81,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "", dim3(1), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, NUM_ELM); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyDtoH(C_h, (hipDeviceptr_t)Z_d, Nbytes)); HIP_CHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM); diff --git a/catch/unit/memory/hipMemcpyDtoDAsync.cc b/catch/unit/memory/hipMemcpyDtoDAsync.cc index af5be6d284..8dc20bf612 100644 --- a/catch/unit/memory/hipMemcpyDtoDAsync.cc +++ b/catch/unit/memory/hipMemcpyDtoDAsync.cc @@ -68,6 +68,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "", dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), C_d, NUM_ELM); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM); @@ -84,6 +85,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "", dim3(1), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, NUM_ELM); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, stream)); HIP_CHECK(hipStreamSynchronize(stream)); HIP_CHECK(hipDeviceSynchronize()); diff --git a/catch/unit/memory/hipMemcpyPeer.cc b/catch/unit/memory/hipMemcpyPeer.cc index d991d95924..a8ebe9534f 100644 --- a/catch/unit/memory/hipMemcpyPeer.cc +++ b/catch/unit/memory/hipMemcpyPeer.cc @@ -127,6 +127,7 @@ TEST_CASE("Unit_hipMemcpyPeer_Basic") { hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), C_d, numElements*sizeof(int)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int), hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, numElements); @@ -138,6 +139,7 @@ TEST_CASE("Unit_hipMemcpyPeer_Basic") { hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, numElements*sizeof(int)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int), hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, numElements); diff --git a/catch/unit/memory/hipMemcpyPeerAsync.cc b/catch/unit/memory/hipMemcpyPeerAsync.cc index c3a0b31501..1c738f1c76 100644 --- a/catch/unit/memory/hipMemcpyPeerAsync.cc +++ b/catch/unit/memory/hipMemcpyPeerAsync.cc @@ -143,6 +143,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Basic") { hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), C_d, numElements*sizeof(int)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int), hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, numElements); @@ -166,6 +167,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Basic") { hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, numElements*sizeof(int)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int), hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, numElements); @@ -227,6 +229,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_StreamOnDiffDevice") { hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(A_d), static_cast(B_d), C_d, numElements*sizeof(int)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int), hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, numElements); @@ -240,6 +243,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_StreamOnDiffDevice") { hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, numElements*sizeof(int)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int), hipMemcpyDeviceToHost)); diff --git a/catch/unit/memory/hipMemcpyWithStream.cc b/catch/unit/memory/hipMemcpyWithStream.cc index f7e1be6001..5efc690b8d 100644 --- a/catch/unit/memory/hipMemcpyWithStream.cc +++ b/catch/unit/memory/hipMemcpyWithStream.cc @@ -99,6 +99,7 @@ void TestwithOnestream(void) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, static_cast(A_d), static_cast(B_d), C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HipTest::checkVectorADD(A_h, B_h, C_h, N); @@ -136,6 +137,7 @@ void TestwithTwoStream(void) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < NUM_STREAMS; ++i) { @@ -191,6 +193,7 @@ void TestDtoDonSameDevice(void) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < NUM_STREAMS; ++i) { @@ -262,6 +265,7 @@ void TestOnMultiGPUwithOneStream(void) { dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < NumDevices; ++i) { @@ -298,6 +302,7 @@ void TestkindDtoH(void) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, static_cast(A_d), static_cast(B_d), C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); HIP_CHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); @@ -365,6 +370,7 @@ void TestkindDtoD(void) { dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < NumDevices; ++i) { @@ -411,6 +417,7 @@ void TestkindDefault(void) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, static_cast(A_d), static_cast(B_d), C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(stream)); HIP_CHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream)); HipTest::checkVectorADD(A_h, B_h, C_h, N); @@ -472,6 +479,7 @@ void TestkindDefaultForDtoD(void) { dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < NumDevices; ++i) { diff --git a/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc b/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc index 435a94ac2b..187654ac31 100644 --- a/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc +++ b/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc @@ -167,6 +167,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestwithOnestream(bool &val_res) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, static_cast(A_d), static_cast(B_d), C_d, N); + HIP_CHECK(hipGetLastError()); HIPCHECK(hipStreamSynchronize(stream)); HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); val_res = ValidateResult(A_h, B_h, C_h); @@ -203,6 +204,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestwithTwoStream(bool &val_res) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < NoofStreams; ++i) { @@ -258,6 +260,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestDtoDonSameDevice(bool &val_res) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < NoofStreams; ++i) { @@ -329,6 +332,7 @@ void HipMemcpyWithStreamMultiThreadtests:: hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < numDevices; ++i) { @@ -363,6 +367,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindDtoH(bool &val_res) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, static_cast(A_d), static_cast(B_d), C_d, N); + HIP_CHECK(hipGetLastError()); HIPCHECK(hipStreamSynchronize(stream)); HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); @@ -434,6 +439,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindDtoD(bool &val_res) { hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < numDevices; ++i) { @@ -481,6 +487,7 @@ void HipMemcpyWithStreamMultiThreadtests:: hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream, static_cast(A_d), static_cast(B_d), C_d, N); + HIP_CHECK(hipGetLastError()); HIPCHECK(hipStreamSynchronize(stream)); HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream)); val_res = ValidateResult(A_h, B_h, C_h); @@ -544,6 +551,7 @@ void HipMemcpyWithStreamMultiThreadtests:: hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); + HIP_CHECK(hipGetLastError()); } for (int i=0; i < numDevices; ++i) { diff --git a/catch/unit/memory/hipMemoryAllocateCoherent.cc b/catch/unit/memory/hipMemoryAllocateCoherent.cc index 0d14539191..50747e275d 100644 --- a/catch/unit/memory/hipMemoryAllocateCoherent.cc +++ b/catch/unit/memory/hipMemoryAllocateCoherent.cc @@ -53,6 +53,7 @@ TEST_CASE("Unit_hipHostMalloc_CoherentAccess") { std::cout << clkRate << std::endl; hipLaunchKernelGGL(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks), 0, 0, hostRes, clkRate); + HIP_CHECK(hipGetLastError()); int eleCounter = 0; while (eleCounter < blocks) { // blocks until the value changes diff --git a/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc b/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc index 89390bb908..3870f9bf6f 100644 --- a/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc +++ b/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc @@ -31,7 +31,7 @@ /* Defines */ #define NUM_THREADS 1000 -#define ITER 100 +#define ITER 10 #define NUM_H 256 #define NUM_W 256 @@ -105,6 +105,7 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") { for (size_t k = 0; k < ITER; k++) { hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, hipStreamPerThread, B_d, C_d, elements); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, hipStreamPerThread)); diff --git a/catch/unit/memory/hipMemset3DFunctional.cc b/catch/unit/memory/hipMemset3DFunctional.cc index 20e97278e5..11d36a70cc 100644 --- a/catch/unit/memory/hipMemset3DFunctional.cc +++ b/catch/unit/memory/hipMemset3DFunctional.cc @@ -369,7 +369,7 @@ static void seekAndSet3DArrayPortion(bool bAsync) { myparms.srcPos = make_hipPos(0, 0, 0); myparms.dstPos = make_hipPos(0, 0, 0); myparms.dstPtr = make_hipPitchedPtr(array3D, sizeof(char) * arr_dimensions.x, - arr_dimensions.x, arr_dimensions.y); + arr_dimensions.y, arr_dimensions.z); myparms.srcPtr = devicePitchedPointer; myparms.extent = extent; #if HT_NVIDIA diff --git a/catch/unit/memory/hipMemset3DRegressMultiThread.cc b/catch/unit/memory/hipMemset3DRegressMultiThread.cc index b3a65f2104..2f3c31933b 100644 --- a/catch/unit/memory/hipMemset3DRegressMultiThread.cc +++ b/catch/unit/memory/hipMemset3DRegressMultiThread.cc @@ -84,6 +84,7 @@ static void threadFunc(hipStream_t stream, hipPitchedPtr devpPtr, hipLaunchKernelGGL(func_set_value, dim3(blocks), dim3(threadsPerBlock), 0, stream, devpPtr, extent, memsetval); + HIP_CHECK(hipGetLastError()); HIPCHECK(hipMemset3DAsync(devpPtr, testval, extent, stream)); HIPCHECK(hipMemcpy3DAsync(&myparms, stream)); } diff --git a/catch/unit/memory/hipMemsetAsyncAndKernel.cc b/catch/unit/memory/hipMemsetAsyncAndKernel.cc index a13e937de7..a3578e9b28 100644 --- a/catch/unit/memory/hipMemsetAsyncAndKernel.cc +++ b/catch/unit/memory/hipMemsetAsyncAndKernel.cc @@ -92,6 +92,7 @@ static bool testhipMemsetAsyncWithKernel(bool UseStrmPerThrd) { hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, hipStreamPerThread, obj.B_d, obj.C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal, N, hipStreamPerThread)); HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); @@ -117,6 +118,7 @@ static bool testhipMemsetD32AsyncWithKernel() { for (int k = 0 ; k < ITER ; k++) { hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemsetD32Async((hipDeviceptr_t)obj.C_d , obj.memSetVal, N, obj.stream)); HIP_CHECK(hipStreamSynchronize(obj.stream)); @@ -135,6 +137,7 @@ static bool testhipMemsetD16AsyncWithKernel() { for (int k = 0 ; k < ITER ; k++) { hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemsetD16Async((hipDeviceptr_t)obj.C_d , obj.memSetVal, N, obj.stream)); HIP_CHECK(hipStreamSynchronize(obj.stream)); @@ -153,6 +156,7 @@ static bool testhipMemsetD8AsyncWithKernel() { for (int k = 0; k < ITER; k++) { hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemsetD8Async((hipDeviceptr_t)obj.C_d, obj.memSetVal, N, obj.stream)); HIP_CHECK(hipStreamSynchronize(obj.stream)); diff --git a/catch/unit/memory/hipPointerGetAttribute.cc b/catch/unit/memory/hipPointerGetAttribute.cc index f56f0a0e49..393221da11 100644 --- a/catch/unit/memory/hipPointerGetAttribute.cc +++ b/catch/unit/memory/hipPointerGetAttribute.cc @@ -138,6 +138,7 @@ TEST_CASE("Unit_hipPointerGetAttribute_KernelUpdation") { reinterpret_cast(A_d))); hipLaunchKernelGGL(var_update, dim3(1), dim3(1), 0, 0, reinterpret_cast(data)); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); for (unsigned int i = 0; i < N; i++) { diff --git a/catch/unit/multiThread/hipMultiThreadStreams1.cc b/catch/unit/multiThread/hipMultiThreadStreams1.cc index d4e3af63bd..0d672c3146 100644 --- a/catch/unit/multiThread/hipMultiThreadStreams1.cc +++ b/catch/unit/multiThread/hipMultiThreadStreams1.cc @@ -74,6 +74,7 @@ void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) { hipLaunchKernelGGL(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast(A_d), static_cast(B_d), C_d, numElements); + HIP_CHECK(hipGetLastError()); MemTraits::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream); diff --git a/catch/unit/multiThread/hipMultiThreadStreams2.cc b/catch/unit/multiThread/hipMultiThreadStreams2.cc index d6b6573cb1..4ee323c05d 100644 --- a/catch/unit/multiThread/hipMultiThreadStreams2.cc +++ b/catch/unit/multiThread/hipMultiThreadStreams2.cc @@ -56,6 +56,7 @@ void run1(size_t size, hipStream_t stream) { HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream)); HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream)); hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream, Cd); + HIP_CHECK(hipGetLastError()); HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream)); HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream)); HIPCHECK(hipDeviceSynchronize()); @@ -97,7 +98,9 @@ void run(size_t size, hipStream_t stream1, hipStream_t stream2) { HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1)); HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2)); hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream1, Cd); + HIP_CHECK(hipGetLastError()); hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream2, Cdd); + HIP_CHECK(hipGetLastError()); HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1)); HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2)); HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1)); diff --git a/catch/unit/stream/hipAPIStreamDisable.cc b/catch/unit/stream/hipAPIStreamDisable.cc index 4c6c6693da..e1163471ef 100644 --- a/catch/unit/stream/hipAPIStreamDisable.cc +++ b/catch/unit/stream/hipAPIStreamDisable.cc @@ -59,8 +59,10 @@ TEST_CASE("Unit_hipStreamCreate_MultistreamBasicFunctionalities") { hipLaunchKernelGGL(HIP_KERNEL_NAME(hipAPIStreamDisableTest::kernel), dim3(1), dim3(1), 0, streams[i], data[i], xd, hipAPIStreamDisableTest::NN); + HIP_CHECK(hipGetLastError()); hipLaunchKernelGGL(HIP_KERNEL_NAME(hipAPIStreamDisableTest::nKernel), dim3(1), dim3(1), 0, 0, yd); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipStreamDestroy(streams[i])); } HIP_CHECK(hipMemcpy(&x, xd, sizeof(float), hipMemcpyDeviceToHost)); diff --git a/catch/unit/stream/hipMultiStream.cc b/catch/unit/stream/hipMultiStream.cc index 64e4bb13b0..cffc9af887 100644 --- a/catch/unit/stream/hipMultiStream.cc +++ b/catch/unit/stream/hipMultiStream.cc @@ -49,7 +49,9 @@ TEST_CASE("Unit_hipMultiStream_sameDevice") { HIP_CHECK(hipStreamCreate(&streams[i])); HIP_CHECK(hipMalloc(&data[i], NN * sizeof(float))); hipLaunchKernelGGL(kernel, dim3(1), dim3(1), 0, streams[i], data[i], xd, NN); + HIP_CHECK(hipGetLastError()); hipLaunchKernelGGL(HIP_KERNEL_NAME(nKernel), dim3(1), dim3(1), 0, 0, yd); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipFree(data[i])); HIP_CHECK(hipStreamDestroy(streams[i])); } @@ -80,9 +82,11 @@ TEST_CASE("Unit_hipMultiStream_multimeDevice") { HIP_CHECK(hipDeviceSynchronize()); // Launch kernel with default stream hipLaunchKernelGGL(kernel_do_nothing, dim3(1), dim3(1), 0, 0, 1); + HIP_CHECK(hipGetLastError()); // Launch kernel on all streams for (int i = 0; i < nStreams; i++) { hipLaunchKernelGGL(kernel_do_nothing, dim3(1), dim3(1), 0, streams[i], 1); + HIP_CHECK(hipGetLastError()); } // Sync stream 1 HIP_CHECK(hipStreamSynchronize(streams[0])); diff --git a/catch/unit/stream/hipStreamACb_MultiThread.cc b/catch/unit/stream/hipStreamACb_MultiThread.cc index 07b999d72a..09946ced5d 100644 --- a/catch/unit/stream/hipStreamACb_MultiThread.cc +++ b/catch/unit/stream/hipStreamACb_MultiThread.cc @@ -131,7 +131,7 @@ TEST_CASE("Unit_hipStreamAddCallback_MultipleThreads") { hipLaunchKernelGGL((device_function), dim3(blocks), dim3(threadsPerBlock), 0, mystream, C_d, A_d, N); - + HIP_CHECK(hipGetLastError()); HIP_CHECK( hipMemcpyAsync(C1_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); diff --git a/catch/unit/stream/hipStreamAddCallback.cc b/catch/unit/stream/hipStreamAddCallback.cc index 15dd98741d..567cfa1686 100644 --- a/catch/unit/stream/hipStreamAddCallback.cc +++ b/catch/unit/stream/hipStreamAddCallback.cc @@ -86,7 +86,7 @@ bool testStreamCallbackFunctionality(bool isDefault) { const unsigned threadsPerBlock = 256; hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, C_d, NSize); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, 0)); HIP_CHECK(hipStreamAddCallback(0, Callback, nullptr, 0)); @@ -102,7 +102,7 @@ bool testStreamCallbackFunctionality(bool isDefault) { const unsigned threadsPerBlock = 256; hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream, A_d, C_d, NSize); - + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0)); diff --git a/catch/unit/stream/hipStreamCreateWithPriority.cc b/catch/unit/stream/hipStreamCreateWithPriority.cc index 9405db8289..39b4b98061 100644 --- a/catch/unit/stream/hipStreamCreateWithPriority.cc +++ b/catch/unit/stream/hipStreamCreateWithPriority.cc @@ -146,6 +146,7 @@ void funcTestsForAllPriorityLevelsWrtNullStrm(unsigned int flags, hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE), dim3(BLOCKSIZE), 0, stream[idx], A_d[idx], C_d[idx], MEMCPYSIZE2); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size, hipMemcpyDeviceToHost, stream[idx])); } @@ -227,6 +228,7 @@ void queueTasksInStreams(std::vector *stream, hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE), dim3(BLOCKSIZE), 0, (*stream)[idx], A_d[idx], C_d[idx], MEMCPYSIZE2); + HIP_CHECK(hipGetLastError()); HIPCHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size, hipMemcpyDeviceToHost, (*stream)[idx])); } @@ -426,6 +428,7 @@ bool validateStreamPrioritiesWithEvents() { hipLaunchKernelGGL((memcpy_kernel), dim3(GRIDSIZE), \ dim3(BLOCKSIZE), 0, stream_##x, dst_d_##x + j, src_d_##x + j, \ (MEMCPYSIZE / sizeof(T))); \ + HIP_CHECK(hipGetLastError()); \ } OP(low) OP(normal) diff --git a/catch/unit/stream/hipStreamWithCUMask.cc b/catch/unit/stream/hipStreamWithCUMask.cc index adf1db3e80..61535ed2cf 100644 --- a/catch/unit/stream/hipStreamWithCUMask.cc +++ b/catch/unit/stream/hipStreamWithCUMask.cc @@ -157,6 +157,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_ValidateCallbackFunc") { const unsigned threadsPerBlock = BLOCKSIZE; hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream, A_d, C_d, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0)); @@ -250,6 +251,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") { hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, streams[0], dA[0], dC[0], N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); auto single_end = std::chrono::steady_clock::now(); @@ -269,6 +271,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") { << streams[np] << " with CU mask: 0x" << ss[np].str().c_str()); hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, streams[np], dA[np], dC[np], N); + HIP_CHECK(hipGetLastError()); } HIP_CHECK(hipDeviceSynchronize()); diff --git a/catch/unit/texture/hipBindTex2DPitch.cc b/catch/unit/texture/hipBindTex2DPitch.cc index fc5c5e5b59..4bd671af00 100644 --- a/catch/unit/texture/hipBindTex2DPitch.cc +++ b/catch/unit/texture/hipBindTex2DPitch.cc @@ -66,6 +66,7 @@ TEST_CASE("Unit_hipBindTexture2D_Pitch") { hipLaunchKernelGGL(texture2dCopyKernel, dim3(4, 4, 1), dim3(32, 32, 1), 0, 0, devPtrB); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy2D(B, SIZE_W*sizeof(TYPE_t), devPtrB, SIZE_W*sizeof(TYPE_t), SIZE_W*sizeof(TYPE_t), diff --git a/catch/unit/texture/hipBindTexRef1DFetch.cc b/catch/unit/texture/hipBindTexRef1DFetch.cc index 4e96abc806..7291b786bb 100644 --- a/catch/unit/texture/hipBindTexRef1DFetch.cc +++ b/catch/unit/texture/hipBindTexRef1DFetch.cc @@ -63,6 +63,7 @@ TEST_CASE("Unit_hipBindTexture_tex1DfetchVerification") { dim3 dimGrid(N / dimBlock.x, 1, 1); hipLaunchKernelGGL(kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, devBuf); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(output, devBuf, N * sizeof(float), hipMemcpyDeviceToHost)); diff --git a/catch/unit/texture/hipNormalizedFloatValueTex.cc b/catch/unit/texture/hipNormalizedFloatValueTex.cc index a6eceef38d..8e660c41cf 100644 --- a/catch/unit/texture/hipNormalizedFloatValueTex.cc +++ b/catch/unit/texture/hipNormalizedFloatValueTex.cc @@ -118,6 +118,7 @@ static void textureTest(texture, dim3(1, 1, 1), dim3(SIZE, 1, 1), 0, 0, SIZE, dOutputData); + HIP_CHECK(hipGetLastError()); float *hOutputData = new float[SIZE]; REQUIRE(hOutputData != nullptr); diff --git a/catch/unit/texture/hipSimpleTexture2DLayered.cc b/catch/unit/texture/hipSimpleTexture2DLayered.cc index fdf28458d2..1bf6ea3af7 100644 --- a/catch/unit/texture/hipSimpleTexture2DLayered.cc +++ b/catch/unit/texture/hipSimpleTexture2DLayered.cc @@ -86,9 +86,11 @@ TEST_CASE("Unit_hipSimpleTexture2DLayered_Check") { dim3 dimBlock(8, 8, 1); dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - for (unsigned int layer = 0; layer < num_layers; layer++) + for (unsigned int layer = 0; layer < num_layers; layer++) { hipLaunchKernelGGL(simpleKernelLayeredArray, dimGrid, dimBlock, 0, 0, dData, width, height, layer); + HIP_CHECK(hipGetLastError()); + } HIP_CHECK(hipDeviceSynchronize()); // Allocate mem for the result on host side diff --git a/catch/unit/texture/hipSimpleTexture3D.cc b/catch/unit/texture/hipSimpleTexture3D.cc index 3d59101d51..0de3aec096 100644 --- a/catch/unit/texture/hipSimpleTexture3D.cc +++ b/catch/unit/texture/hipSimpleTexture3D.cc @@ -92,6 +92,7 @@ static void runSimpleTexture3D_Check(int width, int height, int depth, hipLaunchKernelGGL(simpleKernel3DArray, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, dData, width, height, depth); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); // Allocate mem for the result on host side diff --git a/catch/unit/texture/hipTex1DFetchCheckModes.cc b/catch/unit/texture/hipTex1DFetchCheckModes.cc index c3249411b6..158777e42b 100644 --- a/catch/unit/texture/hipTex1DFetchCheckModes.cc +++ b/catch/unit/texture/hipTex1DFetchCheckModes.cc @@ -79,6 +79,7 @@ static void runTest(hipTextureAddressMode addressMode, hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, texBufOut, texObj); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(float), diff --git a/catch/unit/texture/hipTexObjPitch.cc b/catch/unit/texture/hipTexObjPitch.cc index 6c7184bd3a..364bc79a21 100644 --- a/catch/unit/texture/hipTexObjPitch.cc +++ b/catch/unit/texture/hipTexObjPitch.cc @@ -85,6 +85,7 @@ TEMPLATE_TEST_CASE("Unit_hipTexObjPitch_texture2D", "", float, int, hipLaunchKernelGGL(texture2dCopyKernel, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, texObj, devPtrB); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipMemcpy2D(B, SIZE_W*sizeof(TestType), devPtrB, SIZE_W*sizeof(TestType), SIZE_W*sizeof(TestType), diff --git a/catch/unit/texture/hipTextureMipmapObj2D.cc b/catch/unit/texture/hipTextureMipmapObj2D.cc index 7545f2a073..7c55e6103e 100644 --- a/catch/unit/texture/hipTextureMipmapObj2D.cc +++ b/catch/unit/texture/hipTextureMipmapObj2D.cc @@ -97,6 +97,7 @@ static void runMipMapTest(unsigned int width, unsigned int height, unsigned int hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, width, (2 * mipmap_level)); + HIP_CHECK(hipGetLastError()); hipDeviceSynchronize(); float* hOutputData = reinterpret_cast(malloc(size)); diff --git a/catch/unit/texture/hipTextureObj1DCheckModes.cc b/catch/unit/texture/hipTextureObj1DCheckModes.cc index 55f4910370..390ef5abc0 100644 --- a/catch/unit/texture/hipTextureObj1DCheckModes.cc +++ b/catch/unit/texture/hipTextureObj1DCheckModes.cc @@ -73,6 +73,7 @@ static void runTest(const int width, const float offsetX) { hipLaunchKernelGGL(tex1DKernel, dimGrid, dimBlock, 0, 0, dData, textureObject, width, offsetX); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); diff --git a/catch/unit/texture/hipTextureObj1DCheckSRGBModes.cc b/catch/unit/texture/hipTextureObj1DCheckSRGBModes.cc index c54bdfc2db..848a25bc89 100644 --- a/catch/unit/texture/hipTextureObj1DCheckSRGBModes.cc +++ b/catch/unit/texture/hipTextureObj1DCheckSRGBModes.cc @@ -113,9 +113,11 @@ static void runTest(const int width, const float offsetX = 0) { if (resType == hipResourceTypeArray) { hipLaunchKernelGGL(tex1DRGBAKernel, dimGrid, dimBlock, 0, 0, dData, textureObject, width, offsetX); + HIP_CHECK(hipGetLastError()); } else { hipLaunchKernelGGL(tex1DRGBAKernelFetch, dimGrid, dimBlock, 0, 0, dData, textureObject, offsetX); + HIP_CHECK(hipGetLastError()); } HIP_CHECK(hipDeviceSynchronize()); diff --git a/catch/unit/texture/hipTextureObj1DFetch.cc b/catch/unit/texture/hipTextureObj1DFetch.cc index a7e25c6a40..62c6c2c5c1 100644 --- a/catch/unit/texture/hipTextureObj1DFetch.cc +++ b/catch/unit/texture/hipTextureObj1DFetch.cc @@ -69,6 +69,7 @@ TEST_CASE("Unit_hipCreateTextureObject_tex1DfetchVerification") { hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, texBufOut, texObj); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(float), diff --git a/catch/unit/texture/hipTextureObj2D.cc b/catch/unit/texture/hipTextureObj2D.cc index 48efb13bb1..d545962165 100644 --- a/catch/unit/texture/hipTextureObj2D.cc +++ b/catch/unit/texture/hipTextureObj2D.cc @@ -85,6 +85,7 @@ TEST_CASE("Unit_hipTextureObj2D_Check") { hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, width); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost)); diff --git a/catch/unit/texture/hipTextureObj2DCheckModes.cc b/catch/unit/texture/hipTextureObj2DCheckModes.cc index e9d1665c20..84f1081a2a 100644 --- a/catch/unit/texture/hipTextureObj2DCheckModes.cc +++ b/catch/unit/texture/hipTextureObj2DCheckModes.cc @@ -81,6 +81,7 @@ static void runTest(const int width, const int height, const float offsetX, cons hipLaunchKernelGGL(tex2DKernel, dimGrid, dimBlock, 0, 0, dData, textureObject, width, height, offsetX, offsetY); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); diff --git a/catch/unit/texture/hipTextureObj2DCheckSRGBModes.cc b/catch/unit/texture/hipTextureObj2DCheckSRGBModes.cc index 5f44e2d7e9..73df5dd16b 100644 --- a/catch/unit/texture/hipTextureObj2DCheckSRGBModes.cc +++ b/catch/unit/texture/hipTextureObj2DCheckSRGBModes.cc @@ -86,6 +86,7 @@ static void runTest(const int width, const int height, const float offsetX, cons hipLaunchKernelGGL(tex2DRGBAKernel, dimGrid, dimBlock, 0, 0, dData, textureObject, width, height, offsetX, offsetY); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); diff --git a/catch/unit/texture/hipTextureObj3DCheckModes.cc b/catch/unit/texture/hipTextureObj3DCheckModes.cc index b7f0a6215e..04d8433566 100644 --- a/catch/unit/texture/hipTextureObj3DCheckModes.cc +++ b/catch/unit/texture/hipTextureObj3DCheckModes.cc @@ -113,6 +113,7 @@ static void runTest(const int width, const int height, const int depth, const fl hipLaunchKernelGGL(tex3DKernel, dimGrid, dimBlock, 0, 0, dData, textureObject, width, height, depth, offsetX, offsetY, offsetZ); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); diff --git a/catch/unit/texture/hipTextureObjFetchVector.cc b/catch/unit/texture/hipTextureObjFetchVector.cc index 616602038a..b965ac74e8 100644 --- a/catch/unit/texture/hipTextureObjFetchVector.cc +++ b/catch/unit/texture/hipTextureObjFetchVector.cc @@ -178,6 +178,7 @@ bool runTest() { hipLaunchKernelGGL(tex1dKernelFetch, dimGrid, dimBlock, 0, 0, texBufOut, texObj, N); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(T), hipMemcpyDeviceToHost)); diff --git a/catch/unit/texture/hipTextureRef2D.cc b/catch/unit/texture/hipTextureRef2D.cc index b4c98d7ae9..d0a31644be 100644 --- a/catch/unit/texture/hipTextureRef2D.cc +++ b/catch/unit/texture/hipTextureRef2D.cc @@ -69,6 +69,7 @@ TEST_CASE("Unit_hipTextureRef2D_Check") { dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, width); + HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); float* hOutputData = reinterpret_cast(malloc(size));