diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index 738a2db5e6..fee3605ea4 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -47,7 +47,6 @@ "Unit_hipGraphicsUnregisterResource_Negative_Parameters", "SWDEV-446588 - Disable graph multi gpu testcases until graph has support for it", "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", - "Unit_Assert_Positive_Basic_KernelFail", "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t", "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t", "Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 48d09a27f0..77fa375707 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -428,7 +428,6 @@ "Unit_hipGraphUpload_Functional_multidevice_test", "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", "Unit_StaticAssert_Positive_Basic_RTC", - "Unit_Assert_Positive_Basic_KernelFail", "=== Below tests are disabled due to defect EXSWHTEC-356 ===", "Unit_Device___hisinf2_Accuracy_Positive", "Unit_Device___hisnan2_Accuracy_Positive", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json index 98688479c7..762770a79c 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json @@ -10,7 +10,6 @@ "Unit_atomicExch_system_Positive_Host_And_GPU - float", "Unit_hipModuleUnload_Negative_Double_Unload", "=== Below tests are failing PSDB ===", - "Unit_Assert_Positive_Basic_KernelFail", "Unit_hipMemMapArrayAsync_Positive_Basic", "=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 31)]", "Unit_Device_sinpi_Accuracy_Positive - double", diff --git a/projects/hip-tests/catch/unit/assertion/assert.cc b/projects/hip-tests/catch/unit/assertion/assert.cc index 4f766dbfdd..07cc8f5fba 100644 --- a/projects/hip-tests/catch/unit/assertion/assert.cc +++ b/projects/hip-tests/catch/unit/assertion/assert.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,9 +17,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include #include #include +#include "hip/hip_runtime_api.h" +#include "hip_test_context.hh" /** * @addtogroup assert assert @@ -29,23 +32,6 @@ THE SOFTWARE. * Stops the kernel execution if expression is equal to zero. */ -jmp_buf env_ignore_abort; -volatile int abort_raised_flag = 0; - -void on_sigabrt(int signum) { - signal(signum, SIG_DFL); - abort_raised_flag = 1; - longjmp(env_ignore_abort, 1); -} - -void try_and_catch_abort(void (*func)()) { - if (!setjmp(env_ignore_abort)) { - signal(SIGABRT, &on_sigabrt); - (*func)(); - signal(SIGABRT, SIG_DFL); - } -} - __global__ void AssertPassKernel(int* x) { const int tid = threadIdx.x + blockIdx.x * blockDim.x; *x = tid; @@ -60,25 +46,19 @@ __global__ void AssertFailKernel(int* x) { assert(tid % 2 == 1); } -template void LaunchAssertKernel() { - const int num_blocks = 2; - const int num_threads = 16; - int* d_a; - HIP_CHECK(hipMalloc(&d_a, sizeof(int))); +bool isAbortOnErrorEnabled() { + std::string abort_on_error_env = TestContext::getEnvVar("HIP_SKIP_ABORT_ON_GPU_ERROR"); - if constexpr (should_abort) { - AssertFailKernel<<>>(d_a); -#if HT_AMD - HIP_CHECK(hipDeviceSynchronize()); -#else - HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert); -#endif - } else { - AssertPassKernel<<>>(d_a); - HIP_CHECK(hipDeviceSynchronize()); + if (!abort_on_error_env.empty()) { + try { + return !std::stoi(abort_on_error_env); + } catch (const std::invalid_argument&) { + return true; + } catch (const std::out_of_range&) { + return true; + } } - - HIP_CHECK(hipFree(d_a)); + return false; } /** @@ -94,19 +74,40 @@ template void LaunchAssertKernel() { * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") { - try_and_catch_abort(&LaunchAssertKernel); - REQUIRE(abort_raised_flag == 0); + +#ifdef NDEBUG + HipTest::HIP_SKIP_TEST("Assertions are disabled on this build."); + return; +#endif + +#if HT_AMD + if (isAbortOnErrorEnabled()) { + HipTest::HIP_SKIP_TEST( + "Test incompatible with aborts enabled through HIP_SKIP_ABORT_ON_GPU_ERROR."); + return; + } +#endif + + const int num_blocks = 2; + const int num_threads = 16; + int* d_a; + HIP_CHECK(hipMalloc(&d_a, sizeof(int))); + + AssertPassKernel<<>>(d_a); + + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipFree(d_a)); } /** * Test Description * ------------------------ * - Launches kernels with asserts that have an expression equal to 0. - * - Expects that SIGABRT is raised and kernels have been stopped on AMD. - * - The HIP runtime also aborts the host code, so this test case uses signal handlers - * to avoid host code abortion. + * - Test is skipped on AMD if HIP_SKIP_ABORT_ON_GPU_ERROR=0 to avoid call to std::abort() which + * cannot safely be handled by the test. * - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA. - * - The host code is not aborted. + * - Expects that `hipErrorLaunchFailure` is returned from `hipDeviceSynchronize` on AMD. HSA error + * codes don't have enough granularity to distinguish between assertions and other failures. * Test source * ------------------------ * - unit/assertion/assert.cc @@ -115,11 +116,33 @@ TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") { * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") { - try_and_catch_abort(&LaunchAssertKernel); + +#ifdef NDEBUG + HipTest::HIP_SKIP_TEST("Assertions are disabled on this build."); + return; +#endif + #if HT_AMD - REQUIRE(abort_raised_flag == 1); + if (isAbortOnErrorEnabled()) { + HipTest::HIP_SKIP_TEST( + "Test incompatible with aborts enabled through HIP_SKIP_ABORT_ON_GPU_ERROR."); + return; + } +#endif + + const int num_blocks = 2; + const int num_threads = 16; + int* d_a; + HIP_CHECK(hipMalloc(&d_a, sizeof(int))); + + AssertFailKernel<<>>(d_a); + +#if HT_AMD + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorLaunchFailure); + HIP_CHECK_ERROR(hipFree(d_a), hipErrorLaunchFailure); #else - REQUIRE(abort_raised_flag == 0); + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert); + HIP_CHECK_ERROR(hipFree(d_a), hipErrorAssert); #endif }