[hip-tests] Fix Unit_Assert_Positive_Basic_KernelFail (#1916)
* [hip-tests] Fix Unit_Assert_Positive_Basic_KernelFail This test was expecting a call to abort() when assertions where hit on AMD devices. This is no longer true since aborts from assertions are disabled unless HIP_SKIP_ABORT_ON_GPU_ERROR is set. This PR simplifies the test by removing the SIGABRT signal handling (which was also undefined behaviour). Instead, if HIP_SKIP_ABORT_ON_GPU_ERROR is set, the test is skipped.
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
8e1aee62d0
Коммит
47b80c011c
@@ -47,7 +47,6 @@
|
||||
"Unit_hipGraphicsUnregisterResource_Negative_Parameters",
|
||||
"SWDEV-446588 - Disable graph multi gpu testcases until graph has support for it",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint8_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint16_t",
|
||||
"Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic - uint32_t",
|
||||
|
||||
@@ -428,7 +428,6 @@
|
||||
"Unit_hipGraphUpload_Functional_multidevice_test",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_StaticAssert_Positive_Basic_RTC",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"=== Below tests are disabled due to defect EXSWHTEC-356 ===",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
|
||||
"Unit_hipModuleUnload_Negative_Double_Unload",
|
||||
"=== Below tests are failing PSDB ===",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"Unit_hipMemMapArrayAsync_Positive_Basic",
|
||||
"=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 31)]",
|
||||
"Unit_Device_sinpi_Accuracy_Positive - double",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
@@ -17,9 +17,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <hip_test_common.hh>
|
||||
#include <csetjmp>
|
||||
#include <csignal>
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include "hip_test_context.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup assert assert
|
||||
@@ -29,23 +32,6 @@ THE SOFTWARE.
|
||||
* Stops the kernel execution if expression is equal to zero.
|
||||
*/
|
||||
|
||||
jmp_buf env_ignore_abort;
|
||||
volatile int abort_raised_flag = 0;
|
||||
|
||||
void on_sigabrt(int signum) {
|
||||
signal(signum, SIG_DFL);
|
||||
abort_raised_flag = 1;
|
||||
longjmp(env_ignore_abort, 1);
|
||||
}
|
||||
|
||||
void try_and_catch_abort(void (*func)()) {
|
||||
if (!setjmp(env_ignore_abort)) {
|
||||
signal(SIGABRT, &on_sigabrt);
|
||||
(*func)();
|
||||
signal(SIGABRT, SIG_DFL);
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void AssertPassKernel(int* x) {
|
||||
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
*x = tid;
|
||||
@@ -60,25 +46,19 @@ __global__ void AssertFailKernel(int* x) {
|
||||
assert(tid % 2 == 1);
|
||||
}
|
||||
|
||||
template <bool should_abort> void LaunchAssertKernel() {
|
||||
const int num_blocks = 2;
|
||||
const int num_threads = 16;
|
||||
int* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(int)));
|
||||
bool isAbortOnErrorEnabled() {
|
||||
std::string abort_on_error_env = TestContext::getEnvVar("HIP_SKIP_ABORT_ON_GPU_ERROR");
|
||||
|
||||
if constexpr (should_abort) {
|
||||
AssertFailKernel<<<num_blocks, num_threads, 0, 0>>>(d_a);
|
||||
#if HT_AMD
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
#else
|
||||
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert);
|
||||
#endif
|
||||
} else {
|
||||
AssertPassKernel<<<num_blocks, num_threads, 0, 0>>>(d_a);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
if (!abort_on_error_env.empty()) {
|
||||
try {
|
||||
return !std::stoi(abort_on_error_env);
|
||||
} catch (const std::invalid_argument&) {
|
||||
return true;
|
||||
} catch (const std::out_of_range&) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -94,19 +74,40 @@ template <bool should_abort> void LaunchAssertKernel() {
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") {
|
||||
try_and_catch_abort(&LaunchAssertKernel<false>);
|
||||
REQUIRE(abort_raised_flag == 0);
|
||||
|
||||
#ifdef NDEBUG
|
||||
HipTest::HIP_SKIP_TEST("Assertions are disabled on this build.");
|
||||
return;
|
||||
#endif
|
||||
|
||||
#if HT_AMD
|
||||
if (isAbortOnErrorEnabled()) {
|
||||
HipTest::HIP_SKIP_TEST(
|
||||
"Test incompatible with aborts enabled through HIP_SKIP_ABORT_ON_GPU_ERROR.");
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
const int num_blocks = 2;
|
||||
const int num_threads = 16;
|
||||
int* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(int)));
|
||||
|
||||
AssertPassKernel<<<num_blocks, num_threads, 0, 0>>>(d_a);
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels with asserts that have an expression equal to 0.
|
||||
* - Expects that SIGABRT is raised and kernels have been stopped on AMD.
|
||||
* - The HIP runtime also aborts the host code, so this test case uses signal handlers
|
||||
* to avoid host code abortion.
|
||||
* - Test is skipped on AMD if HIP_SKIP_ABORT_ON_GPU_ERROR=0 to avoid call to std::abort() which
|
||||
* cannot safely be handled by the test.
|
||||
* - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA.
|
||||
* - The host code is not aborted.
|
||||
* - Expects that `hipErrorLaunchFailure` is returned from `hipDeviceSynchronize` on AMD. HSA error
|
||||
* codes don't have enough granularity to distinguish between assertions and other failures.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/assert.cc
|
||||
@@ -115,11 +116,33 @@ TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") {
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") {
|
||||
try_and_catch_abort(&LaunchAssertKernel<true>);
|
||||
|
||||
#ifdef NDEBUG
|
||||
HipTest::HIP_SKIP_TEST("Assertions are disabled on this build.");
|
||||
return;
|
||||
#endif
|
||||
|
||||
#if HT_AMD
|
||||
REQUIRE(abort_raised_flag == 1);
|
||||
if (isAbortOnErrorEnabled()) {
|
||||
HipTest::HIP_SKIP_TEST(
|
||||
"Test incompatible with aborts enabled through HIP_SKIP_ABORT_ON_GPU_ERROR.");
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
const int num_blocks = 2;
|
||||
const int num_threads = 16;
|
||||
int* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(int)));
|
||||
|
||||
AssertFailKernel<<<num_blocks, num_threads, 0, 0>>>(d_a);
|
||||
|
||||
#if HT_AMD
|
||||
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorLaunchFailure);
|
||||
HIP_CHECK_ERROR(hipFree(d_a), hipErrorLaunchFailure);
|
||||
#else
|
||||
REQUIRE(abort_raised_flag == 0);
|
||||
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert);
|
||||
HIP_CHECK_ERROR(hipFree(d_a), hipErrorAssert);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user