From da9bb4efae4def1544e55ae3cee519c4ac8af807 Mon Sep 17 00:00:00 2001 From: amilanov-amd Date: Wed, 26 Nov 2025 10:34:05 +0100 Subject: [PATCH] SWDEV-503089 - Fix and enable disabled HIP tests from math group (#1319) * SWDEV-503089 - Fix and enable disabled HIP tests from math group * SWDEV-503089 - Move single precision reduced run to a common function --- .../catch/hipTestMain/config/config_amd_linux | 549 ++---------------- .../config/config_nvidia_linux.json | 55 ++ projects/hip-tests/catch/hipTestMain/main.cc | 8 +- .../hip-tests/catch/include/cmd_options.hh | 2 + .../catch/unit/math/binary_common.hh | 2 +- .../catch/unit/math/casting_common.hh | 47 +- .../catch/unit/math/casting_half2_common.hh | 4 +- .../catch/unit/math/casting_half2_funcs.cc | 2 +- .../unit/math/casting_half_float_funcs.cc | 4 +- .../catch/unit/math/casting_int_funcs.cc | 2 +- .../unit/math/double_precision_intrinsics.cc | 6 +- .../catch/unit/math/half_precision_common.hh | 6 +- .../unit/math/half_precision_comparison.cc | 4 +- .../hip-tests/catch/unit/math/log_funcs.cc | 7 +- .../hip-tests/catch/unit/math/math_common.hh | 79 ++- .../hip-tests/catch/unit/math/misc_funcs.cc | 8 +- .../unit/math/misc_negative_kernels_rtc.hh | 6 +- .../hip-tests/catch/unit/math/pow_common.hh | 2 +- .../hip-tests/catch/unit/math/pow_funcs.cc | 2 +- .../catch/unit/math/quaternary_common.hh | 2 +- .../unit/math/remainder_and_rounding_funcs.cc | 4 +- .../hip-tests/catch/unit/math/root_funcs.cc | 14 +- .../unit/math/single_precision_intrinsics.cc | 6 +- .../catch/unit/math/special_common.hh | 15 +- .../catch/unit/math/special_funcs.cc | 8 +- .../catch/unit/math/ternary_common.hh | 2 +- .../hip-tests/catch/unit/math/trig_funcs.cc | 6 +- .../hip-tests/catch/unit/math/unary_common.hh | 36 +- 28 files changed, 313 insertions(+), 575 deletions(-) diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index e8de3dbd15..725db487bb 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -17,25 +17,6 @@ "NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238", "Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Basic", "Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Partial_Range", - "=== Below 2 tests are disable due to defect EXSWHTEC-356 ===", - "Unit_Device___hisinf2_Accuracy_Positive", - "Unit_Device___hisnan2_Accuracy_Positive", - "Unit_Device___hbequ2_Accuracy_Positive", - "Unit_Device___hne_Accuracy_Positive", - "Unit_Device___hne2_Accuracy_Positive", - "Unit_Device___hbne2_Accuracy_Positive", - "Unit_Device___hbgeu2_Accuracy_Positive", - "Unit_Device___hbgtu2_Accuracy_Positive", - "Unit_Device___hbleu2_Accuracy_Positive", - "Unit_Device___hbltu2_Accuracy_Positive", - "=== Below 4 tests are disable due to defect EXSWHTEC-355 ===", - "Unit_Device___hadd_Sanity_Positive", - "Unit_Device___uhadd_Sanity_Positive", - "Unit_Device___rhadd_Sanity_Positive", - "Unit_Device___urhadd_Sanity_Positive", - "=== Below 2 tests are disable due to defect EXSWHTEC-369 ===", - "Unit_Device_ilogbf_Accuracy_Positive", - "Unit_Device_ilogb_Accuracy_Positive", "=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===", "Unit_Warp_Shfl_Positive_Basic - int", "Unit_Warp_Shfl_Positive_Basic - unsigned int", @@ -77,470 +58,8 @@ "Unit_hipStreamBeginCapture_Positive_Functional", "Unit_Kernel_Launch_bounds_Negative_OutOfBounds", "Unit_Kernel_Launch_bounds_Negative_Parameters_RTC", - "Unit_Device_sin_Accuracy_Positive - float", - "Unit_Device_sin_Accuracy_Positive - double", - "Unit_Device_cos_Accuracy_Positive - float", - "Unit_Device_cos_Accuracy_Positive - double", - "Unit_Device_tan_Accuracy_Positive - float", - "Unit_Device_tan_Accuracy_Positive - double", - "Unit_Device_asin_Accuracy_Positive - float", - "Unit_Device_asin_Accuracy_Positive - double", - "Unit_Device_acos_Accuracy_Positive - float", - "Unit_Device_acos_Accuracy_Positive - double", - "Unit_Device_atan_Accuracy_Positive - float", - "Unit_Device_atan_Accuracy_Positive - double", - "Unit_Device_sinh_Accuracy_Positive - float", - "Unit_Device_sinh_Accuracy_Positive - double", - "Unit_Device_cosh_Accuracy_Positive - float", - "Unit_Device_cosh_Accuracy_Positive - double", - "Unit_Device_tanh_Accuracy_Positive - float", - "Unit_Device_tanh_Accuracy_Positive - double", - "Unit_Device_asinh_Accuracy_Positive - float", - "Unit_Device_asinh_Accuracy_Positive - double", - "Unit_Device_acosh_Accuracy_Positive - float", - "Unit_Device_acosh_Accuracy_Positive - double", - "Unit_Device_atanh_Accuracy_Positive - float", - "Unit_Device_atanh_Accuracy_Positive - double", - "Unit_Device_sinpi_Accuracy_Positive - float", - "Unit_Device_sinpi_Accuracy_Positive - double", - "Unit_Device_cospi_Accuracy_Positive - float", - "Unit_Device_cospi_Accuracy_Positive - double", - "Unit_Device_tanpi_Accuracy_Positive - float", - "Unit_Device_tanpi_Accuracy_Positive - double", - "Unit_Device_atan2_Accuracy_Positive - float", - "Unit_Device_atan2_Accuracy_Positive - double", - "Unit_Device_sincos_Accuracy_Positive - float", - "Unit_Device_sincos_Accuracy_Positive - double", - "Unit_Device_sincospi_Accuracy_Positive - float", - "Unit_Device_sincospi_Accuracy_Positive - double", - "Unit_Device_fabs_Accuracy_Positive - float", - "Unit_Device_fabs_Accuracy_Positive - double", - "Unit_Device_copysign_Accuracy_Positive - float", - "Unit_Device_copysign_Accuracy_Positive - double", - "Unit_Device_fmax_Accuracy_Positive - float", - "Unit_Device_fmax_Accuracy_Positive - double", - "Unit_Device_fmin_Accuracy_Positive - float", - "Unit_Device_fmin_Accuracy_Positive - double", - "Unit_Device_nextafter_Accuracy_Positive - float", - "Unit_Device_nextafter_Accuracy_Positive - double", - "Unit_Device_fma_Accuracy_Positive - float", - "Unit_Device_fma_Accuracy_Positive - double", - "Unit_Device_fdividef_Accuracy_Positive", - "Unit_Device_isfinite_Accuracy_Positive - float", - "Unit_Device_isfinite_Accuracy_Positive - double", - "Unit_Device_isinf_Accuracy_Positive - float", - "Unit_Device_isinf_Accuracy_Positive - double", - "Unit_Device_isnan_Accuracy_Positive - float", - "Unit_Device_isnan_Accuracy_Positive - double", - "Unit_Device_signbit_Accuracy_Positive - float", - "Unit_Device_signbit_Accuracy_Positive - double", - "Unit_Device_fmod_Accuracy_Positive - float", - "Unit_Device_fmod_Accuracy_Positive - double", - "Unit_Device_remainder_Accuracy_Positive - float", - "Unit_Device_remainder_Accuracy_Positive - double", - "Unit_Device_fdim_Accuracy_Positive - float", - "Unit_Device_fdim_Accuracy_Positive - double", - "Unit_Device_trunc_Accuracy_Positive - float", - "Unit_Device_trunc_Accuracy_Positive - double", - "Unit_Device_round_Accuracy_Positive - float", - "Unit_Device_round_Accuracy_Positive - double", - "Unit_Device_rint_Accuracy_Positive - float", - "Unit_Device_rint_Accuracy_Positive - double", - "Unit_Device_nearbyint_Accuracy_Positive - float", - "Unit_Device_nearbyint_Accuracy_Positive - double", - "Unit_Device_ceil_Accuracy_Positive - float", - "Unit_Device_ceil_Accuracy_Positive - double", - "Unit_Device_floor_Accuracy_Positive - float", - "Unit_Device_floor_Accuracy_Positive - double", - "Unit_Device_lrint_Accuracy_Positive - float", - "Unit_Device_lrint_Accuracy_Positive - double", - "Unit_Device_lround_Accuracy_Positive - float", - "Unit_Device_lround_Accuracy_Positive - double", - "Unit_Device_llrint_Accuracy_Positive - float", - "Unit_Device_llrint_Accuracy_Positive - double", - "Unit_Device_llround_Accuracy_Positive - float", - "Unit_Device_llround_Accuracy_Positive - double", - "Unit_Device_remquo_Accuracy_Positive - float", - "Unit_Device_remquo_Accuracy_Positive - double", - "Unit_Device_modf_Accuracy_Positive - float", - "Unit_Device_modf_Accuracy_Positive - double", - "=== Below tests cause timeout in stress test of 09/02/24 ===", - "Unit_Device___half2half2_Accuracy_Positive", - "Unit_Device_make_half2_Accuracy_Positive", - "Unit_Device___halves2half2_Accuracy_Positive", - "Unit_Device___low2half_Accuracy_Positive", - "Unit_Device___high2half_Accuracy_Positive", - "Unit_Device___low2half2_Accuracy_Positive", - "Unit_Device___high2half2_Accuracy_Positive", - "Unit_Device___lowhigh2highlow_Accuracy_Positive", - "Unit_Device___lows2half2_Accuracy_Positive", - "Unit_Device___highs2half2_Accuracy_Positive", - "Unit_Device___float2half2_rn_Accuracy_Positive", - "Unit_Device___floats2half2_rn_Accuracy_Positive", - "Unit_Device___float22half2_rn_Accuracy_Positive", - "Unit_Device___low2float_Accuracy_Positive", - "Unit_Device___high2float_Accuracy_Positive", - "Unit_Device___half22float2_Accuracy_Positive", - "Unit_Device_hcos_Accuracy_Positive", - "Unit_Device_h2cos_Accuracy_Positive", - "Unit_Device_hsin_Accuracy_Positive", - "Unit_Device_h2sin_Accuracy_Positive", - "Unit_Device_hexp_Accuracy_Positive", - "Unit_Device_h2exp_Accuracy_Positive", - "Unit_Device_hexp10_Accuracy_Positive", - "Unit_Device_h2exp10_Accuracy_Positive", - "Unit_Device_hexp2_Accuracy_Positive", - "Unit_Device_h2exp2_Accuracy_Positive", - "Unit_Device_hlog_Accuracy_Positive", - "Unit_Device_h2log_Accuracy_Positive", - "Unit_Device_hlog10_Accuracy_Positive", - "Unit_Device_h2log10_Accuracy_Positive", - "Unit_Device_hlog2_Accuracy_Positive", - "Unit_Device_h2log2_Accuracy_Positive", - "Unit_Device_hsqrt_Accuracy_Positive", - "Unit_Device_h2sqrt_Accuracy_Positive", - "Unit_Device_hceil_Accuracy_Positive", - "Unit_Device_h2ceil_Accuracy_Positive", - "Unit_Device_hfloor_Accuracy_Positive", - "Unit_Device_h2floor_Accuracy_Positive", - "Unit_Device_htrunc_Accuracy_Positive", - "Unit_Device_h2trunc_Accuracy_Positive", - "Unit_Device_hrcp_Accuracy_Positive", - "Unit_Device_h2rcp_Accuracy_Positive", - "Unit_Device_hrsqrt_Accuracy_Positive", - "Unit_Device_h2rsqrt_Accuracy_Positive", - "Unit_Device_hrint_Accuracy_Positive", - "Unit_Device_h2rint_Accuracy_Positive", - "Unit_Device___habs_Accuracy_Positive", - "Unit_Device___habs2_Accuracy_Positive", - "Unit_Device___hneg_Accuracy_Positive", - "Unit_Device___hneg2_Accuracy_Positive", - "Unit_Device___hadd_wrapper_Accuracy_Positive", - "Unit_Device___hadd2_Accuracy_Positive", - "Unit_Device___hadd_sat_Accuracy_Positive", - "Unit_Device___hadd2_sat_Accuracy_Positive", - "Unit_Device___hsub_Accuracy_Positive", - "Unit_Device___hsub2_Accuracy_Positive", - "Unit_Device___hsub_sat_Accuracy_Positive", - "Unit_Device___hsub2_sat_Accuracy_Positive", - "Unit_Device___hmul_Accuracy_Positive", - "Unit_Device___hmul2_Accuracy_Positive", - "Unit_Device___hmul_sat_Accuracy_Positive", - "Unit_Device___hmul2_sat_Accuracy_Positive", - "Unit_Device___hdiv_Accuracy_Positive", - "Unit_Device___h2div_Accuracy_Positive", - "Unit_Device___hfma_Accuracy_Positive", - "Unit_Device___hfma2_Accuracy_Positive", - "Unit_Device___hfma_sat_Accuracy_Positive", - "Unit_Device___hfma2_sat_Accuracy_Positive", - "Unit_Device___hisinf_Accuracy_Positive", - "Unit_Device___hisinf2_Accuracy_Positive", - "Unit_Device___hisnan_Accuracy_Positive", - "Unit_Device___hisnan2_Accuracy_Positive", - "Unit_Device___heq_Accuracy_Positive", - "Unit_Device___hbeq2_Accuracy_Positive", - "Unit_Device___hequ_Accuracy_Positive", - "Unit_Device___hbequ2_Accuracy_Positive", - "Unit_Device___heq2_Accuracy_Positive", - "Unit_Device___hequ2_Accuracy_Positive", - "Unit_Device___hne_Accuracy_Positive", - "Unit_Device___hbne2_Accuracy_Positive", - "Unit_Device___hneu_Accuracy_Positive", - "Unit_Device___hbneu2_Accuracy_Positive", - "Unit_Device___hne2_Accuracy_Positive", - "Unit_Device___hneu2_Accuracy_Positive", - "Unit_Device___hge_Accuracy_Positive", - "Unit_Device___hbge2_Accuracy_Positive", - "Unit_Device___hgeu_Accuracy_Positive", - "Unit_Device___hbgeu2_Accuracy_Positive", - "Unit_Device___hge2_Accuracy_Positive", - "Unit_Device___hgeu2_Accuracy_Positive", - "Unit_Device___hgt_Accuracy_Positive", - "Unit_Device___hbgt2_Accuracy_Positive", - "Unit_Device___hgtu_Accuracy_Positive", - "Unit_Device___hbgtu2_Accuracy_Positive", - "Unit_Device___hgt2_Accuracy_Positive", - "Unit_Device___hgtu2_Accuracy_Positive", - "Unit_Device___hle_Accuracy_Positive", - "Unit_Device___hble2_Accuracy_Positive", - "Unit_Device___hleu_Accuracy_Positive", - "Unit_Device___hbleu2_Accuracy_Positive", - "Unit_Device___hle2_Accuracy_Positive", - "Unit_Device___hleu2_Accuracy_Positive", - "Unit_Device___hlt_Accuracy_Positive", - "Unit_Device___hblt2_Accuracy_Positive", - "Unit_Device___hltu_Accuracy_Positive", - "Unit_Device___hbltu2_Accuracy_Positive", - "Unit_Device___hlt2_Accuracy_Positive", - "Unit_Device___hltu2_Accuracy_Positive", - "Unit_Device___hmax_Accuracy_Positive", - "Unit_Device___hmin_Accuracy_Positive", - "Unit_Device___hmax_nan_Accuracy_Positive", - "Unit_Device___hmin_nan_Accuracy_Positive", - "Unit_Device___half2int_rn_Accuracy_Positive", - "Unit_Device___half2int_rz_Accuracy_Positive", - "Unit_Device___half2int_rd_Accuracy_Positive", - "Unit_Device___half2int_ru_Accuracy_Positive", - "Unit_Device___half2uint_rn_Accuracy_Positive", - "Unit_Device___half2uint_rz_Accuracy_Positive", - "Unit_Device___half2uint_rd_Accuracy_Positive", - "Unit_Device___half2uint_ru_Accuracy_Positive", - "Unit_Device___half2short_rn_Accuracy_Positive", - "Unit_Device___half2short_rz_Accuracy_Positive", - "Unit_Device___half2short_rd_Accuracy_Positive", - "Unit_Device___half2short_ru_Accuracy_Positive", - "Unit_Device___half2ushort_rn_Accuracy_Positive", - "Unit_Device___half2ushort_rz_Accuracy_Positive", - "Unit_Device___half2ushort_rd_Accuracy_Positive", - "Unit_Device___half2ushort_ru_Accuracy_Positive", - "Unit_Device___half2ll_rn_Accuracy_Positive", - "Unit_Device___half2ll_rz_Accuracy_Positive", - "Unit_Device___half2ll_rd_Accuracy_Positive", - "Unit_Device___half2ll_ru_Accuracy_Positive", - "Unit_Device___half2ull_rn_Accuracy_Positive", - "Unit_Device___half2ull_rz_Accuracy_Positive", - "Unit_Device___half2ull_rd_Accuracy_Positive", - "Unit_Device___half2ull_ru_Accuracy_Positive", - "Unit_Device___half_as_short_Accuracy_Positive", - "Unit_Device___half_as_ushort_Accuracy_Positive", - "Unit_Device___int2half_rn_Accuracy_Positive", - "Unit_Device___int2half_rz_Accuracy_Positive", - "Unit_Device___int2half_rd_Accuracy_Positive", - "Unit_Device___int2half_ru_Accuracy_Positive", - "Unit_Device___uint2half_rn_Accuracy_Positive", - "Unit_Device___uint2half_rz_Accuracy_Positive", - "Unit_Device___uint2half_rd_Accuracy_Positive", - "Unit_Device___uint2half_ru_Accuracy_Positive", - "Unit_Device___short2half_rn_Accuracy_Positive", - "Unit_Device___short2half_rz_Accuracy_Positive", - "Unit_Device___short2half_rd_Accuracy_Positive", - "Unit_Device___short2half_ru_Accuracy_Positive", - "Unit_Device___ushort2half_rn_Accuracy_Positive", - "Unit_Device___ushort2half_rz_Accuracy_Positive", - "Unit_Device___ushort2half_rd_Accuracy_Positive", - "Unit_Device___ushort2half_ru_Accuracy_Positive", - "Unit_Device___ll2half_rn_Accuracy_Positive", - "Unit_Device___ll2half_rz_Accuracy_Positive", - "Unit_Device___ll2half_rd_Accuracy_Positive", - "Unit_Device___ll2half_ru_Accuracy_Positive", - "Unit_Device___ull2half_rn_Accuracy_Positive", - "Unit_Device___ull2half_rz_Accuracy_Positive", - "Unit_Device___ull2half_rd_Accuracy_Positive", - "Unit_Device___ull2half_ru_Accuracy_Positive", - "Unit_Device___short_as_half_Accuracy_Positive", - "Unit_Device___ushort_as_half_Accuracy_Positive", - "Unit_Device___float2half_rn_Accuracy_Positive", - "Unit_Device___float2half_Accuracy_Positive", - "Unit_Device___half2float_Accuracy_Positive", - "Unit_Device___frcp_rn_Accuracy_Positive", - "Unit_Device___fsqrt_rn_Accuracy_Positive", - "Unit_Device___frsqrt_rn_Accuracy_Positive", - "Unit_Device___expf_Accuracy_Positive", - "Unit_Device___exp10f_Accuracy_Positive", - "Unit_Device___logf_Accuracy_Positive", - "Unit_Device___log2f_Accuracy_Positive", - "Unit_Device___log10f_Accuracy_Positive", - "Unit_Device___sinf_Accuracy_Positive", - "Unit_Device___sincosf_sin_Accuracy_Positive", - "Unit_Device___cosf_Accuracy_Positive", - "Unit_Device___sincosf_cos_Accuracy_Positive", - "Unit_Device___fadd_rn_Accuracy_Positive", - "Unit_Device___fsub_rn_Accuracy_Positive", - "Unit_Device___fmul_rn_Accuracy_Positive", - "Unit_Device___fdiv_rn_Accuracy_Positive", - "Unit_Device___fdividef_Accuracy_Positive", - "Unit_Device___fmaf_rn_Accuracy_Positive", - "Unit_Device___drcp_rn_Accuracy_Positive", - "Unit_Device___dsqrt_rn_Accuracy_Positive", - "Unit_Device___dadd_rn_Accuracy_Positive", - "Unit_Device___dsub_rn_Accuracy_Positive", - "Unit_Device___dmul_rn_Accuracy_Positive", - "Unit_Device___ddiv_rn_Accuracy_Positive", - "Unit_Device___fma_rn_Accuracy_Positive", - "Unit_Device_sqrtf_Accuracy_Positive", - "Unit_Device_sqrt_Accuracy_Positive", - "Unit_Device_rsqrtf_Accuracy_Positive", - "Unit_Device_rsqrt_Accuracy_Positive", - "Unit_Device_cbrt_Accuracy_Positive - float", - "Unit_Device_cbrt_Accuracy_Positive - double", - "Unit_Device_rcbrtf_Accuracy_Positive", - "Unit_Device_rcbrt_Accuracy_Positive", - "Unit_Device_hypot_Accuracy_Positive - float", - "Unit_Device_hypot_Accuracy_Positive - double", - "Unit_Device_rhypot_Accuracy_Positive - float", - "Unit_Device_rhypot_Accuracy_Positive - double", - "Unit_Device_norm3d_Accuracy_Positive - float", - "Unit_Device_norm3d_Accuracy_Positive - double", - "Unit_Device_rnorm3d_Accuracy_Positive - float", - "Unit_Device_rnorm3d_Accuracy_Positive - double", - "Unit_Device_norm4d_Accuracy_Positive - float", - "Unit_Device_norm4d_Accuracy_Positive - double", - "Unit_Device_rnorm4d_Accuracy_Positive - float", - "Unit_Device_rnorm4d_Accuracy_Positive - double", - "Unit_Device_exp_Accuracy_Positive - float", - "Unit_Device_exp_Accuracy_Positive - double", - "Unit_Device_exp2_Accuracy_Positive - float", - "Unit_Device_exp2_Accuracy_Positive - double", - "Unit_Device_expm1_Accuracy_Positive - float", - "Unit_Device_expm1_Accuracy_Positive - double", - "Unit_Device_exp10f_Accuracy_Positive", - "Unit_Device_exp10_Accuracy_Positive", - "Unit_Device_frexpf_Accuracy_Positive", - "Unit_Device_frexp_Accuracy_Positive", - "Unit_Device_pow_Accuracy_Positive - float", - "Unit_Device_pow_Accuracy_Positive - double", - "Unit_Device_ldexp_Accuracy_Positive - float", - "Unit_Device_ldexp_Accuracy_Positive - double", - "Unit_Device_powi_Accuracy_Positive - float", - "Unit_Device_powi_Accuracy_Positive - double", - "Unit_Device_scalbn_Accuracy_Positive - float", - "Unit_Device_scalbn_Accuracy_Positive - double", - "Unit_Device_scalbln_Accuracy_Positive - float", - "Unit_Device_scalbln_Accuracy_Positive - double", - "Unit_Device_log_Accuracy_Positive - float", - "Unit_Device_log_Accuracy_Positive - double", - "Unit_Device_log2_Accuracy_Positive - float", - "Unit_Device_log2_Accuracy_Positive - double", - "Unit_Device_log10_Accuracy_Positive - float", - "Unit_Device_log10_Accuracy_Positive - double", - "Unit_Device_log1p_Accuracy_Positive - float", - "Unit_Device_log1p_Accuracy_Positive - double", - "Unit_Device_logb_Accuracy_Positive - float", - "Unit_Device_logb_Accuracy_Positive - double", - "Unit_Device_ilogbf_Accuracy_Positive", - "Unit_Device_ilogb_Accuracy_Positive", - "Unit_Device_erf_Accuracy_Positive - float", - "Unit_Device_erf_Accuracy_Positive - double", - "Unit_Device_erfc_Accuracy_Positive - float", - "Unit_Device_erfc_Accuracy_Positive - double", - "Unit_Device_erfinvf_Accuracy_Positive", - "Unit_Device_erfinv_Accuracy_Positive", - "Unit_Device_erfcinvf_Accuracy_Positive", - "Unit_Device_erfcinv_Accuracy_Positive", - "Unit_Device_normcdff_Accuracy_Positive", - "Unit_Device_normcdf_Accuracy_Positive", - "Unit_Device_tgammaf_Accuracy_Limited_Positive", - "Unit_Device_tgamma_Accuracy_Limited_Positive", - "Unit_Device_lgammaf_Accuracy_Limited_Positive", - "Unit_Device_lgamma_Accuracy_Limited_Positive", - "Unit_Device_cyl_bessel_i0f_Accuracy_Limited_Positive", - "Unit_Device_cyl_bessel_i0_Accuracy_Limited_Positive", - "Unit_Device_cyl_bessel_i1f_Accuracy_Limited_Positive", - "Unit_Device_cyl_bessel_i1_Accuracy_Limited_Positive", - "Unit_Device_y0f_Accuracy_Limited_Positive", - "Unit_Device_y0_Accuracy_Limited_Positive", - "Unit_Device_y1f_Accuracy_Limited_Positive", - "Unit_Device_y1_Accuracy_Limited_Positive", - "Unit_Device_ynf_Accuracy_Limited_Positive", - "Unit_Device_yn_Accuracy_Limited_Positive", - "Unit_Device_j0f_Accuracy_Limited_Positive", - "Unit_Device_j0_Accuracy_Limited_Positive", - "Unit_Device_j1f_Accuracy_Limited_Positive", - "Unit_Device_j1_Accuracy_Limited_Positive", - "Unit_Device_jnf_Accuracy_Limited_Positive", - "Unit_Device_jn_Accuracy_Limited_Positive", - "Unit_Device___double2int_rd_Positive", - "Unit_Device___double2int_rn_Positive", - "Unit_Device___double2int_ru_Positive", - "Unit_Device___double2int_rz_Positive", - "Unit_Device___double2int_Negative_RTC", - "Unit_Device___double2uint_rd_Positive", - "Unit_Device___double2uint_rn_Positive", - "Unit_Device___double2uint_ru_Positive", - "Unit_Device___double2uint_rz_Positive", - "Unit_Device___double2uint_Negative_RTC", - "Unit_Device___double2ll_rd_Positive", - "Unit_Device___double2ll_rn_Positive", - "Unit_Device___double2ll_ru_Positive", - "Unit_Device___double2ll_rz_Positive", - "Unit_Device___double2ll_Negative_RTC", - "Unit_Device___double2ull_rd_Positive", - "Unit_Device___double2ull_rn_Positive", - "Unit_Device___double2ull_ru_Positive", - "Unit_Device___double2ull_rz_Positive", - "Unit_Device___double2ull_Negative_RTC", - "Unit_Device___double2float_rd_Positive", - "Unit_Device___double2float_rn_Positive", - "Unit_Device___double2float_ru_Positive", - "Unit_Device___double2float_rz_Positive", - "Unit_Device___double2float_Negative_RTC", - "Unit_Device___double2hiint_Positive", - "Unit_Device___double2hiint_Negative_RTC", - "Unit_Device___double2loint_Positive", - "Unit_Device___double2loint_Negative_RTC", - "Unit_Device___double_as_longlong_Positive", - "Unit_Device___double_as_longlong_Negative_RTC", - "Unit_Device___float2int_rd_Positive", - "Unit_Device___float2int_rn_Positive", - "Unit_Device___float2int_ru_Positive", - "Unit_Device___float2int_rz_Positive", - "Unit_Device___float2int_Negative_RTC", - "Unit_Device___float2uint_rd_Positive", - "Unit_Device___float2uint_rn_Positive", - "Unit_Device___float2uint_ru_Positive", - "Unit_Device___float2uint_rz_Positive", - "Unit_Device___float2uint_Negative_RTC", - "Unit_Device___float2ll_rd_Positive", - "Unit_Device___float2ll_rn_Positive", - "Unit_Device___float2ll_ru_Positive", - "Unit_Device___float2ll_rz_Positive", - "Unit_Device___float2ll_Negative_RTC", - "Unit_Device___float2ull_rd_Positive", - "Unit_Device___float2ull_rn_Positive", - "Unit_Device___float2ull_ru_Positive", - "Unit_Device___float2ull_rz_Positive", - "Unit_Device___float2ull_Negative_RTC", - "Unit_Device___float_as_int_Positive", - "Unit_Device___float_as_int_Negative_RTC", - "Unit_Device___float_as_uint_Positive", - "Unit_Device___float_as_uint_Negative_RTC", - "Unit_Device___int2float_rd_Positive", - "Unit_Device___int2float_rn_Positive", - "Unit_Device___int2float_ru_Positive", - "Unit_Device___int2float_rz_Positive", - "Unit_Device_int2float___Negative_RTC", - "Unit_Device___uint2float_rd_Positive", - "Unit_Device___uint2float_rn_Positive", - "Unit_Device___uint2float_ru_Positive", - "Unit_Device___uint2float_rz_Positive", - "Unit_Device___uint2float_Negative_RTC", - "Unit_Device___int2double_rn_Positive", - "Unit_Device___int2double_Negative_RTC", - "Unit_Device___uint2double_rn_Positive", - "Unit_Device___uint2double_Negative_RTC", - "Unit_Device___ll2float_rd_Positive", - "Unit_Device___ll2float_rn_Positive", - "Unit_Device___ll2float_ru_Positive", - "Unit_Device___ll2float_rz_Positive", - "Unit_Device___ll2float_Negative_RTC", - "Unit_Device___ull2float_rd_Positive", - "Unit_Device___ull2float_rn_Positive", - "Unit_Device___ull2float_ru_Positive", - "Unit_Device___ull2float_rz_Positive", - "Unit_Device___ull2float_Negative_RTC", - "Unit_Device___ll2double_rd_Positive", - "Unit_Device___ll2double_rn_Positive", - "Unit_Device___ll2double_ru_Positive", - "Unit_Device___ll2double_rz_Positive", - "Unit_Device___ll2double_Negative_RTC", - "Unit_Device___ull2double_rd_Positive", - "Unit_Device___ull2double_rn_Positive", - "Unit_Device___ull2double_ru_Positive", - "Unit_Device___ull2double_rz_Positive", - "Unit_Device___ull2double_Negative_RTC", - "Unit_Device___int_as_float_Positive", - "Unit_Device___int_as_float_Negative_RTC", - "Unit_Device___uint_as_float_Positive", - "Unit_Device___uint_as_float_Negative_RTC", - "Unit_Device___longlong_as_double_Positive", - "Unit_Device___longlong_as_double_Negative_RTC", - "Unit_Device___hiloint2double_Positive", - "Unit_Device___hiloint2double_Negative_RTC", "SWDEV-447384, SWDEV-447932: These tests fail in gfx1100, gfx1101 & gfx1102", "SWDEV-445928: These tests fail in PSDB stress test on 09/02/2024", - "Unit_Device___float2half_rd_Accuracy_Limited_Positive", - "Unit_Device___float2half_ru_Accuracy_Limited_Positive", - "Unit_Device___float2half_rz_Accuracy_Limited_Positive", "Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg", "=== SWDEV-511679 : Below tests fail in stress test ===", "Unit_hipIpcOpenMemHandle_Negative_Open_In_Two_Contexts_Same_Device", @@ -551,6 +70,67 @@ "Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg", "=== SWDEV-553920 disabled until multi device graph issues are resolved ===", "Unit_hipGraphUpload_Functional_multidevice_test", + "=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 31)]", + "Unit_Device_sinpi_Accuracy_Positive - double", + "Unit_Device_cospi_Accuracy_Positive - double", + "Unit_Device_sincospi_Accuracy_Positive - double", + "=== special values test, fix: comment out [-3.0f, -1.5f, HEX_FLT(-, 0, 00000c, -, 126), HEX_FLT(-, 0, 000006, -, 126), +3.0f, 1.5f, HEX_FLT(+, 0, 00000c, -, 126), HEX_FLT(+, 0, 000006, -, 126),]", + "Unit_Device_fma_Accuracy_Positive - float", + "=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(-, 1, fffffffffffff, +, 30), HEX_DBL(+, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 30)]", + "Unit_Device_fdim_Accuracy_Positive - double", + "=== TODO special value test error, fix: comment out special value test", + "Unit_Device_remquo_Accuracy_Positive - float", + "Unit_Device_remquo_Accuracy_Positive - double", + "=== float* can be casted to double* [math_remainder_rounding_negative_kernels_rtc.hh:247]", + "Unit_Device_modf_modff_Negative_RTC", + "=== TODO round error", + "Unit_Device___fsqrt_rn_Accuracy_Positive", + "Unit_Device___frsqrt_rn_Accuracy_Positive", + "=== TODO terminated, ulp formula seems to be wrong [produces ulp of 4294967298]", + "Unit_Device___expf_Accuracy_Positive", + "Unit_Device___exp10f_Accuracy_Positive", + "=== TODO error, input +-1.40129846e-45", + "Unit_Device___log2f_Accuracy_Positive", + "=== special values test, fix: comment out [HEX_DBL(-, 0, 0000000000001, -, 1022)]", + "Unit_Device_log1p_Accuracy_Positive - double", + "=== Below 2 tests are disable due to defect EXSWHTEC-369 ===", + "Unit_Device_ilogbf_Accuracy_Positive", + "Unit_Device_ilogb_Accuracy_Positive", + "=== rounding float16 types not supported?", + "Unit_Device___float2half_rd_Accuracy_Limited_Positive", + "Unit_Device___float2half_ru_Accuracy_Limited_Positive", + "Unit_Device___float2half_rz_Accuracy_Limited_Positive", + "=== long int exponents are too large, works fine with int", + "Unit_Device_scalbln_Accuracy_Positive - float", + "Unit_Device_scalbln_Accuracy_Positive - double", + "=== Float16 problem", + "Unit_Device_hcos_Accuracy_Positive", + "Unit_Device_h2cos_Accuracy_Positive", + "Unit_Device_hsin_Accuracy_Positive", + "Unit_Device_h2sin_Accuracy_Positive", + "=== TODO [first 2 passed] Below 2 tests are disable due to defect EXSWHTEC-356 ===", + "Unit_Device___hisinf2_Accuracy_Positive", + "Unit_Device___hisnan2_Accuracy_Positive", + "Unit_Device___hbequ2_Accuracy_Positive", + "Unit_Device___hne_Accuracy_Positive", + "Unit_Device___hne2_Accuracy_Positive", + "Unit_Device___hbne2_Accuracy_Positive", + "Unit_Device___hbgeu2_Accuracy_Positive", + "Unit_Device___hbgtu2_Accuracy_Positive", + "Unit_Device___hbleu2_Accuracy_Positive", + "Unit_Device___hbltu2_Accuracy_Positive", + "=== TODO ===", + "Unit_Device_tgammaf_Accuracy_Limited_Positive", + "=== TODO === fail on 100% test data", + "Unit_Device_hexp10_Accuracy_Positive", + "Unit_Device_h2exp10_Accuracy_Positive", + "Unit_Device_hexp2_Accuracy_Positive", + "Unit_Device_h2exp2_Accuracy_Positive", + "Unit_Device_hlog_Accuracy_Positive", + "Unit_Device_h2log_Accuracy_Positive", + "Unit_Device_hlog10_Accuracy_Positive", + "Unit_Device_h2log10_Accuracy_Positive", + "Unit_Device___hfma2_Accuracy_Positive", #endif #if defined gfx90a || defined gfx942 || defined gfx950 "=== SWDEV-443630 : Below test failed in stress test on 19/01/24 ===", @@ -575,13 +155,6 @@ "Unit_Warp_Shfl_Down_Positive_Basic - double", "Unit_Warp_Shfl_Down_Positive_Basic - __half", "Unit_Warp_Shfl_Down_Positive_Basic - __half2", - "Unit_Device_norm_Sanity_Positive - float", - "Unit_Device_norm_Sanity_Positive - double", - "Unit_Device_rnorm_Sanity_Positive - float", - "Unit_Device_rnorm_Sanity_Positive - double", - "Unit_Device___float2half_rd_SmallVals_Sanity_Positive", - "Unit_Device___float2half_ru_SmallVals_Sanity_Positive", - "Unit_Device___float2half_rz_SmallVals_Sanity_Positive", #endif #if defined gfx1200 || defined gfx1201 "=== SWDEV-470751 : Fine Grain memory is MTYPE_NC due to HW bug.", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json index 7c902c5f39..98688479c7 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json @@ -12,5 +12,60 @@ "=== Below tests are failing PSDB ===", "Unit_Assert_Positive_Basic_KernelFail", "Unit_hipMemMapArrayAsync_Positive_Basic", + "=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 31)]", + "Unit_Device_sinpi_Accuracy_Positive - double", + "Unit_Device_cospi_Accuracy_Positive - double", + "Unit_Device_sincospi_Accuracy_Positive - double", + "=== special values test, fix: comment out [-3.0f, -1.5f, HEX_FLT(-, 0, 00000c, -, 126), HEX_FLT(-, 0, 000006, -, 126), +3.0f, 1.5f, HEX_FLT(+, 0, 00000c, -, 126), HEX_FLT(+, 0, 000006, -, 126),]", + "Unit_Device_fma_Accuracy_Positive - float", + "=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(-, 1, fffffffffffff, +, 30), HEX_DBL(+, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 30)]", + "Unit_Device_fdim_Accuracy_Positive - double", + "=== TODO special value test error, fix: comment out special value test", + "Unit_Device_remquo_Accuracy_Positive - float", + "Unit_Device_remquo_Accuracy_Positive - double", + "=== float* can be casted to double* [math_remainder_rounding_negative_kernels_rtc.hh:247]", + "Unit_Device_modf_modff_Negative_RTC", + "=== TODO round error", + "Unit_Device___frsqrt_rn_Accuracy_Positive", + "=== TODO terminated, ulp formula seems to be wrong [produces ulp of 4294967298]", + "Unit_Device___expf_Accuracy_Positive", + "Unit_Device___exp10f_Accuracy_Positive", + "=== rounding float16 types not supported?", + "Unit_Device___float2half_rd_Accuracy_Limited_Positive", + "Unit_Device___float2half_ru_Accuracy_Limited_Positive", + "Unit_Device___float2half_rz_Accuracy_Limited_Positive", + "=== TODO rounding error ===", + "Unit_Device___half2int_rn_Accuracy_Positive", + "Unit_Device___half2int_rd_Accuracy_Positive", + "Unit_Device___half2int_ru_Accuracy_Positive", + "Unit_Device___half2uint_rn_Accuracy_Positive", + "Unit_Device___half2uint_ru_Accuracy_Positive", + "Unit_Device___half2short_rn_Accuracy_Positive", + "Unit_Device___half2short_rd_Accuracy_Positive", + "Unit_Device___half2short_ru_Accuracy_Positive", + "Unit_Device___half2ushort_rn_Accuracy_Positive", + "Unit_Device___half2ushort_ru_Accuracy_Positive", + "Unit_Device___half2ll_rn_Accuracy_Positive", + "Unit_Device___half2ll_rd_Accuracy_Positive", + "Unit_Device___half2ll_ru_Accuracy_Positive", + "Unit_Device___half2ull_rn_Accuracy_Positive", + "Unit_Device___half2ull_ru_Accuracy_Positive", + "Unit_Device___int2half_rz_Accuracy_Positive", + "Unit_Device___int2half_rd_Accuracy_Positive", + "Unit_Device___int2half_ru_Accuracy_Positive", + "Unit_Device___uint2half_rz_Accuracy_Positive", + "Unit_Device___uint2half_rd_Accuracy_Positive", + "Unit_Device___uint2half_ru_Accuracy_Positive", + "Unit_Device___short2half_rz_Accuracy_Positive", + "Unit_Device___short2half_rd_Accuracy_Positive", + "Unit_Device___short2half_ru_Accuracy_Positive", + "Unit_Device___ushort2half_rz_Accuracy_Positive", + "Unit_Device___ushort2half_rd_Accuracy_Positive", + "Unit_Device___ushort2half_ru_Accuracy_Positive", + "Unit_Device___ll2half_rz_Accuracy_Positive", + "Unit_Device___ll2half_rd_Accuracy_Positive", + "Unit_Device___ll2half_ru_Accuracy_Positive", + "Unit_Device___ull2half_rz_Accuracy_Positive", + "Unit_Device___ull2half_rd_Accuracy_Positive" ] } diff --git a/projects/hip-tests/catch/hipTestMain/main.cc b/projects/hip-tests/catch/hipTestMain/main.cc index a142992cc9..3343b0db80 100644 --- a/projects/hip-tests/catch/hipTestMain/main.cc +++ b/projects/hip-tests/catch/hipTestMain/main.cc @@ -59,13 +59,19 @@ int main(int argc, char** argv) { ("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)") | Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory") ["-M"]["--accuracy-max-memory"] - ("Percentage of global device memory allowed for math accuracy tests (default: 80%)") + ("Percentage of global device memory allowed for math accuracy tests in case the global device memory is lower than max_memory (default: 80%)") | Opt(cmd_options.reduce_iterations, "reduce_iterations") ["-R"]["--reduce-iterations"] ("Number of iterations for fuzzing reduce operations (default: 1)") | Opt(cmd_options.reduce_input_size, "reduce_input_size") ["-Z"]["--reduce-input-size"] ("Size of the input for the reduce sync operations performance test (megabytes) (default: 50)") + | Opt(cmd_options.max_memory, "max_memory") + ["-X"]["--max-memory"] + ("Maximum amount of memory to use for math accuracy tests (default: 2GB)") + | Opt(cmd_options.reduction_factor, "reduction_factor") + ["-R"]["--reduction-factor"] + ("Percentage of test data to be actually tested (default: 0.1%)") ; // clang-format on diff --git a/projects/hip-tests/catch/include/cmd_options.hh b/projects/hip-tests/catch/include/cmd_options.hh index 21b535dfa8..e736c63472 100644 --- a/projects/hip-tests/catch/include/cmd_options.hh +++ b/projects/hip-tests/catch/include/cmd_options.hh @@ -36,6 +36,8 @@ struct CmdOptions { uint64_t reduce_iterations = 1; uint64_t reduce_input_size = 50; int accuracy_max_memory = 80; + uint64_t max_memory = 2147483648; // 2 GB + double reduction_factor = 0.1; }; extern CmdOptions cmd_options; diff --git a/projects/hip-tests/catch/unit/math/binary_common.hh b/projects/hip-tests/catch/unit/math/binary_common.hh index 395fb28c79..c0764e713b 100644 --- a/projects/hip-tests/catch/unit/math/binary_common.hh +++ b/projects/hip-tests/catch/unit/math/binary_common.hh @@ -35,7 +35,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ if constexpr (std::is_same_v) { \ ys[i] = func_name##f(x1s[i], x2s[i]); \ } else if constexpr (std::is_same_v) { \ diff --git a/projects/hip-tests/catch/unit/math/casting_common.hh b/projects/hip-tests/catch/unit/math/casting_common.hh index 9fb56d3674..84559b2f9c 100644 --- a/projects/hip-tests/catch/unit/math/casting_common.hh +++ b/projects/hip-tests/catch/unit/math/casting_common.hh @@ -31,7 +31,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(xs[i]); \ } \ } @@ -42,7 +42,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i]); \ } \ } @@ -93,7 +93,10 @@ void CastUnaryHalfPrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, const ValidatorBuilder& validator_builder) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); - uint64_t stop = std::numeric_limits::max() + 1ul; + const auto reduction_factor = GetTestReductionFactor(); + const auto inv_reduction_factor = 1 / reduction_factor; + const auto stop = static_cast( + std::ceil((std::numeric_limits::max() + 1ul) * reduction_factor)); const auto max_batch_size = std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop); LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)}; @@ -114,10 +117,11 @@ void CastUnaryHalfPrecisionBruteForceTest(kernel_sig kernel, const auto sub_batch_size = min_sub_batch_size + (i < tail); thread_pool.Post([=, &values] { - auto t = v; + auto t = v * inv_reduction_factor; uint16_t val; for (auto j = 0u; j < sub_batch_size; ++j) { - val = static_cast(t++); + val = static_cast(std::floor(t)); + t += inv_reduction_factor; values.ptr()[base_idx + j] = *reinterpret_cast(&val); if (std::isnan(values.ptr()[base_idx + j]) || std::isinf(values.ptr()[base_idx + j])) { values.ptr()[base_idx + j] = 0; @@ -178,15 +182,24 @@ void CastIntRangeTest(kernel_sig kernel, ref_sig ref_func, const TArg a = std::numeric_limits::lowest(), const TArg b = std::numeric_limits::max()) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto reduction_factor = GetTestReductionFactor(); + const auto inv_reduction_factor = 1 / reduction_factor; const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg)); LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; MathTest math_test(kernel, max_batch_size); + bool running = true; size_t inserted = 0u; - for (TArg v = a; v <= b; v++) { - values.ptr()[inserted++] = v; - if (inserted < max_batch_size) continue; + auto v = static_cast(a); + while (running) { + if (std::floor(v) > b) { + running = false; + } else { + values.ptr()[inserted++] = static_cast(std::floor(v)); + v += inv_reduction_factor; + if (inserted < max_batch_size) continue; + } math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr()); inserted = 0u; @@ -240,17 +253,27 @@ void CastBinaryIntRangeTest(kernel_sig kernel, ref_sig r const T2 a = std::numeric_limits::lowest(), const T2 b = std::numeric_limits::max()) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto reduction_factor = GetTestReductionFactor(); + const auto inv_reduction_factor = 1 / reduction_factor; const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) + 2 * sizeof(T2)); LinearAllocGuard values1{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; LinearAllocGuard values2{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; MathTest math_test(kernel, max_batch_size); + bool running = true; size_t inserted = 0u; - for (T2 v = a; v <= b; v++) { - values1.ptr()[inserted] = v; - values2.ptr()[inserted++] = b - v; - if (inserted < max_batch_size) continue; + auto v = static_cast(a); + while (running) { + if (std::floor(v) > b) { + running = false; + } else { + const auto t = static_cast(std::floor(v)); + values1.ptr()[inserted] = t; + values2.ptr()[inserted++] = b - t; + v += inv_reduction_factor; + if (inserted < max_batch_size) continue; + } math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values1.ptr(), values2.ptr()); diff --git a/projects/hip-tests/catch/unit/math/casting_half2_common.hh b/projects/hip-tests/catch/unit/math/casting_half2_common.hh index 085ae46ccc..2b4496ccf5 100644 --- a/projects/hip-tests/catch/unit/math/casting_half2_common.hh +++ b/projects/hip-tests/catch/unit/math/casting_half2_common.hh @@ -31,7 +31,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(__half2{xs[i], -xs[i]}); \ } \ } @@ -42,7 +42,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(__half2{x1s[i], -x1s[i]}, __half2{x2s[i], -x2s[i]}); \ } \ } diff --git a/projects/hip-tests/catch/unit/math/casting_half2_funcs.cc b/projects/hip-tests/catch/unit/math/casting_half2_funcs.cc index 561fec8de3..844df3e2e8 100644 --- a/projects/hip-tests/catch/unit/math/casting_half2_funcs.cc +++ b/projects/hip-tests/catch/unit/math/casting_half2_funcs.cc @@ -318,7 +318,7 @@ __global__ void __float22half2_rn_kernel(__half2* const ys, const size_t num_xs, const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { ys[i] = __float22half2_rn(make_float2(xs[i], -xs[i])); } } diff --git a/projects/hip-tests/catch/unit/math/casting_half_float_funcs.cc b/projects/hip-tests/catch/unit/math/casting_half_float_funcs.cc index c29123c9d1..4c40642ac4 100644 --- a/projects/hip-tests/catch/unit/math/casting_half_float_funcs.cc +++ b/projects/hip-tests/catch/unit/math/casting_half_float_funcs.cc @@ -36,7 +36,7 @@ THE SOFTWARE. TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Limited_Positive") { \ Float16 (*ref)(float) = kern_name##_ref; \ UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ - std::numeric_limits::min(), 0.f); \ + std::numeric_limits::lowest(), 0.f); \ UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ 0.0001f, std::numeric_limits::max()); \ } @@ -48,7 +48,7 @@ THE SOFTWARE. TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ Float16 (*ref)(float) = kern_name##_ref; \ UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ - std::numeric_limits::min(), \ + std::numeric_limits::lowest(), \ std::numeric_limits::max()); \ } diff --git a/projects/hip-tests/catch/unit/math/casting_int_funcs.cc b/projects/hip-tests/catch/unit/math/casting_int_funcs.cc index 9a986bf462..a04e0dd593 100644 --- a/projects/hip-tests/catch/unit/math/casting_int_funcs.cc +++ b/projects/hip-tests/catch/unit/math/casting_int_funcs.cc @@ -688,7 +688,7 @@ __global__ void __hiloint2double_kernel(double* const ys, const size_t num_xs, i const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { ys[i] = __hiloint2double(x1s[i], x2s[i]); } } diff --git a/projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc b/projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc index 69e5e2a8d0..6677c343cb 100644 --- a/projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc +++ b/projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc @@ -33,7 +33,7 @@ THE SOFTWARE. const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(xs[i]); \ } \ } @@ -100,7 +100,7 @@ MATH_UNARY_DP_TEST_DEF_IMPL(__dsqrt_rn, static_cast(std::sqr const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i]); \ } \ } @@ -206,7 +206,7 @@ MATH_BINARY_DP_TEST_DEF_IMPL(__ddiv_rn, __ddiv_rn_ref, EqValidatorBuilderFactory const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ } \ } diff --git a/projects/hip-tests/catch/unit/math/half_precision_common.hh b/projects/hip-tests/catch/unit/math/half_precision_common.hh index 1f494058b0..060e11627d 100644 --- a/projects/hip-tests/catch/unit/math/half_precision_common.hh +++ b/projects/hip-tests/catch/unit/math/half_precision_common.hh @@ -34,7 +34,7 @@ THE SOFTWARE. const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(xs[i]); \ } \ } @@ -59,7 +59,7 @@ THE SOFTWARE. const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i]); \ } \ } @@ -85,7 +85,7 @@ THE SOFTWARE. const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ } \ } diff --git a/projects/hip-tests/catch/unit/math/half_precision_comparison.cc b/projects/hip-tests/catch/unit/math/half_precision_comparison.cc index 7ef0e9bfcf..2d462f8d8d 100644 --- a/projects/hip-tests/catch/unit/math/half_precision_comparison.cc +++ b/projects/hip-tests/catch/unit/math/half_precision_comparison.cc @@ -35,7 +35,7 @@ THE SOFTWARE. const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(xs[i]); \ } \ } \ @@ -120,7 +120,7 @@ MATH_UNARY_HP_TEST_DEF_IMPL(__hisnan2, __hisnan2_ref, EqValidatorBuilderFactory< const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i]); \ } \ } \ diff --git a/projects/hip-tests/catch/unit/math/log_funcs.cc b/projects/hip-tests/catch/unit/math/log_funcs.cc index 216caba2a9..337c32ff04 100644 --- a/projects/hip-tests/catch/unit/math/log_funcs.cc +++ b/projects/hip-tests/catch/unit/math/log_funcs.cc @@ -35,7 +35,8 @@ THE SOFTWARE. * ------------------------ * - Tests the numerical accuracy of `logf(x)` for all possible inputs and `log(x)` against a * table of difficult values, followed by a large number of randomly generated values. The results - * are compared against reference function `T std::log(T)`. The maximum ulp error is 1. + * are compared against reference function `T std::log(T)`. The maximum ulp error + * for single precision is 2 and for double precision is 1. * * Test source * ------------------------ @@ -44,7 +45,7 @@ THE SOFTWARE. * ------------------------ * - HIP_VERSION >= 5.2 */ -MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log, 1, 1) +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log, 2, 1) /** * Test Description @@ -187,7 +188,7 @@ __global__ void ilogb_kernel(int* const ys, const size_t num_xs, T* const xs) { const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { if constexpr (std::is_same_v) { ys[i] = ilogbf(xs[i]); } else if constexpr (std::is_same_v) { diff --git a/projects/hip-tests/catch/unit/math/math_common.hh b/projects/hip-tests/catch/unit/math/math_common.hh index 4f7e5ddd29..08bd720816 100644 --- a/projects/hip-tests/catch/unit/math/math_common.hh +++ b/projects/hip-tests/catch/unit/math/math_common.hh @@ -212,10 +212,17 @@ template auto GetOccupancyMaxPotentialBlockSize(F kernel) { inline size_t GetMaxAllowedDeviceMemoryUsage() { hipDeviceProp_t props; HIP_CHECK(hipGetDeviceProperties(&props, 0)); - return props.totalGlobalMem * (cmd_options.accuracy_max_memory * 0.01f); + return props.totalGlobalMem > cmd_options.max_memory + ? cmd_options.max_memory + : props.totalGlobalMem * (cmd_options.accuracy_max_memory * 0.01f); } -inline uint64_t GetTestIterationCount() { return cmd_options.accuracy_iterations; } +inline double GetTestReductionFactor() { return cmd_options.reduction_factor * 0.01; } + +inline uint64_t GetTestIterationCount() { + return static_cast( + std::ceil(cmd_options.accuracy_iterations * GetTestReductionFactor())); +} template using kernel_sig = void (*)(T*, const size_t, Ts*...); @@ -254,3 +261,71 @@ template void NegativeTestRTCWrapper(const char* program_source) HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); REQUIRE(error_count == expected_error_count); } + +inline void SinglePrecisionReducedRun(std::function run, + const LinearAllocGuard& values, const float a, + const float b, const double reduction_factor, + const size_t max_batch_size) { + bool test_positive = true; + float positive_start = 0.0f; + float positive_end = 0.0f; + bool test_negative = true; + float negative_start = 0.0f; + float negative_end = 0.0f; + if (a < 0 && b <= 0) { + test_positive = false; + negative_start = -b; + negative_end = -a; + } + if (a < 0 && b > 0) { + positive_start = 0.0f; + positive_end = b; + negative_start = 0.0f; + negative_end = -a; + } + if (a >= 0 && b > 0) { + positive_start = a; + positive_end = b; + test_negative = false; + } + + const auto inv_reduction_factor = 1 / reduction_factor; + size_t inserted = 0u; + + constexpr int radix = std::numeric_limits::radix; + + float increment = std::numeric_limits::min() * std::numeric_limits::epsilon(); + float limit = std::numeric_limits::min() * radix; + + const auto iterate = [&](float start, float end, int positive) { + for (float v = start; v < end; limit *= radix, increment *= radix) { + const auto start_v = v; + double count = 0ul; + while (v < limit && v < end) { + values.ptr()[inserted++] = (v * positive); + count += inv_reduction_factor; + v = start_v + increment * static_cast(std::floor(count)); + if (inserted < max_batch_size) continue; + + run(inserted); + inserted = 0u; + } + } + + if (inserted > 0u) { + run(inserted); + inserted = 0u; + } + }; + + if (test_positive) { + iterate(positive_start, positive_end, 1); + } + + increment = std::numeric_limits::min() * std::numeric_limits::epsilon(); + limit = std::numeric_limits::min() * radix; + + if (test_negative) { + iterate(negative_start, negative_end, -1); + } +} diff --git a/projects/hip-tests/catch/unit/math/misc_funcs.cc b/projects/hip-tests/catch/unit/math/misc_funcs.cc index 35e21fb26e..93fd39c03c 100644 --- a/projects/hip-tests/catch/unit/math/misc_funcs.cc +++ b/projects/hip-tests/catch/unit/math/misc_funcs.cc @@ -37,12 +37,12 @@ TEST_CASE("Unit_Device_fmax_fmaxf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFm MATH_BINARY_WITHIN_ULP_TEST_DEF(fmin, std::fmin, 0, 0) TEST_CASE("Unit_Device_fmin_fminf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmin); } -MATH_BINARY_WITHIN_ULP_TEST_DEF(nextafter, std::nextafter, 0, 0) +MATH_BINARY_WITHIN_ULP_TEST_DEF(nextafter, std::nextafter, 1, 1) TEST_CASE("Unit_Device_nextafter_nextafterf_Negative_RTC") { NegativeTestRTCWrapper<8>(kNextAfter); } -MATH_TERNARY_WITHIN_ULP_TEST_DEF(fma, std::fma, 0, 0) +MATH_TERNARY_WITHIN_ULP_TEST_DEF(fma, std::fma, 0, 1) TEST_CASE("Unit_Device_fma_fmaf_Negative_RTC") { NegativeTestRTCWrapper<12>(kFma); } __global__ void fdividef_kernel(float* const ys, const size_t num_xs, float* const x1s, @@ -50,7 +50,7 @@ __global__ void fdividef_kernel(float* const ys, const size_t num_xs, float* con const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { ys[i] = fdividef(x1s[i], x2s[i]); } } @@ -68,7 +68,7 @@ TEST_CASE("Unit_Device_fdividef_Negative_RTC") { NegativeTestRTCWrapper<4>(kFdiv const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = kern_name(xs[i]); \ } \ } \ diff --git a/projects/hip-tests/catch/unit/math/misc_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/math/misc_negative_kernels_rtc.hh index 66521da090..d2c00b5876 100644 --- a/projects/hip-tests/catch/unit/math/misc_negative_kernels_rtc.hh +++ b/projects/hip-tests/catch/unit/math/misc_negative_kernels_rtc.hh @@ -123,9 +123,9 @@ class Dummy { __device__ ~Dummy() {} }; __global__ void fdividef_kernel_v1(float* x, float y) { float result = fdividef(x, y); } -__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdivide(x); } -__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdivide(x); } -__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdivide(x); } +__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdividef(x, y); } )"}; static constexpr auto kIsFinite{R"( diff --git a/projects/hip-tests/catch/unit/math/pow_common.hh b/projects/hip-tests/catch/unit/math/pow_common.hh index 95402c72d1..f09fade083 100644 --- a/projects/hip-tests/catch/unit/math/pow_common.hh +++ b/projects/hip-tests/catch/unit/math/pow_common.hh @@ -35,7 +35,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ if constexpr (std::is_same_v) { \ ys[i] = func_name##f(x1s[i], x2s[i]); \ } else if constexpr (std::is_same_v) { \ diff --git a/projects/hip-tests/catch/unit/math/pow_funcs.cc b/projects/hip-tests/catch/unit/math/pow_funcs.cc index 113a15accd..1e80c6f28b 100644 --- a/projects/hip-tests/catch/unit/math/pow_funcs.cc +++ b/projects/hip-tests/catch/unit/math/pow_funcs.cc @@ -183,7 +183,7 @@ __global__ void frexp_kernel(std::pair* const ys, const size_t num_xs, T const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { if constexpr (std::is_same_v) { ys[i].first = frexpf(xs[i], &ys[i].second); } else if constexpr (std::is_same_v) { diff --git a/projects/hip-tests/catch/unit/math/quaternary_common.hh b/projects/hip-tests/catch/unit/math/quaternary_common.hh index a9a8cc6778..0cc5bf22c5 100644 --- a/projects/hip-tests/catch/unit/math/quaternary_common.hh +++ b/projects/hip-tests/catch/unit/math/quaternary_common.hh @@ -35,7 +35,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ if constexpr (std::is_same_v) { \ ys[i] = func_name##f(x1s[i], x2s[i], x3s[i], x4s[i]); \ } else if constexpr (std::is_same_v) { \ diff --git a/projects/hip-tests/catch/unit/math/remainder_and_rounding_funcs.cc b/projects/hip-tests/catch/unit/math/remainder_and_rounding_funcs.cc index 2eea65e746..f761cb92a3 100644 --- a/projects/hip-tests/catch/unit/math/remainder_and_rounding_funcs.cc +++ b/projects/hip-tests/catch/unit/math/remainder_and_rounding_funcs.cc @@ -91,7 +91,7 @@ template __global__ void remquo_kernel(std::pair* const ys, const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { if constexpr (std::is_same_v) { ys[i].first = remquof(x1s[i], x2s[i], &ys[i].second); } else if constexpr (std::is_same_v) { @@ -123,7 +123,7 @@ __global__ void modf_kernel(std::pair* const ys, const size_t num_xs, T* c const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { if constexpr (std::is_same_v) { ys[i].first = modff(xs[i], &ys[i].second); } else if constexpr (std::is_same_v) { diff --git a/projects/hip-tests/catch/unit/math/root_funcs.cc b/projects/hip-tests/catch/unit/math/root_funcs.cc index c7aea0989d..4c4e88f6ba 100644 --- a/projects/hip-tests/catch/unit/math/root_funcs.cc +++ b/projects/hip-tests/catch/unit/math/root_funcs.cc @@ -265,7 +265,7 @@ MATH_BINARY_KERNEL_DEF(rhypot) * ------------------------ * - Tests the numerical accuracy of `rhypotf(x, y)` and `rhypot(x, y)`against a table of * difficult values, followed by a large number of randomly generated values. The maximum ulp error - * for single precision is 2 and for double precision is 1. + * is 2. * * Test source * ------------------------ @@ -278,7 +278,7 @@ TEMPLATE_TEST_CASE("Unit_Device_rhypot_Accuracy_Positive", "", float, double) { using RT = RefType_t; auto rhypot_ref = [](RT arg1, RT arg2) -> RT { return 1. / std::hypot(arg1, arg2); }; RT (*ref)(RT, RT) = rhypot_ref; - const auto ulp = std::is_same_v ? 2 : 1; + const auto ulp = std::is_same_v ? 2 : 2; BinaryFloatingPointTest(rhypot_kernel, ref, ULPValidatorBuilderFactory(ulp)); } @@ -348,7 +348,7 @@ MATH_TERNARY_KERNEL_DEF(rnorm3d) * ------------------------ * - Tests the numerical accuracy of `rnorm3df(x, y, z)` and `rnorm3d(x, y, z)`against a table of * difficult values, followed by a large number of randomly generated values. The maximum ulp error - * for single precision is 2 and for double precision is 1. + * is 2. * * Test source * ------------------------ @@ -366,7 +366,7 @@ TEMPLATE_TEST_CASE("Unit_Device_rnorm3d_Accuracy_Positive", "", float, double) { return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3); }; RT (*ref)(RT, RT, RT) = rnorm3d_ref; - const auto ulp = std::is_same_v ? 2 : 1; + const auto ulp = std::is_same_v ? 2 : 2; TernaryFloatingPointTest(rnorm3d_kernel, ref, ULPValidatorBuilderFactory(ulp)); } @@ -438,7 +438,7 @@ MATH_QUATERNARY_KERNEL_DEF(rnorm4d) * ------------------------ * - Tests the numerical accuracy of `rnorm4df(x, y, z, t)` and `rnorm4d(x, y, z, t)`against a * table of difficult values, followed by a large number of randomly generated values. The maximum - * ulp error for single precision is 2 and for double precision is 1. + * ulp error is 2. * * Test source * ------------------------ @@ -456,7 +456,7 @@ TEMPLATE_TEST_CASE("Unit_Device_rnorm4d_Accuracy_Positive", "", float, double) { return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4); }; RT (*ref)(RT, RT, RT, RT) = rnorm4d_ref; - const auto ulp = std::is_same_v ? 2 : 1; + const auto ulp = std::is_same_v ? 2 : 2; QuaternaryFloatingPointTest(rnorm4d_kernel, ref, ULPValidatorBuilderFactory(ulp)); } @@ -488,7 +488,7 @@ TEST_CASE("Unit_Device_rnorm4d_rnorm4df_Negative_RTC") { NegativeTestRTCWrapper< template void NormSimpleTest(F kernel, RF ref_func, const ValidatorBuilder& validator_builder) { - const auto max_dim = 10000; + const auto max_dim = static_cast(std::ceil(10000 * GetTestReductionFactor())); LinearAllocGuard x{LinearAllocs::hipHostMalloc, max_dim * sizeof(T)}; LinearAllocGuard x_dev{LinearAllocs::hipMalloc, max_dim * sizeof(T)}; diff --git a/projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc b/projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc index f0c12a67fc..3879fc6100 100644 --- a/projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc +++ b/projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc @@ -33,7 +33,7 @@ THE SOFTWARE. const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(xs[i]); \ } \ } @@ -359,7 +359,7 @@ MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_cos, static_cast(std:: const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i]); \ } \ } @@ -493,7 +493,7 @@ MATH_BINARY_SP_TEST_DEF(__fdividef, __fdiv_rn_ref); const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ } \ } diff --git a/projects/hip-tests/catch/unit/math/special_common.hh b/projects/hip-tests/catch/unit/math/special_common.hh index 4b55a88fee..ac821c9257 100644 --- a/projects/hip-tests/catch/unit/math/special_common.hh +++ b/projects/hip-tests/catch/unit/math/special_common.hh @@ -32,7 +32,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ if constexpr (std::is_same_v) { \ ys[i] = func_name##f(n[i], xs[i]); \ } else if constexpr (std::is_same_v) { \ @@ -96,6 +96,7 @@ void BesselSinglePrecisionRangeTest(kernel_bessel_n_sig kernel, const ValidatorBuilder& validator_builder, int n_input, const float a, const float b) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto reduction_factor = GetTestReductionFactor(); const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) * 2 + sizeof(int)); LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(int)}; LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; @@ -103,15 +104,11 @@ void BesselSinglePrecisionRangeTest(kernel_bessel_n_sig kernel, MathTest math_test(kernel, max_batch_size); std::fill_n(x1s.ptr(), max_batch_size, n_input); - size_t inserted = 0u; - for (float v = a; v != b; v = std::nextafter(v, b)) { - x2s.ptr()[inserted++] = v; - if (inserted < max_batch_size) continue; + const auto run = [&, gs = grid_size, bs = block_size](size_t inserted) { + math_test.Run(validator_builder, gs, bs, ref_func, inserted, x1s.ptr(), x2s.ptr()); + }; - math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, x1s.ptr(), - x2s.ptr()); - inserted = 0u; - } + SinglePrecisionReducedRun(run, x2s, a, b, reduction_factor, max_batch_size); } template diff --git a/projects/hip-tests/catch/unit/math/special_funcs.cc b/projects/hip-tests/catch/unit/math/special_funcs.cc index fdc1380fc1..1e4266dd18 100644 --- a/projects/hip-tests/catch/unit/math/special_funcs.cc +++ b/projects/hip-tests/catch/unit/math/special_funcs.cc @@ -103,7 +103,7 @@ MATH_UNARY_KERNEL_DEF(erfinv) * ------------------------ * - Tests the numerical accuracy of `erfinvf(x)` for all possible inputs. The results are * compared against reference function `double boost::math::erf_inv(double)`. The maximum ulp error - * is 2. + * is 4. * * Test source * ------------------------ @@ -124,7 +124,7 @@ TEST_CASE("Unit_Device_erfinvf_Accuracy_Positive") { return boost::math::erf_inv(arg); }; double (*ref)(double) = erfinv_ref; - UnarySinglePrecisionTest(erfinv_kernel, ref, ULPValidatorBuilderFactory(2)); + UnarySinglePrecisionTest(erfinv_kernel, ref, ULPValidatorBuilderFactory(4)); } /** @@ -535,9 +535,9 @@ MATH_UNARY_KERNEL_DEF(lgamma) */ TEST_CASE("Unit_Device_lgammaf_Accuracy_Limited_Positive") { double (*ref)(double) = std::lgamma; - UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(6), + UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(7), std::numeric_limits::lowest(), -11.0001f); - UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(6), + UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(7), -2.2636f, std::numeric_limits::max()); } diff --git a/projects/hip-tests/catch/unit/math/ternary_common.hh b/projects/hip-tests/catch/unit/math/ternary_common.hh index fef750bded..50d48828c8 100644 --- a/projects/hip-tests/catch/unit/math/ternary_common.hh +++ b/projects/hip-tests/catch/unit/math/ternary_common.hh @@ -35,7 +35,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ if constexpr (std::is_same_v) { \ ys[i] = func_name##f(x1s[i], x2s[i], x3s[i]); \ } else if constexpr (std::is_same_v) { \ diff --git a/projects/hip-tests/catch/unit/math/trig_funcs.cc b/projects/hip-tests/catch/unit/math/trig_funcs.cc index 9671b94ab9..75c2e94e0d 100644 --- a/projects/hip-tests/catch/unit/math/trig_funcs.cc +++ b/projects/hip-tests/catch/unit/math/trig_funcs.cc @@ -36,7 +36,7 @@ TEST_CASE("Unit_Device_cos_cosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCos) MATH_UNARY_WITHIN_ULP_TEST_DEF(tan, std::tan, 4, 2) TEST_CASE("Unit_Device_tan_tanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTan); } -MATH_UNARY_WITHIN_ULP_TEST_DEF(asin, std::asin, 2, 2) +MATH_UNARY_WITHIN_ULP_TEST_DEF(asin, std::asin, 3, 2) TEST_CASE("Unit_Device_asin_asinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAsin); } MATH_UNARY_WITHIN_ULP_TEST_DEF(acos, std::acos, 2, 2) @@ -78,7 +78,7 @@ __global__ void sincos_kernel(std::pair* const ys, const size_t num_xs, T* const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { if constexpr (std::is_same_v) { sincosf(xs[i], &ys[i].first, &ys[i].second); } else if constexpr (std::is_same_v) { @@ -109,7 +109,7 @@ __global__ void sincospi_kernel(std::pair* const ys, const size_t num_xs, const auto tid = cg::this_grid().thread_rank(); const auto stride = cg::this_grid().size(); - for (auto i = tid; i < num_xs; i += stride) { + for (size_t i = tid; i < num_xs; i += stride) { if constexpr (std::is_same_v) { sincospif(xs[i], &ys[i].first, &ys[i].second); } else if constexpr (std::is_same_v) { diff --git a/projects/hip-tests/catch/unit/math/unary_common.hh b/projects/hip-tests/catch/unit/math/unary_common.hh index 180b963f4a..b5b181be90 100644 --- a/projects/hip-tests/catch/unit/math/unary_common.hh +++ b/projects/hip-tests/catch/unit/math/unary_common.hh @@ -34,7 +34,7 @@ namespace cg = cooperative_groups; const auto tid = cg::this_grid().thread_rank(); \ const auto stride = cg::this_grid().size(); \ \ - for (auto i = tid; i < num_xs; i += stride) { \ + for (size_t i = tid; i < num_xs; i += stride) { \ if constexpr (std::is_same_v) { \ ys[i] = func_name##f(xs[i]); \ } else if constexpr (std::is_same_v) { \ @@ -47,7 +47,10 @@ template void UnaryHalfPrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, const ValidatorBuilder& validator_builder) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); - uint64_t stop = std::numeric_limits::max() + 1ul; + const auto reduction_factor = GetTestReductionFactor(); + const auto inv_reduction_factor = 1 / reduction_factor; + const auto stop = static_cast( + std::ceil((std::numeric_limits::max() + 1ul) * reduction_factor)); const auto max_batch_size = std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop); LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)}; @@ -68,10 +71,11 @@ void UnaryHalfPrecisionBruteForceTest(kernel_sig kernel, ref_sig(t++); + val = static_cast(std::floor(t)); + t += inv_reduction_factor; values.ptr()[base_idx + j] = *reinterpret_cast(&val); } }); @@ -90,7 +94,10 @@ template void UnarySinglePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, const ValidatorBuilder& validator_builder) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); - uint64_t stop = std::numeric_limits::max() + 1ul; + const auto reduction_factor = GetTestReductionFactor(); + const auto inv_reduction_factor = 1 / reduction_factor; + const auto stop = static_cast( + std::ceil((std::numeric_limits::max() + 1ul) * reduction_factor)); const auto max_batch_size = std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)), stop); LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; @@ -111,10 +118,11 @@ void UnarySinglePrecisionBruteForceTest(kernel_sig kernel, ref_sig(t++); + val = static_cast(std::floor(t)); + t += inv_reduction_factor; values.ptr()[base_idx + j] = *reinterpret_cast(&val); } }); @@ -134,19 +142,17 @@ void UnarySinglePrecisionRangeTest(kernel_sig kernel, ref_sig values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; - + MathTest math_test(kernel, max_batch_size); - size_t inserted = 0u; - for (float v = a; v != b; v = std::nextafter(v, b)) { - values.ptr()[inserted++] = v; - if (inserted < max_batch_size) continue; + const auto run = [&, gs = grid_size, bs = block_size](size_t inserted) { + math_test.Run(validator_builder, gs, bs, ref_func, inserted, values.ptr()); + }; - math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr()); - inserted = 0u; - } + SinglePrecisionReducedRun(run, values, a, b, reduction_factor, max_batch_size); } template