SWDEV-503089 - Fix and enable disabled HIP tests from math group (#1319)
* SWDEV-503089 - Fix and enable disabled HIP tests from math group * SWDEV-503089 - Move single precision reduced run to a common function
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
ee48f6221d
Коммит
da9bb4efae
@@ -17,25 +17,6 @@
|
||||
"NOTE: The following 2 tests are disabled due to defect - EXSWHTEC-238",
|
||||
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Basic",
|
||||
"Unit_hipMemRangeGetAttribute_Positive_AccessedBy_Partial_Range",
|
||||
"=== Below 2 tests are disable due to defect EXSWHTEC-356 ===",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"=== Below 4 tests are disable due to defect EXSWHTEC-355 ===",
|
||||
"Unit_Device___hadd_Sanity_Positive",
|
||||
"Unit_Device___uhadd_Sanity_Positive",
|
||||
"Unit_Device___rhadd_Sanity_Positive",
|
||||
"Unit_Device___urhadd_Sanity_Positive",
|
||||
"=== Below 2 tests are disable due to defect EXSWHTEC-369 ===",
|
||||
"Unit_Device_ilogbf_Accuracy_Positive",
|
||||
"Unit_Device_ilogb_Accuracy_Positive",
|
||||
"=== SWDEV-434171: Below tests took long time to complete in stress test on 17/11/23 ===",
|
||||
"Unit_Warp_Shfl_Positive_Basic - int",
|
||||
"Unit_Warp_Shfl_Positive_Basic - unsigned int",
|
||||
@@ -77,470 +58,8 @@
|
||||
"Unit_hipStreamBeginCapture_Positive_Functional",
|
||||
"Unit_Kernel_Launch_bounds_Negative_OutOfBounds",
|
||||
"Unit_Kernel_Launch_bounds_Negative_Parameters_RTC",
|
||||
"Unit_Device_sin_Accuracy_Positive - float",
|
||||
"Unit_Device_sin_Accuracy_Positive - double",
|
||||
"Unit_Device_cos_Accuracy_Positive - float",
|
||||
"Unit_Device_cos_Accuracy_Positive - double",
|
||||
"Unit_Device_tan_Accuracy_Positive - float",
|
||||
"Unit_Device_tan_Accuracy_Positive - double",
|
||||
"Unit_Device_asin_Accuracy_Positive - float",
|
||||
"Unit_Device_asin_Accuracy_Positive - double",
|
||||
"Unit_Device_acos_Accuracy_Positive - float",
|
||||
"Unit_Device_acos_Accuracy_Positive - double",
|
||||
"Unit_Device_atan_Accuracy_Positive - float",
|
||||
"Unit_Device_atan_Accuracy_Positive - double",
|
||||
"Unit_Device_sinh_Accuracy_Positive - float",
|
||||
"Unit_Device_sinh_Accuracy_Positive - double",
|
||||
"Unit_Device_cosh_Accuracy_Positive - float",
|
||||
"Unit_Device_cosh_Accuracy_Positive - double",
|
||||
"Unit_Device_tanh_Accuracy_Positive - float",
|
||||
"Unit_Device_tanh_Accuracy_Positive - double",
|
||||
"Unit_Device_asinh_Accuracy_Positive - float",
|
||||
"Unit_Device_asinh_Accuracy_Positive - double",
|
||||
"Unit_Device_acosh_Accuracy_Positive - float",
|
||||
"Unit_Device_acosh_Accuracy_Positive - double",
|
||||
"Unit_Device_atanh_Accuracy_Positive - float",
|
||||
"Unit_Device_atanh_Accuracy_Positive - double",
|
||||
"Unit_Device_sinpi_Accuracy_Positive - float",
|
||||
"Unit_Device_sinpi_Accuracy_Positive - double",
|
||||
"Unit_Device_cospi_Accuracy_Positive - float",
|
||||
"Unit_Device_cospi_Accuracy_Positive - double",
|
||||
"Unit_Device_tanpi_Accuracy_Positive - float",
|
||||
"Unit_Device_tanpi_Accuracy_Positive - double",
|
||||
"Unit_Device_atan2_Accuracy_Positive - float",
|
||||
"Unit_Device_atan2_Accuracy_Positive - double",
|
||||
"Unit_Device_sincos_Accuracy_Positive - float",
|
||||
"Unit_Device_sincos_Accuracy_Positive - double",
|
||||
"Unit_Device_sincospi_Accuracy_Positive - float",
|
||||
"Unit_Device_sincospi_Accuracy_Positive - double",
|
||||
"Unit_Device_fabs_Accuracy_Positive - float",
|
||||
"Unit_Device_fabs_Accuracy_Positive - double",
|
||||
"Unit_Device_copysign_Accuracy_Positive - float",
|
||||
"Unit_Device_copysign_Accuracy_Positive - double",
|
||||
"Unit_Device_fmax_Accuracy_Positive - float",
|
||||
"Unit_Device_fmax_Accuracy_Positive - double",
|
||||
"Unit_Device_fmin_Accuracy_Positive - float",
|
||||
"Unit_Device_fmin_Accuracy_Positive - double",
|
||||
"Unit_Device_nextafter_Accuracy_Positive - float",
|
||||
"Unit_Device_nextafter_Accuracy_Positive - double",
|
||||
"Unit_Device_fma_Accuracy_Positive - float",
|
||||
"Unit_Device_fma_Accuracy_Positive - double",
|
||||
"Unit_Device_fdividef_Accuracy_Positive",
|
||||
"Unit_Device_isfinite_Accuracy_Positive - float",
|
||||
"Unit_Device_isfinite_Accuracy_Positive - double",
|
||||
"Unit_Device_isinf_Accuracy_Positive - float",
|
||||
"Unit_Device_isinf_Accuracy_Positive - double",
|
||||
"Unit_Device_isnan_Accuracy_Positive - float",
|
||||
"Unit_Device_isnan_Accuracy_Positive - double",
|
||||
"Unit_Device_signbit_Accuracy_Positive - float",
|
||||
"Unit_Device_signbit_Accuracy_Positive - double",
|
||||
"Unit_Device_fmod_Accuracy_Positive - float",
|
||||
"Unit_Device_fmod_Accuracy_Positive - double",
|
||||
"Unit_Device_remainder_Accuracy_Positive - float",
|
||||
"Unit_Device_remainder_Accuracy_Positive - double",
|
||||
"Unit_Device_fdim_Accuracy_Positive - float",
|
||||
"Unit_Device_fdim_Accuracy_Positive - double",
|
||||
"Unit_Device_trunc_Accuracy_Positive - float",
|
||||
"Unit_Device_trunc_Accuracy_Positive - double",
|
||||
"Unit_Device_round_Accuracy_Positive - float",
|
||||
"Unit_Device_round_Accuracy_Positive - double",
|
||||
"Unit_Device_rint_Accuracy_Positive - float",
|
||||
"Unit_Device_rint_Accuracy_Positive - double",
|
||||
"Unit_Device_nearbyint_Accuracy_Positive - float",
|
||||
"Unit_Device_nearbyint_Accuracy_Positive - double",
|
||||
"Unit_Device_ceil_Accuracy_Positive - float",
|
||||
"Unit_Device_ceil_Accuracy_Positive - double",
|
||||
"Unit_Device_floor_Accuracy_Positive - float",
|
||||
"Unit_Device_floor_Accuracy_Positive - double",
|
||||
"Unit_Device_lrint_Accuracy_Positive - float",
|
||||
"Unit_Device_lrint_Accuracy_Positive - double",
|
||||
"Unit_Device_lround_Accuracy_Positive - float",
|
||||
"Unit_Device_lround_Accuracy_Positive - double",
|
||||
"Unit_Device_llrint_Accuracy_Positive - float",
|
||||
"Unit_Device_llrint_Accuracy_Positive - double",
|
||||
"Unit_Device_llround_Accuracy_Positive - float",
|
||||
"Unit_Device_llround_Accuracy_Positive - double",
|
||||
"Unit_Device_remquo_Accuracy_Positive - float",
|
||||
"Unit_Device_remquo_Accuracy_Positive - double",
|
||||
"Unit_Device_modf_Accuracy_Positive - float",
|
||||
"Unit_Device_modf_Accuracy_Positive - double",
|
||||
"=== Below tests cause timeout in stress test of 09/02/24 ===",
|
||||
"Unit_Device___half2half2_Accuracy_Positive",
|
||||
"Unit_Device_make_half2_Accuracy_Positive",
|
||||
"Unit_Device___halves2half2_Accuracy_Positive",
|
||||
"Unit_Device___low2half_Accuracy_Positive",
|
||||
"Unit_Device___high2half_Accuracy_Positive",
|
||||
"Unit_Device___low2half2_Accuracy_Positive",
|
||||
"Unit_Device___high2half2_Accuracy_Positive",
|
||||
"Unit_Device___lowhigh2highlow_Accuracy_Positive",
|
||||
"Unit_Device___lows2half2_Accuracy_Positive",
|
||||
"Unit_Device___highs2half2_Accuracy_Positive",
|
||||
"Unit_Device___float2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___floats2half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___float22half2_rn_Accuracy_Positive",
|
||||
"Unit_Device___low2float_Accuracy_Positive",
|
||||
"Unit_Device___high2float_Accuracy_Positive",
|
||||
"Unit_Device___half22float2_Accuracy_Positive",
|
||||
"Unit_Device_hcos_Accuracy_Positive",
|
||||
"Unit_Device_h2cos_Accuracy_Positive",
|
||||
"Unit_Device_hsin_Accuracy_Positive",
|
||||
"Unit_Device_h2sin_Accuracy_Positive",
|
||||
"Unit_Device_hexp_Accuracy_Positive",
|
||||
"Unit_Device_h2exp_Accuracy_Positive",
|
||||
"Unit_Device_hexp10_Accuracy_Positive",
|
||||
"Unit_Device_h2exp10_Accuracy_Positive",
|
||||
"Unit_Device_hexp2_Accuracy_Positive",
|
||||
"Unit_Device_h2exp2_Accuracy_Positive",
|
||||
"Unit_Device_hlog_Accuracy_Positive",
|
||||
"Unit_Device_h2log_Accuracy_Positive",
|
||||
"Unit_Device_hlog10_Accuracy_Positive",
|
||||
"Unit_Device_h2log10_Accuracy_Positive",
|
||||
"Unit_Device_hlog2_Accuracy_Positive",
|
||||
"Unit_Device_h2log2_Accuracy_Positive",
|
||||
"Unit_Device_hsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2sqrt_Accuracy_Positive",
|
||||
"Unit_Device_hceil_Accuracy_Positive",
|
||||
"Unit_Device_h2ceil_Accuracy_Positive",
|
||||
"Unit_Device_hfloor_Accuracy_Positive",
|
||||
"Unit_Device_h2floor_Accuracy_Positive",
|
||||
"Unit_Device_htrunc_Accuracy_Positive",
|
||||
"Unit_Device_h2trunc_Accuracy_Positive",
|
||||
"Unit_Device_hrcp_Accuracy_Positive",
|
||||
"Unit_Device_h2rcp_Accuracy_Positive",
|
||||
"Unit_Device_hrsqrt_Accuracy_Positive",
|
||||
"Unit_Device_h2rsqrt_Accuracy_Positive",
|
||||
"Unit_Device_hrint_Accuracy_Positive",
|
||||
"Unit_Device_h2rint_Accuracy_Positive",
|
||||
"Unit_Device___habs_Accuracy_Positive",
|
||||
"Unit_Device___habs2_Accuracy_Positive",
|
||||
"Unit_Device___hneg_Accuracy_Positive",
|
||||
"Unit_Device___hneg2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_wrapper_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_Accuracy_Positive",
|
||||
"Unit_Device___hadd_sat_Accuracy_Positive",
|
||||
"Unit_Device___hadd2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_Accuracy_Positive",
|
||||
"Unit_Device___hsub_sat_Accuracy_Positive",
|
||||
"Unit_Device___hsub2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_Accuracy_Positive",
|
||||
"Unit_Device___hmul_sat_Accuracy_Positive",
|
||||
"Unit_Device___hmul2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hdiv_Accuracy_Positive",
|
||||
"Unit_Device___h2div_Accuracy_Positive",
|
||||
"Unit_Device___hfma_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_Accuracy_Positive",
|
||||
"Unit_Device___hfma_sat_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_sat_Accuracy_Positive",
|
||||
"Unit_Device___hisinf_Accuracy_Positive",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___heq_Accuracy_Positive",
|
||||
"Unit_Device___hbeq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___heq2_Accuracy_Positive",
|
||||
"Unit_Device___hequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu_Accuracy_Positive",
|
||||
"Unit_Device___hbneu2_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hneu2_Accuracy_Positive",
|
||||
"Unit_Device___hge_Accuracy_Positive",
|
||||
"Unit_Device___hbge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hge2_Accuracy_Positive",
|
||||
"Unit_Device___hgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt_Accuracy_Positive",
|
||||
"Unit_Device___hbgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hgt2_Accuracy_Positive",
|
||||
"Unit_Device___hgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hle_Accuracy_Positive",
|
||||
"Unit_Device___hble2_Accuracy_Positive",
|
||||
"Unit_Device___hleu_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hle2_Accuracy_Positive",
|
||||
"Unit_Device___hleu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt_Accuracy_Positive",
|
||||
"Unit_Device___hblt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"Unit_Device___hlt2_Accuracy_Positive",
|
||||
"Unit_Device___hltu2_Accuracy_Positive",
|
||||
"Unit_Device___hmax_Accuracy_Positive",
|
||||
"Unit_Device___hmin_Accuracy_Positive",
|
||||
"Unit_Device___hmax_nan_Accuracy_Positive",
|
||||
"Unit_Device___hmin_nan_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2int_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2short_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rz_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_ru_Accuracy_Positive",
|
||||
"Unit_Device___half_as_short_Accuracy_Positive",
|
||||
"Unit_Device___half_as_ushort_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___int2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___short2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short_as_half_Accuracy_Positive",
|
||||
"Unit_Device___ushort_as_half_Accuracy_Positive",
|
||||
"Unit_Device___float2half_rn_Accuracy_Positive",
|
||||
"Unit_Device___float2half_Accuracy_Positive",
|
||||
"Unit_Device___half2float_Accuracy_Positive",
|
||||
"Unit_Device___frcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___frsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___expf_Accuracy_Positive",
|
||||
"Unit_Device___exp10f_Accuracy_Positive",
|
||||
"Unit_Device___logf_Accuracy_Positive",
|
||||
"Unit_Device___log2f_Accuracy_Positive",
|
||||
"Unit_Device___log10f_Accuracy_Positive",
|
||||
"Unit_Device___sinf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_sin_Accuracy_Positive",
|
||||
"Unit_Device___cosf_Accuracy_Positive",
|
||||
"Unit_Device___sincosf_cos_Accuracy_Positive",
|
||||
"Unit_Device___fadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___fsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___fmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fdividef_Accuracy_Positive",
|
||||
"Unit_Device___fmaf_rn_Accuracy_Positive",
|
||||
"Unit_Device___drcp_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___dadd_rn_Accuracy_Positive",
|
||||
"Unit_Device___dsub_rn_Accuracy_Positive",
|
||||
"Unit_Device___dmul_rn_Accuracy_Positive",
|
||||
"Unit_Device___ddiv_rn_Accuracy_Positive",
|
||||
"Unit_Device___fma_rn_Accuracy_Positive",
|
||||
"Unit_Device_sqrtf_Accuracy_Positive",
|
||||
"Unit_Device_sqrt_Accuracy_Positive",
|
||||
"Unit_Device_rsqrtf_Accuracy_Positive",
|
||||
"Unit_Device_rsqrt_Accuracy_Positive",
|
||||
"Unit_Device_cbrt_Accuracy_Positive - float",
|
||||
"Unit_Device_cbrt_Accuracy_Positive - double",
|
||||
"Unit_Device_rcbrtf_Accuracy_Positive",
|
||||
"Unit_Device_rcbrt_Accuracy_Positive",
|
||||
"Unit_Device_hypot_Accuracy_Positive - float",
|
||||
"Unit_Device_hypot_Accuracy_Positive - double",
|
||||
"Unit_Device_rhypot_Accuracy_Positive - float",
|
||||
"Unit_Device_rhypot_Accuracy_Positive - double",
|
||||
"Unit_Device_norm3d_Accuracy_Positive - float",
|
||||
"Unit_Device_norm3d_Accuracy_Positive - double",
|
||||
"Unit_Device_rnorm3d_Accuracy_Positive - float",
|
||||
"Unit_Device_rnorm3d_Accuracy_Positive - double",
|
||||
"Unit_Device_norm4d_Accuracy_Positive - float",
|
||||
"Unit_Device_norm4d_Accuracy_Positive - double",
|
||||
"Unit_Device_rnorm4d_Accuracy_Positive - float",
|
||||
"Unit_Device_rnorm4d_Accuracy_Positive - double",
|
||||
"Unit_Device_exp_Accuracy_Positive - float",
|
||||
"Unit_Device_exp_Accuracy_Positive - double",
|
||||
"Unit_Device_exp2_Accuracy_Positive - float",
|
||||
"Unit_Device_exp2_Accuracy_Positive - double",
|
||||
"Unit_Device_expm1_Accuracy_Positive - float",
|
||||
"Unit_Device_expm1_Accuracy_Positive - double",
|
||||
"Unit_Device_exp10f_Accuracy_Positive",
|
||||
"Unit_Device_exp10_Accuracy_Positive",
|
||||
"Unit_Device_frexpf_Accuracy_Positive",
|
||||
"Unit_Device_frexp_Accuracy_Positive",
|
||||
"Unit_Device_pow_Accuracy_Positive - float",
|
||||
"Unit_Device_pow_Accuracy_Positive - double",
|
||||
"Unit_Device_ldexp_Accuracy_Positive - float",
|
||||
"Unit_Device_ldexp_Accuracy_Positive - double",
|
||||
"Unit_Device_powi_Accuracy_Positive - float",
|
||||
"Unit_Device_powi_Accuracy_Positive - double",
|
||||
"Unit_Device_scalbn_Accuracy_Positive - float",
|
||||
"Unit_Device_scalbn_Accuracy_Positive - double",
|
||||
"Unit_Device_scalbln_Accuracy_Positive - float",
|
||||
"Unit_Device_scalbln_Accuracy_Positive - double",
|
||||
"Unit_Device_log_Accuracy_Positive - float",
|
||||
"Unit_Device_log_Accuracy_Positive - double",
|
||||
"Unit_Device_log2_Accuracy_Positive - float",
|
||||
"Unit_Device_log2_Accuracy_Positive - double",
|
||||
"Unit_Device_log10_Accuracy_Positive - float",
|
||||
"Unit_Device_log10_Accuracy_Positive - double",
|
||||
"Unit_Device_log1p_Accuracy_Positive - float",
|
||||
"Unit_Device_log1p_Accuracy_Positive - double",
|
||||
"Unit_Device_logb_Accuracy_Positive - float",
|
||||
"Unit_Device_logb_Accuracy_Positive - double",
|
||||
"Unit_Device_ilogbf_Accuracy_Positive",
|
||||
"Unit_Device_ilogb_Accuracy_Positive",
|
||||
"Unit_Device_erf_Accuracy_Positive - float",
|
||||
"Unit_Device_erf_Accuracy_Positive - double",
|
||||
"Unit_Device_erfc_Accuracy_Positive - float",
|
||||
"Unit_Device_erfc_Accuracy_Positive - double",
|
||||
"Unit_Device_erfinvf_Accuracy_Positive",
|
||||
"Unit_Device_erfinv_Accuracy_Positive",
|
||||
"Unit_Device_erfcinvf_Accuracy_Positive",
|
||||
"Unit_Device_erfcinv_Accuracy_Positive",
|
||||
"Unit_Device_normcdff_Accuracy_Positive",
|
||||
"Unit_Device_normcdf_Accuracy_Positive",
|
||||
"Unit_Device_tgammaf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_tgamma_Accuracy_Limited_Positive",
|
||||
"Unit_Device_lgammaf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_lgamma_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i0f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i0_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i1f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_cyl_bessel_i1_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y0f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y0_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y1f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_y1_Accuracy_Limited_Positive",
|
||||
"Unit_Device_ynf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_yn_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j0f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j0_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j1f_Accuracy_Limited_Positive",
|
||||
"Unit_Device_j1_Accuracy_Limited_Positive",
|
||||
"Unit_Device_jnf_Accuracy_Limited_Positive",
|
||||
"Unit_Device_jn_Accuracy_Limited_Positive",
|
||||
"Unit_Device___double2int_rd_Positive",
|
||||
"Unit_Device___double2int_rn_Positive",
|
||||
"Unit_Device___double2int_ru_Positive",
|
||||
"Unit_Device___double2int_rz_Positive",
|
||||
"Unit_Device___double2int_Negative_RTC",
|
||||
"Unit_Device___double2uint_rd_Positive",
|
||||
"Unit_Device___double2uint_rn_Positive",
|
||||
"Unit_Device___double2uint_ru_Positive",
|
||||
"Unit_Device___double2uint_rz_Positive",
|
||||
"Unit_Device___double2uint_Negative_RTC",
|
||||
"Unit_Device___double2ll_rd_Positive",
|
||||
"Unit_Device___double2ll_rn_Positive",
|
||||
"Unit_Device___double2ll_ru_Positive",
|
||||
"Unit_Device___double2ll_rz_Positive",
|
||||
"Unit_Device___double2ll_Negative_RTC",
|
||||
"Unit_Device___double2ull_rd_Positive",
|
||||
"Unit_Device___double2ull_rn_Positive",
|
||||
"Unit_Device___double2ull_ru_Positive",
|
||||
"Unit_Device___double2ull_rz_Positive",
|
||||
"Unit_Device___double2ull_Negative_RTC",
|
||||
"Unit_Device___double2float_rd_Positive",
|
||||
"Unit_Device___double2float_rn_Positive",
|
||||
"Unit_Device___double2float_ru_Positive",
|
||||
"Unit_Device___double2float_rz_Positive",
|
||||
"Unit_Device___double2float_Negative_RTC",
|
||||
"Unit_Device___double2hiint_Positive",
|
||||
"Unit_Device___double2hiint_Negative_RTC",
|
||||
"Unit_Device___double2loint_Positive",
|
||||
"Unit_Device___double2loint_Negative_RTC",
|
||||
"Unit_Device___double_as_longlong_Positive",
|
||||
"Unit_Device___double_as_longlong_Negative_RTC",
|
||||
"Unit_Device___float2int_rd_Positive",
|
||||
"Unit_Device___float2int_rn_Positive",
|
||||
"Unit_Device___float2int_ru_Positive",
|
||||
"Unit_Device___float2int_rz_Positive",
|
||||
"Unit_Device___float2int_Negative_RTC",
|
||||
"Unit_Device___float2uint_rd_Positive",
|
||||
"Unit_Device___float2uint_rn_Positive",
|
||||
"Unit_Device___float2uint_ru_Positive",
|
||||
"Unit_Device___float2uint_rz_Positive",
|
||||
"Unit_Device___float2uint_Negative_RTC",
|
||||
"Unit_Device___float2ll_rd_Positive",
|
||||
"Unit_Device___float2ll_rn_Positive",
|
||||
"Unit_Device___float2ll_ru_Positive",
|
||||
"Unit_Device___float2ll_rz_Positive",
|
||||
"Unit_Device___float2ll_Negative_RTC",
|
||||
"Unit_Device___float2ull_rd_Positive",
|
||||
"Unit_Device___float2ull_rn_Positive",
|
||||
"Unit_Device___float2ull_ru_Positive",
|
||||
"Unit_Device___float2ull_rz_Positive",
|
||||
"Unit_Device___float2ull_Negative_RTC",
|
||||
"Unit_Device___float_as_int_Positive",
|
||||
"Unit_Device___float_as_int_Negative_RTC",
|
||||
"Unit_Device___float_as_uint_Positive",
|
||||
"Unit_Device___float_as_uint_Negative_RTC",
|
||||
"Unit_Device___int2float_rd_Positive",
|
||||
"Unit_Device___int2float_rn_Positive",
|
||||
"Unit_Device___int2float_ru_Positive",
|
||||
"Unit_Device___int2float_rz_Positive",
|
||||
"Unit_Device_int2float___Negative_RTC",
|
||||
"Unit_Device___uint2float_rd_Positive",
|
||||
"Unit_Device___uint2float_rn_Positive",
|
||||
"Unit_Device___uint2float_ru_Positive",
|
||||
"Unit_Device___uint2float_rz_Positive",
|
||||
"Unit_Device___uint2float_Negative_RTC",
|
||||
"Unit_Device___int2double_rn_Positive",
|
||||
"Unit_Device___int2double_Negative_RTC",
|
||||
"Unit_Device___uint2double_rn_Positive",
|
||||
"Unit_Device___uint2double_Negative_RTC",
|
||||
"Unit_Device___ll2float_rd_Positive",
|
||||
"Unit_Device___ll2float_rn_Positive",
|
||||
"Unit_Device___ll2float_ru_Positive",
|
||||
"Unit_Device___ll2float_rz_Positive",
|
||||
"Unit_Device___ll2float_Negative_RTC",
|
||||
"Unit_Device___ull2float_rd_Positive",
|
||||
"Unit_Device___ull2float_rn_Positive",
|
||||
"Unit_Device___ull2float_ru_Positive",
|
||||
"Unit_Device___ull2float_rz_Positive",
|
||||
"Unit_Device___ull2float_Negative_RTC",
|
||||
"Unit_Device___ll2double_rd_Positive",
|
||||
"Unit_Device___ll2double_rn_Positive",
|
||||
"Unit_Device___ll2double_ru_Positive",
|
||||
"Unit_Device___ll2double_rz_Positive",
|
||||
"Unit_Device___ll2double_Negative_RTC",
|
||||
"Unit_Device___ull2double_rd_Positive",
|
||||
"Unit_Device___ull2double_rn_Positive",
|
||||
"Unit_Device___ull2double_ru_Positive",
|
||||
"Unit_Device___ull2double_rz_Positive",
|
||||
"Unit_Device___ull2double_Negative_RTC",
|
||||
"Unit_Device___int_as_float_Positive",
|
||||
"Unit_Device___int_as_float_Negative_RTC",
|
||||
"Unit_Device___uint_as_float_Positive",
|
||||
"Unit_Device___uint_as_float_Negative_RTC",
|
||||
"Unit_Device___longlong_as_double_Positive",
|
||||
"Unit_Device___longlong_as_double_Negative_RTC",
|
||||
"Unit_Device___hiloint2double_Positive",
|
||||
"Unit_Device___hiloint2double_Negative_RTC",
|
||||
"SWDEV-447384, SWDEV-447932: These tests fail in gfx1100, gfx1101 & gfx1102",
|
||||
"SWDEV-445928: These tests fail in PSDB stress test on 09/02/2024",
|
||||
"Unit_Device___float2half_rd_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_ru_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_rz_Accuracy_Limited_Positive",
|
||||
"Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg",
|
||||
"=== SWDEV-511679 : Below tests fail in stress test ===",
|
||||
"Unit_hipIpcOpenMemHandle_Negative_Open_In_Two_Contexts_Same_Device",
|
||||
@@ -551,6 +70,67 @@
|
||||
"Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg",
|
||||
"=== SWDEV-553920 disabled until multi device graph issues are resolved ===",
|
||||
"Unit_hipGraphUpload_Functional_multidevice_test",
|
||||
"=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 31)]",
|
||||
"Unit_Device_sinpi_Accuracy_Positive - double",
|
||||
"Unit_Device_cospi_Accuracy_Positive - double",
|
||||
"Unit_Device_sincospi_Accuracy_Positive - double",
|
||||
"=== special values test, fix: comment out [-3.0f, -1.5f, HEX_FLT(-, 0, 00000c, -, 126), HEX_FLT(-, 0, 000006, -, 126), +3.0f, 1.5f, HEX_FLT(+, 0, 00000c, -, 126), HEX_FLT(+, 0, 000006, -, 126),]",
|
||||
"Unit_Device_fma_Accuracy_Positive - float",
|
||||
"=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(-, 1, fffffffffffff, +, 30), HEX_DBL(+, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 30)]",
|
||||
"Unit_Device_fdim_Accuracy_Positive - double",
|
||||
"=== TODO special value test error, fix: comment out special value test",
|
||||
"Unit_Device_remquo_Accuracy_Positive - float",
|
||||
"Unit_Device_remquo_Accuracy_Positive - double",
|
||||
"=== float* can be casted to double* [math_remainder_rounding_negative_kernels_rtc.hh:247]",
|
||||
"Unit_Device_modf_modff_Negative_RTC",
|
||||
"=== TODO round error",
|
||||
"Unit_Device___fsqrt_rn_Accuracy_Positive",
|
||||
"Unit_Device___frsqrt_rn_Accuracy_Positive",
|
||||
"=== TODO terminated, ulp formula seems to be wrong [produces ulp of 4294967298]",
|
||||
"Unit_Device___expf_Accuracy_Positive",
|
||||
"Unit_Device___exp10f_Accuracy_Positive",
|
||||
"=== TODO error, input +-1.40129846e-45",
|
||||
"Unit_Device___log2f_Accuracy_Positive",
|
||||
"=== special values test, fix: comment out [HEX_DBL(-, 0, 0000000000001, -, 1022)]",
|
||||
"Unit_Device_log1p_Accuracy_Positive - double",
|
||||
"=== Below 2 tests are disable due to defect EXSWHTEC-369 ===",
|
||||
"Unit_Device_ilogbf_Accuracy_Positive",
|
||||
"Unit_Device_ilogb_Accuracy_Positive",
|
||||
"=== rounding float16 types not supported?",
|
||||
"Unit_Device___float2half_rd_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_ru_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_rz_Accuracy_Limited_Positive",
|
||||
"=== long int exponents are too large, works fine with int",
|
||||
"Unit_Device_scalbln_Accuracy_Positive - float",
|
||||
"Unit_Device_scalbln_Accuracy_Positive - double",
|
||||
"=== Float16 problem",
|
||||
"Unit_Device_hcos_Accuracy_Positive",
|
||||
"Unit_Device_h2cos_Accuracy_Positive",
|
||||
"Unit_Device_hsin_Accuracy_Positive",
|
||||
"Unit_Device_h2sin_Accuracy_Positive",
|
||||
"=== TODO [first 2 passed] Below 2 tests are disable due to defect EXSWHTEC-356 ===",
|
||||
"Unit_Device___hisinf2_Accuracy_Positive",
|
||||
"Unit_Device___hisnan2_Accuracy_Positive",
|
||||
"Unit_Device___hbequ2_Accuracy_Positive",
|
||||
"Unit_Device___hne_Accuracy_Positive",
|
||||
"Unit_Device___hne2_Accuracy_Positive",
|
||||
"Unit_Device___hbne2_Accuracy_Positive",
|
||||
"Unit_Device___hbgeu2_Accuracy_Positive",
|
||||
"Unit_Device___hbgtu2_Accuracy_Positive",
|
||||
"Unit_Device___hbleu2_Accuracy_Positive",
|
||||
"Unit_Device___hbltu2_Accuracy_Positive",
|
||||
"=== TODO ===",
|
||||
"Unit_Device_tgammaf_Accuracy_Limited_Positive",
|
||||
"=== TODO === fail on 100% test data",
|
||||
"Unit_Device_hexp10_Accuracy_Positive",
|
||||
"Unit_Device_h2exp10_Accuracy_Positive",
|
||||
"Unit_Device_hexp2_Accuracy_Positive",
|
||||
"Unit_Device_h2exp2_Accuracy_Positive",
|
||||
"Unit_Device_hlog_Accuracy_Positive",
|
||||
"Unit_Device_h2log_Accuracy_Positive",
|
||||
"Unit_Device_hlog10_Accuracy_Positive",
|
||||
"Unit_Device_h2log10_Accuracy_Positive",
|
||||
"Unit_Device___hfma2_Accuracy_Positive",
|
||||
#endif
|
||||
#if defined gfx90a || defined gfx942 || defined gfx950
|
||||
"=== SWDEV-443630 : Below test failed in stress test on 19/01/24 ===",
|
||||
@@ -575,13 +155,6 @@
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - double",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - __half",
|
||||
"Unit_Warp_Shfl_Down_Positive_Basic - __half2",
|
||||
"Unit_Device_norm_Sanity_Positive - float",
|
||||
"Unit_Device_norm_Sanity_Positive - double",
|
||||
"Unit_Device_rnorm_Sanity_Positive - float",
|
||||
"Unit_Device_rnorm_Sanity_Positive - double",
|
||||
"Unit_Device___float2half_rd_SmallVals_Sanity_Positive",
|
||||
"Unit_Device___float2half_ru_SmallVals_Sanity_Positive",
|
||||
"Unit_Device___float2half_rz_SmallVals_Sanity_Positive",
|
||||
#endif
|
||||
#if defined gfx1200 || defined gfx1201
|
||||
"=== SWDEV-470751 : Fine Grain memory is MTYPE_NC due to HW bug.",
|
||||
|
||||
@@ -12,5 +12,60 @@
|
||||
"=== Below tests are failing PSDB ===",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"Unit_hipMemMapArrayAsync_Positive_Basic",
|
||||
"=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 31)]",
|
||||
"Unit_Device_sinpi_Accuracy_Positive - double",
|
||||
"Unit_Device_cospi_Accuracy_Positive - double",
|
||||
"Unit_Device_sincospi_Accuracy_Positive - double",
|
||||
"=== special values test, fix: comment out [-3.0f, -1.5f, HEX_FLT(-, 0, 00000c, -, 126), HEX_FLT(-, 0, 000006, -, 126), +3.0f, 1.5f, HEX_FLT(+, 0, 00000c, -, 126), HEX_FLT(+, 0, 000006, -, 126),]",
|
||||
"Unit_Device_fma_Accuracy_Positive - float",
|
||||
"=== special values test, fix: comment out [HEX_DBL(-, 1, fffffffffffff, +, 31), HEX_DBL(-, 1, fffffffffffff, +, 30), HEX_DBL(+, 1, fffffffffffff, +, 31), HEX_DBL(+, 1, fffffffffffff, +, 30)]",
|
||||
"Unit_Device_fdim_Accuracy_Positive - double",
|
||||
"=== TODO special value test error, fix: comment out special value test",
|
||||
"Unit_Device_remquo_Accuracy_Positive - float",
|
||||
"Unit_Device_remquo_Accuracy_Positive - double",
|
||||
"=== float* can be casted to double* [math_remainder_rounding_negative_kernels_rtc.hh:247]",
|
||||
"Unit_Device_modf_modff_Negative_RTC",
|
||||
"=== TODO round error",
|
||||
"Unit_Device___frsqrt_rn_Accuracy_Positive",
|
||||
"=== TODO terminated, ulp formula seems to be wrong [produces ulp of 4294967298]",
|
||||
"Unit_Device___expf_Accuracy_Positive",
|
||||
"Unit_Device___exp10f_Accuracy_Positive",
|
||||
"=== rounding float16 types not supported?",
|
||||
"Unit_Device___float2half_rd_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_ru_Accuracy_Limited_Positive",
|
||||
"Unit_Device___float2half_rz_Accuracy_Limited_Positive",
|
||||
"=== TODO rounding error ===",
|
||||
"Unit_Device___half2int_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2int_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2int_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2uint_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2short_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2short_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ushort_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_rd_Accuracy_Positive",
|
||||
"Unit_Device___half2ll_ru_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_rn_Accuracy_Positive",
|
||||
"Unit_Device___half2ull_ru_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___int2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___int2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___uint2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___short2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___short2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ushort2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_rd_Accuracy_Positive",
|
||||
"Unit_Device___ll2half_ru_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rz_Accuracy_Positive",
|
||||
"Unit_Device___ull2half_rd_Accuracy_Positive"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -59,13 +59,19 @@ int main(int argc, char** argv) {
|
||||
("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)")
|
||||
| Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory")
|
||||
["-M"]["--accuracy-max-memory"]
|
||||
("Percentage of global device memory allowed for math accuracy tests (default: 80%)")
|
||||
("Percentage of global device memory allowed for math accuracy tests in case the global device memory is lower than max_memory (default: 80%)")
|
||||
| Opt(cmd_options.reduce_iterations, "reduce_iterations")
|
||||
["-R"]["--reduce-iterations"]
|
||||
("Number of iterations for fuzzing reduce operations (default: 1)")
|
||||
| Opt(cmd_options.reduce_input_size, "reduce_input_size")
|
||||
["-Z"]["--reduce-input-size"]
|
||||
("Size of the input for the reduce sync operations performance test (megabytes) (default: 50)")
|
||||
| Opt(cmd_options.max_memory, "max_memory")
|
||||
["-X"]["--max-memory"]
|
||||
("Maximum amount of memory to use for math accuracy tests (default: 2GB)")
|
||||
| Opt(cmd_options.reduction_factor, "reduction_factor")
|
||||
["-R"]["--reduction-factor"]
|
||||
("Percentage of test data to be actually tested (default: 0.1%)")
|
||||
;
|
||||
// clang-format on
|
||||
|
||||
|
||||
@@ -36,6 +36,8 @@ struct CmdOptions {
|
||||
uint64_t reduce_iterations = 1;
|
||||
uint64_t reduce_input_size = 50;
|
||||
int accuracy_max_memory = 80;
|
||||
uint64_t max_memory = 2147483648; // 2 GB
|
||||
double reduction_factor = 0.1;
|
||||
};
|
||||
|
||||
extern CmdOptions cmd_options;
|
||||
|
||||
@@ -35,7 +35,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
if constexpr (std::is_same_v<float, T>) { \
|
||||
ys[i] = func_name##f(x1s[i], x2s[i]); \
|
||||
} else if constexpr (std::is_same_v<double, T>) { \
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(xs[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -42,7 +42,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -93,7 +93,10 @@ void CastUnaryHalfPrecisionBruteForceTest(kernel_sig<T, Float16> kernel,
|
||||
ref_sig<RT, RTArg> ref_func,
|
||||
const ValidatorBuilder& validator_builder) {
|
||||
const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
|
||||
uint64_t stop = std::numeric_limits<uint16_t>::max() + 1ul;
|
||||
const auto reduction_factor = GetTestReductionFactor();
|
||||
const auto inv_reduction_factor = 1 / reduction_factor;
|
||||
const auto stop = static_cast<uint64_t>(
|
||||
std::ceil((std::numeric_limits<uint16_t>::max() + 1ul) * reduction_factor));
|
||||
const auto max_batch_size =
|
||||
std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop);
|
||||
LinearAllocGuard<Float16> values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)};
|
||||
@@ -114,10 +117,11 @@ void CastUnaryHalfPrecisionBruteForceTest(kernel_sig<T, Float16> kernel,
|
||||
const auto sub_batch_size = min_sub_batch_size + (i < tail);
|
||||
|
||||
thread_pool.Post([=, &values] {
|
||||
auto t = v;
|
||||
auto t = v * inv_reduction_factor;
|
||||
uint16_t val;
|
||||
for (auto j = 0u; j < sub_batch_size; ++j) {
|
||||
val = static_cast<uint16_t>(t++);
|
||||
val = static_cast<uint16_t>(std::floor(t));
|
||||
t += inv_reduction_factor;
|
||||
values.ptr()[base_idx + j] = *reinterpret_cast<Float16*>(&val);
|
||||
if (std::isnan(values.ptr()[base_idx + j]) || std::isinf(values.ptr()[base_idx + j])) {
|
||||
values.ptr()[base_idx + j] = 0;
|
||||
@@ -178,15 +182,24 @@ void CastIntRangeTest(kernel_sig<T, TArg> kernel, ref_sig<RT, RTArg> ref_func,
|
||||
const TArg a = std::numeric_limits<TArg>::lowest(),
|
||||
const TArg b = std::numeric_limits<TArg>::max()) {
|
||||
const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
|
||||
const auto reduction_factor = GetTestReductionFactor();
|
||||
const auto inv_reduction_factor = 1 / reduction_factor;
|
||||
const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg));
|
||||
LinearAllocGuard<TArg> values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
|
||||
|
||||
MathTest math_test(kernel, max_batch_size);
|
||||
|
||||
bool running = true;
|
||||
size_t inserted = 0u;
|
||||
for (TArg v = a; v <= b; v++) {
|
||||
values.ptr()[inserted++] = v;
|
||||
if (inserted < max_batch_size) continue;
|
||||
auto v = static_cast<double>(a);
|
||||
while (running) {
|
||||
if (std::floor(v) > b) {
|
||||
running = false;
|
||||
} else {
|
||||
values.ptr()[inserted++] = static_cast<TArg>(std::floor(v));
|
||||
v += inv_reduction_factor;
|
||||
if (inserted < max_batch_size) continue;
|
||||
}
|
||||
|
||||
math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr());
|
||||
inserted = 0u;
|
||||
@@ -240,17 +253,27 @@ void CastBinaryIntRangeTest(kernel_sig<T1, T2, T2> kernel, ref_sig<T1, T2, T2> r
|
||||
const T2 a = std::numeric_limits<T2>::lowest(),
|
||||
const T2 b = std::numeric_limits<T2>::max()) {
|
||||
const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
|
||||
const auto reduction_factor = GetTestReductionFactor();
|
||||
const auto inv_reduction_factor = 1 / reduction_factor;
|
||||
const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) + 2 * sizeof(T2));
|
||||
LinearAllocGuard<T2> values1{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)};
|
||||
LinearAllocGuard<T2> values2{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)};
|
||||
|
||||
MathTest math_test(kernel, max_batch_size);
|
||||
|
||||
bool running = true;
|
||||
size_t inserted = 0u;
|
||||
for (T2 v = a; v <= b; v++) {
|
||||
values1.ptr()[inserted] = v;
|
||||
values2.ptr()[inserted++] = b - v;
|
||||
if (inserted < max_batch_size) continue;
|
||||
auto v = static_cast<double>(a);
|
||||
while (running) {
|
||||
if (std::floor(v) > b) {
|
||||
running = false;
|
||||
} else {
|
||||
const auto t = static_cast<T2>(std::floor(v));
|
||||
values1.ptr()[inserted] = t;
|
||||
values2.ptr()[inserted++] = b - t;
|
||||
v += inv_reduction_factor;
|
||||
if (inserted < max_batch_size) continue;
|
||||
}
|
||||
|
||||
math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values1.ptr(),
|
||||
values2.ptr());
|
||||
|
||||
@@ -31,7 +31,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(__half2{xs[i], -xs[i]}); \
|
||||
} \
|
||||
}
|
||||
@@ -42,7 +42,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(__half2{x1s[i], -x1s[i]}, __half2{x2s[i], -x2s[i]}); \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -318,7 +318,7 @@ __global__ void __float22half2_rn_kernel(__half2* const ys, const size_t num_xs,
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
ys[i] = __float22half2_rn(make_float2(xs[i], -xs[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ THE SOFTWARE.
|
||||
TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Limited_Positive") { \
|
||||
Float16 (*ref)(float) = kern_name##_ref; \
|
||||
UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory<Float16>(), \
|
||||
std::numeric_limits<float>::min(), 0.f); \
|
||||
std::numeric_limits<float>::lowest(), 0.f); \
|
||||
UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory<Float16>(), \
|
||||
0.0001f, std::numeric_limits<float>::max()); \
|
||||
}
|
||||
@@ -48,7 +48,7 @@ THE SOFTWARE.
|
||||
TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \
|
||||
Float16 (*ref)(float) = kern_name##_ref; \
|
||||
UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory<Float16>(), \
|
||||
std::numeric_limits<float>::min(), \
|
||||
std::numeric_limits<float>::lowest(), \
|
||||
std::numeric_limits<float>::max()); \
|
||||
}
|
||||
|
||||
|
||||
@@ -688,7 +688,7 @@ __global__ void __hiloint2double_kernel(double* const ys, const size_t num_xs, i
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
ys[i] = __hiloint2double(x1s[i], x2s[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ THE SOFTWARE.
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(xs[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -100,7 +100,7 @@ MATH_UNARY_DP_TEST_DEF_IMPL(__dsqrt_rn, static_cast<double (*)(double)>(std::sqr
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -206,7 +206,7 @@ MATH_BINARY_DP_TEST_DEF_IMPL(__ddiv_rn, __ddiv_rn_ref, EqValidatorBuilderFactory
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ THE SOFTWARE.
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(xs[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -59,7 +59,7 @@ THE SOFTWARE.
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -85,7 +85,7 @@ THE SOFTWARE.
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ THE SOFTWARE.
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(xs[i]); \
|
||||
} \
|
||||
} \
|
||||
@@ -120,7 +120,7 @@ MATH_UNARY_HP_TEST_DEF_IMPL(__hisnan2, __hisnan2_ref, EqValidatorBuilderFactory<
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i]); \
|
||||
} \
|
||||
} \
|
||||
|
||||
@@ -35,7 +35,8 @@ THE SOFTWARE.
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `logf(x)` for all possible inputs and `log(x)` against a
|
||||
* table of difficult values, followed by a large number of randomly generated values. The results
|
||||
* are compared against reference function `T std::log(T)`. The maximum ulp error is 1.
|
||||
* are compared against reference function `T std::log(T)`. The maximum ulp error
|
||||
* for single precision is 2 and for double precision is 1.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
@@ -44,7 +45,7 @@ THE SOFTWARE.
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log, 1, 1)
|
||||
MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log, 2, 1)
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
@@ -187,7 +188,7 @@ __global__ void ilogb_kernel(int* const ys, const size_t num_xs, T* const xs) {
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
if constexpr (std::is_same_v<float, T>) {
|
||||
ys[i] = ilogbf(xs[i]);
|
||||
} else if constexpr (std::is_same_v<double, T>) {
|
||||
|
||||
@@ -212,10 +212,17 @@ template <typename F> auto GetOccupancyMaxPotentialBlockSize(F kernel) {
|
||||
inline size_t GetMaxAllowedDeviceMemoryUsage() {
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, 0));
|
||||
return props.totalGlobalMem * (cmd_options.accuracy_max_memory * 0.01f);
|
||||
return props.totalGlobalMem > cmd_options.max_memory
|
||||
? cmd_options.max_memory
|
||||
: props.totalGlobalMem * (cmd_options.accuracy_max_memory * 0.01f);
|
||||
}
|
||||
|
||||
inline uint64_t GetTestIterationCount() { return cmd_options.accuracy_iterations; }
|
||||
inline double GetTestReductionFactor() { return cmd_options.reduction_factor * 0.01; }
|
||||
|
||||
inline uint64_t GetTestIterationCount() {
|
||||
return static_cast<uint64_t>(
|
||||
std::ceil(cmd_options.accuracy_iterations * GetTestReductionFactor()));
|
||||
}
|
||||
|
||||
template <typename T, typename... Ts> using kernel_sig = void (*)(T*, const size_t, Ts*...);
|
||||
|
||||
@@ -254,3 +261,71 @@ template <int error_num> void NegativeTestRTCWrapper(const char* program_source)
|
||||
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
|
||||
REQUIRE(error_count == expected_error_count);
|
||||
}
|
||||
|
||||
inline void SinglePrecisionReducedRun(std::function<void(size_t)> run,
|
||||
const LinearAllocGuard<float>& values, const float a,
|
||||
const float b, const double reduction_factor,
|
||||
const size_t max_batch_size) {
|
||||
bool test_positive = true;
|
||||
float positive_start = 0.0f;
|
||||
float positive_end = 0.0f;
|
||||
bool test_negative = true;
|
||||
float negative_start = 0.0f;
|
||||
float negative_end = 0.0f;
|
||||
if (a < 0 && b <= 0) {
|
||||
test_positive = false;
|
||||
negative_start = -b;
|
||||
negative_end = -a;
|
||||
}
|
||||
if (a < 0 && b > 0) {
|
||||
positive_start = 0.0f;
|
||||
positive_end = b;
|
||||
negative_start = 0.0f;
|
||||
negative_end = -a;
|
||||
}
|
||||
if (a >= 0 && b > 0) {
|
||||
positive_start = a;
|
||||
positive_end = b;
|
||||
test_negative = false;
|
||||
}
|
||||
|
||||
const auto inv_reduction_factor = 1 / reduction_factor;
|
||||
size_t inserted = 0u;
|
||||
|
||||
constexpr int radix = std::numeric_limits<float>::radix;
|
||||
|
||||
float increment = std::numeric_limits<float>::min() * std::numeric_limits<float>::epsilon();
|
||||
float limit = std::numeric_limits<float>::min() * radix;
|
||||
|
||||
const auto iterate = [&](float start, float end, int positive) {
|
||||
for (float v = start; v < end; limit *= radix, increment *= radix) {
|
||||
const auto start_v = v;
|
||||
double count = 0ul;
|
||||
while (v < limit && v < end) {
|
||||
values.ptr()[inserted++] = (v * positive);
|
||||
count += inv_reduction_factor;
|
||||
v = start_v + increment * static_cast<uint64_t>(std::floor(count));
|
||||
if (inserted < max_batch_size) continue;
|
||||
|
||||
run(inserted);
|
||||
inserted = 0u;
|
||||
}
|
||||
}
|
||||
|
||||
if (inserted > 0u) {
|
||||
run(inserted);
|
||||
inserted = 0u;
|
||||
}
|
||||
};
|
||||
|
||||
if (test_positive) {
|
||||
iterate(positive_start, positive_end, 1);
|
||||
}
|
||||
|
||||
increment = std::numeric_limits<float>::min() * std::numeric_limits<float>::epsilon();
|
||||
limit = std::numeric_limits<float>::min() * radix;
|
||||
|
||||
if (test_negative) {
|
||||
iterate(negative_start, negative_end, -1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,12 +37,12 @@ TEST_CASE("Unit_Device_fmax_fmaxf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFm
|
||||
MATH_BINARY_WITHIN_ULP_TEST_DEF(fmin, std::fmin, 0, 0)
|
||||
TEST_CASE("Unit_Device_fmin_fminf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmin); }
|
||||
|
||||
MATH_BINARY_WITHIN_ULP_TEST_DEF(nextafter, std::nextafter, 0, 0)
|
||||
MATH_BINARY_WITHIN_ULP_TEST_DEF(nextafter, std::nextafter, 1, 1)
|
||||
TEST_CASE("Unit_Device_nextafter_nextafterf_Negative_RTC") {
|
||||
NegativeTestRTCWrapper<8>(kNextAfter);
|
||||
}
|
||||
|
||||
MATH_TERNARY_WITHIN_ULP_TEST_DEF(fma, std::fma, 0, 0)
|
||||
MATH_TERNARY_WITHIN_ULP_TEST_DEF(fma, std::fma, 0, 1)
|
||||
TEST_CASE("Unit_Device_fma_fmaf_Negative_RTC") { NegativeTestRTCWrapper<12>(kFma); }
|
||||
|
||||
__global__ void fdividef_kernel(float* const ys, const size_t num_xs, float* const x1s,
|
||||
@@ -50,7 +50,7 @@ __global__ void fdividef_kernel(float* const ys, const size_t num_xs, float* con
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
ys[i] = fdividef(x1s[i], x2s[i]);
|
||||
}
|
||||
}
|
||||
@@ -68,7 +68,7 @@ TEST_CASE("Unit_Device_fdividef_Negative_RTC") { NegativeTestRTCWrapper<4>(kFdiv
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = kern_name(xs[i]); \
|
||||
} \
|
||||
} \
|
||||
|
||||
@@ -123,9 +123,9 @@ class Dummy {
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
__global__ void fdividef_kernel_v1(float* x, float y) { float result = fdividef(x, y); }
|
||||
__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdivide(x); }
|
||||
__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdivide(x); }
|
||||
__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdivide(x); }
|
||||
__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdividef(x, y); }
|
||||
__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdividef(x, y); }
|
||||
__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdividef(x, y); }
|
||||
)"};
|
||||
|
||||
static constexpr auto kIsFinite{R"(
|
||||
|
||||
@@ -35,7 +35,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
if constexpr (std::is_same_v<float, T1>) { \
|
||||
ys[i] = func_name##f(x1s[i], x2s[i]); \
|
||||
} else if constexpr (std::is_same_v<double, T1>) { \
|
||||
|
||||
@@ -183,7 +183,7 @@ __global__ void frexp_kernel(std::pair<T, int>* const ys, const size_t num_xs, T
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
if constexpr (std::is_same_v<float, T>) {
|
||||
ys[i].first = frexpf(xs[i], &ys[i].second);
|
||||
} else if constexpr (std::is_same_v<double, T>) {
|
||||
|
||||
@@ -35,7 +35,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
if constexpr (std::is_same_v<float, T>) { \
|
||||
ys[i] = func_name##f(x1s[i], x2s[i], x3s[i], x4s[i]); \
|
||||
} else if constexpr (std::is_same_v<double, T>) { \
|
||||
|
||||
@@ -91,7 +91,7 @@ template <typename T> __global__ void remquo_kernel(std::pair<T, int>* const ys,
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
if constexpr (std::is_same_v<float, T>) {
|
||||
ys[i].first = remquof(x1s[i], x2s[i], &ys[i].second);
|
||||
} else if constexpr (std::is_same_v<double, T>) {
|
||||
@@ -123,7 +123,7 @@ __global__ void modf_kernel(std::pair<T, T>* const ys, const size_t num_xs, T* c
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
if constexpr (std::is_same_v<float, T>) {
|
||||
ys[i].first = modff(xs[i], &ys[i].second);
|
||||
} else if constexpr (std::is_same_v<double, T>) {
|
||||
|
||||
@@ -265,7 +265,7 @@ MATH_BINARY_KERNEL_DEF(rhypot)
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `rhypotf(x, y)` and `rhypot(x, y)`against a table of
|
||||
* difficult values, followed by a large number of randomly generated values. The maximum ulp error
|
||||
* for single precision is 2 and for double precision is 1.
|
||||
* is 2.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
@@ -278,7 +278,7 @@ TEMPLATE_TEST_CASE("Unit_Device_rhypot_Accuracy_Positive", "", float, double) {
|
||||
using RT = RefType_t<TestType>;
|
||||
auto rhypot_ref = [](RT arg1, RT arg2) -> RT { return 1. / std::hypot(arg1, arg2); };
|
||||
RT (*ref)(RT, RT) = rhypot_ref;
|
||||
const auto ulp = std::is_same_v<float, TestType> ? 2 : 1;
|
||||
const auto ulp = std::is_same_v<float, TestType> ? 2 : 2;
|
||||
BinaryFloatingPointTest(rhypot_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(ulp));
|
||||
}
|
||||
|
||||
@@ -348,7 +348,7 @@ MATH_TERNARY_KERNEL_DEF(rnorm3d)
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `rnorm3df(x, y, z)` and `rnorm3d(x, y, z)`against a table of
|
||||
* difficult values, followed by a large number of randomly generated values. The maximum ulp error
|
||||
* for single precision is 2 and for double precision is 1.
|
||||
* is 2.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
@@ -366,7 +366,7 @@ TEMPLATE_TEST_CASE("Unit_Device_rnorm3d_Accuracy_Positive", "", float, double) {
|
||||
return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3);
|
||||
};
|
||||
RT (*ref)(RT, RT, RT) = rnorm3d_ref;
|
||||
const auto ulp = std::is_same_v<float, TestType> ? 2 : 1;
|
||||
const auto ulp = std::is_same_v<float, TestType> ? 2 : 2;
|
||||
TernaryFloatingPointTest(rnorm3d_kernel<TestType>, ref,
|
||||
ULPValidatorBuilderFactory<TestType>(ulp));
|
||||
}
|
||||
@@ -438,7 +438,7 @@ MATH_QUATERNARY_KERNEL_DEF(rnorm4d)
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `rnorm4df(x, y, z, t)` and `rnorm4d(x, y, z, t)`against a
|
||||
* table of difficult values, followed by a large number of randomly generated values. The maximum
|
||||
* ulp error for single precision is 2 and for double precision is 1.
|
||||
* ulp error is 2.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
@@ -456,7 +456,7 @@ TEMPLATE_TEST_CASE("Unit_Device_rnorm4d_Accuracy_Positive", "", float, double) {
|
||||
return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4);
|
||||
};
|
||||
RT (*ref)(RT, RT, RT, RT) = rnorm4d_ref;
|
||||
const auto ulp = std::is_same_v<float, TestType> ? 2 : 1;
|
||||
const auto ulp = std::is_same_v<float, TestType> ? 2 : 2;
|
||||
QuaternaryFloatingPointTest(rnorm4d_kernel<TestType>, ref,
|
||||
ULPValidatorBuilderFactory<TestType>(ulp));
|
||||
}
|
||||
@@ -488,7 +488,7 @@ TEST_CASE("Unit_Device_rnorm4d_rnorm4df_Negative_RTC") { NegativeTestRTCWrapper<
|
||||
|
||||
template <typename T, typename F, typename RF, typename ValidatorBuilder>
|
||||
void NormSimpleTest(F kernel, RF ref_func, const ValidatorBuilder& validator_builder) {
|
||||
const auto max_dim = 10000;
|
||||
const auto max_dim = static_cast<uint64_t>(std::ceil(10000 * GetTestReductionFactor()));
|
||||
|
||||
LinearAllocGuard<T> x{LinearAllocs::hipHostMalloc, max_dim * sizeof(T)};
|
||||
LinearAllocGuard<T> x_dev{LinearAllocs::hipMalloc, max_dim * sizeof(T)};
|
||||
|
||||
@@ -33,7 +33,7 @@ THE SOFTWARE.
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(xs[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -359,7 +359,7 @@ MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_cos, static_cast<double (*)(double)>(std::
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i]); \
|
||||
} \
|
||||
}
|
||||
@@ -493,7 +493,7 @@ MATH_BINARY_SP_TEST_DEF(__fdividef, __fdiv_rn_ref);
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
if constexpr (std::is_same_v<float, T>) { \
|
||||
ys[i] = func_name##f(n[i], xs[i]); \
|
||||
} else if constexpr (std::is_same_v<double, T>) { \
|
||||
@@ -96,6 +96,7 @@ void BesselSinglePrecisionRangeTest(kernel_bessel_n_sig<float> kernel,
|
||||
const ValidatorBuilder& validator_builder, int n_input,
|
||||
const float a, const float b) {
|
||||
const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
|
||||
const auto reduction_factor = GetTestReductionFactor();
|
||||
const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) * 2 + sizeof(int));
|
||||
LinearAllocGuard<int> x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(int)};
|
||||
LinearAllocGuard<float> x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)};
|
||||
@@ -103,15 +104,11 @@ void BesselSinglePrecisionRangeTest(kernel_bessel_n_sig<float> kernel,
|
||||
MathTest math_test(kernel, max_batch_size);
|
||||
std::fill_n(x1s.ptr(), max_batch_size, n_input);
|
||||
|
||||
size_t inserted = 0u;
|
||||
for (float v = a; v != b; v = std::nextafter(v, b)) {
|
||||
x2s.ptr()[inserted++] = v;
|
||||
if (inserted < max_batch_size) continue;
|
||||
const auto run = [&, gs = grid_size, bs = block_size](size_t inserted) {
|
||||
math_test.Run(validator_builder, gs, bs, ref_func, inserted, x1s.ptr(), x2s.ptr());
|
||||
};
|
||||
|
||||
math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, x1s.ptr(),
|
||||
x2s.ptr());
|
||||
inserted = 0u;
|
||||
}
|
||||
SinglePrecisionReducedRun(run, x2s, a, b, reduction_factor, max_batch_size);
|
||||
}
|
||||
|
||||
template <typename T, typename F, typename ValidatorBuilder>
|
||||
|
||||
@@ -103,7 +103,7 @@ MATH_UNARY_KERNEL_DEF(erfinv)
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `erfinvf(x)` for all possible inputs. The results are
|
||||
* compared against reference function `double boost::math::erf_inv(double)`. The maximum ulp error
|
||||
* is 2.
|
||||
* is 4.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
@@ -124,7 +124,7 @@ TEST_CASE("Unit_Device_erfinvf_Accuracy_Positive") {
|
||||
return boost::math::erf_inv(arg);
|
||||
};
|
||||
double (*ref)(double) = erfinv_ref;
|
||||
UnarySinglePrecisionTest(erfinv_kernel<float>, ref, ULPValidatorBuilderFactory<float>(2));
|
||||
UnarySinglePrecisionTest(erfinv_kernel<float>, ref, ULPValidatorBuilderFactory<float>(4));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -535,9 +535,9 @@ MATH_UNARY_KERNEL_DEF(lgamma)
|
||||
*/
|
||||
TEST_CASE("Unit_Device_lgammaf_Accuracy_Limited_Positive") {
|
||||
double (*ref)(double) = std::lgamma;
|
||||
UnarySinglePrecisionRangeTest(lgamma_kernel<float>, ref, ULPValidatorBuilderFactory<float>(6),
|
||||
UnarySinglePrecisionRangeTest(lgamma_kernel<float>, ref, ULPValidatorBuilderFactory<float>(7),
|
||||
std::numeric_limits<float>::lowest(), -11.0001f);
|
||||
UnarySinglePrecisionRangeTest(lgamma_kernel<float>, ref, ULPValidatorBuilderFactory<float>(6),
|
||||
UnarySinglePrecisionRangeTest(lgamma_kernel<float>, ref, ULPValidatorBuilderFactory<float>(7),
|
||||
-2.2636f, std::numeric_limits<float>::max());
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
if constexpr (std::is_same_v<float, T>) { \
|
||||
ys[i] = func_name##f(x1s[i], x2s[i], x3s[i]); \
|
||||
} else if constexpr (std::is_same_v<double, T>) { \
|
||||
|
||||
@@ -36,7 +36,7 @@ TEST_CASE("Unit_Device_cos_cosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCos)
|
||||
MATH_UNARY_WITHIN_ULP_TEST_DEF(tan, std::tan, 4, 2)
|
||||
TEST_CASE("Unit_Device_tan_tanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTan); }
|
||||
|
||||
MATH_UNARY_WITHIN_ULP_TEST_DEF(asin, std::asin, 2, 2)
|
||||
MATH_UNARY_WITHIN_ULP_TEST_DEF(asin, std::asin, 3, 2)
|
||||
TEST_CASE("Unit_Device_asin_asinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAsin); }
|
||||
|
||||
MATH_UNARY_WITHIN_ULP_TEST_DEF(acos, std::acos, 2, 2)
|
||||
@@ -78,7 +78,7 @@ __global__ void sincos_kernel(std::pair<T, T>* const ys, const size_t num_xs, T*
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
if constexpr (std::is_same_v<float, T>) {
|
||||
sincosf(xs[i], &ys[i].first, &ys[i].second);
|
||||
} else if constexpr (std::is_same_v<double, T>) {
|
||||
@@ -109,7 +109,7 @@ __global__ void sincospi_kernel(std::pair<T, T>* const ys, const size_t num_xs,
|
||||
const auto tid = cg::this_grid().thread_rank();
|
||||
const auto stride = cg::this_grid().size();
|
||||
|
||||
for (auto i = tid; i < num_xs; i += stride) {
|
||||
for (size_t i = tid; i < num_xs; i += stride) {
|
||||
if constexpr (std::is_same_v<float, T>) {
|
||||
sincospif(xs[i], &ys[i].first, &ys[i].second);
|
||||
} else if constexpr (std::is_same_v<double, T>) {
|
||||
|
||||
@@ -34,7 +34,7 @@ namespace cg = cooperative_groups;
|
||||
const auto tid = cg::this_grid().thread_rank(); \
|
||||
const auto stride = cg::this_grid().size(); \
|
||||
\
|
||||
for (auto i = tid; i < num_xs; i += stride) { \
|
||||
for (size_t i = tid; i < num_xs; i += stride) { \
|
||||
if constexpr (std::is_same_v<float, T>) { \
|
||||
ys[i] = func_name##f(xs[i]); \
|
||||
} else if constexpr (std::is_same_v<double, T>) { \
|
||||
@@ -47,7 +47,10 @@ template <typename T, typename RT, typename RTArg, typename ValidatorBuilder>
|
||||
void UnaryHalfPrecisionBruteForceTest(kernel_sig<T, Float16> kernel, ref_sig<RT, RTArg> ref_func,
|
||||
const ValidatorBuilder& validator_builder) {
|
||||
const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
|
||||
uint64_t stop = std::numeric_limits<uint16_t>::max() + 1ul;
|
||||
const auto reduction_factor = GetTestReductionFactor();
|
||||
const auto inv_reduction_factor = 1 / reduction_factor;
|
||||
const auto stop = static_cast<uint64_t>(
|
||||
std::ceil((std::numeric_limits<uint16_t>::max() + 1ul) * reduction_factor));
|
||||
const auto max_batch_size =
|
||||
std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop);
|
||||
LinearAllocGuard<Float16> values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)};
|
||||
@@ -68,10 +71,11 @@ void UnaryHalfPrecisionBruteForceTest(kernel_sig<T, Float16> kernel, ref_sig<RT,
|
||||
const auto sub_batch_size = min_sub_batch_size + (i < tail);
|
||||
|
||||
thread_pool.Post([=, &values] {
|
||||
auto t = v;
|
||||
auto t = v * inv_reduction_factor;
|
||||
uint16_t val;
|
||||
for (auto j = 0u; j < sub_batch_size; ++j) {
|
||||
val = static_cast<uint16_t>(t++);
|
||||
val = static_cast<uint16_t>(std::floor(t));
|
||||
t += inv_reduction_factor;
|
||||
values.ptr()[base_idx + j] = *reinterpret_cast<Float16*>(&val);
|
||||
}
|
||||
});
|
||||
@@ -90,7 +94,10 @@ template <typename T, typename RT, typename RTArg, typename ValidatorBuilder>
|
||||
void UnarySinglePrecisionBruteForceTest(kernel_sig<T, float> kernel, ref_sig<RT, RTArg> ref_func,
|
||||
const ValidatorBuilder& validator_builder) {
|
||||
const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
|
||||
uint64_t stop = std::numeric_limits<uint32_t>::max() + 1ul;
|
||||
const auto reduction_factor = GetTestReductionFactor();
|
||||
const auto inv_reduction_factor = 1 / reduction_factor;
|
||||
const auto stop = static_cast<uint64_t>(
|
||||
std::ceil((std::numeric_limits<uint32_t>::max() + 1ul) * reduction_factor));
|
||||
const auto max_batch_size =
|
||||
std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)), stop);
|
||||
LinearAllocGuard<float> values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)};
|
||||
@@ -111,10 +118,11 @@ void UnarySinglePrecisionBruteForceTest(kernel_sig<T, float> kernel, ref_sig<RT,
|
||||
const auto sub_batch_size = min_sub_batch_size + (i < tail);
|
||||
|
||||
thread_pool.Post([=, &values] {
|
||||
auto t = v;
|
||||
auto t = v * inv_reduction_factor;
|
||||
uint32_t val;
|
||||
for (auto j = 0u; j < sub_batch_size; ++j) {
|
||||
val = static_cast<uint32_t>(t++);
|
||||
val = static_cast<uint32_t>(std::floor(t));
|
||||
t += inv_reduction_factor;
|
||||
values.ptr()[base_idx + j] = *reinterpret_cast<float*>(&val);
|
||||
}
|
||||
});
|
||||
@@ -134,19 +142,17 @@ void UnarySinglePrecisionRangeTest(kernel_sig<T, float> kernel, ref_sig<RT, RTAr
|
||||
const ValidatorBuilder& validator_builder, const float a,
|
||||
const float b) {
|
||||
const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
|
||||
const auto reduction_factor = GetTestReductionFactor();
|
||||
const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T));
|
||||
LinearAllocGuard<float> values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)};
|
||||
|
||||
|
||||
MathTest math_test(kernel, max_batch_size);
|
||||
|
||||
size_t inserted = 0u;
|
||||
for (float v = a; v != b; v = std::nextafter(v, b)) {
|
||||
values.ptr()[inserted++] = v;
|
||||
if (inserted < max_batch_size) continue;
|
||||
const auto run = [&, gs = grid_size, bs = block_size](size_t inserted) {
|
||||
math_test.Run(validator_builder, gs, bs, ref_func, inserted, values.ptr());
|
||||
};
|
||||
|
||||
math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr());
|
||||
inserted = 0u;
|
||||
}
|
||||
SinglePrecisionReducedRun(run, values, a, b, reduction_factor, max_batch_size);
|
||||
}
|
||||
|
||||
template <typename T, typename RT, typename RTArg, typename ValidatorBuilder>
|
||||
|
||||
Ссылка в новой задаче
Block a user