From 8f9f3e61d283bf8712e0610278b8bf37f7d5023b Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Mon, 22 Jan 2024 23:41:01 +0530 Subject: [PATCH] EXSWHTEC-285 - Implement tests for exponential and power device math functions #229 Change-Id: I34ad7ee92960500bcd14dfd7d230ca8f8f77c172 [ROCm/hip-tests commit: 87d601411b1364fc625c59dd4c053f05726c5313] --- .../hip-tests/catch/unit/math/CMakeLists.txt | 5 + .../unit/math/math_pow_negative_kernels.cc | 92 ++++ .../math/math_pow_negative_kernels_rtc.hh | 150 ++++++ .../catch/unit/math/math_special_values.hh | 9 +- .../hip-tests/catch/unit/math/pow_common.hh | 134 ++++++ .../hip-tests/catch/unit/math/pow_funcs.cc | 455 ++++++++++++++++++ 6 files changed, 844 insertions(+), 1 deletion(-) create mode 100644 projects/hip-tests/catch/unit/math/math_pow_negative_kernels.cc create mode 100644 projects/hip-tests/catch/unit/math/math_pow_negative_kernels_rtc.hh create mode 100644 projects/hip-tests/catch/unit/math/pow_common.hh create mode 100644 projects/hip-tests/catch/unit/math/pow_funcs.cc diff --git a/projects/hip-tests/catch/unit/math/CMakeLists.txt b/projects/hip-tests/catch/unit/math/CMakeLists.txt index 33c4311038..dba9476a0e 100644 --- a/projects/hip-tests/catch/unit/math/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/math/CMakeLists.txt @@ -26,6 +26,7 @@ set(TEST_SRC double_precision_intrinsics.cc integer_intrinsics.cc root_funcs.cc + pow_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -86,3 +87,7 @@ add_test(NAME Unit_Device_root_3Dand4D_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} math_root_negative_kernels_3Dand4D.cc 56) +add_test(NAME Unit_Device_pow_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_pow_negative_kernels.cc 76) diff --git a/projects/hip-tests/catch/unit/math/math_pow_negative_kernels.cc b/projects/hip-tests/catch/unit/math/math_pow_negative_kernels.cc new file mode 100644 index 0000000000..c338e744a9 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/math_pow_negative_kernels.cc @@ -0,0 +1,92 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_EXP(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +#define NEGATIVE_KERNELS_SHELL_INT_2ND(func_name) \ + __global__ void func_name##_kernel_v1(double* x, int e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v2(Dummy x, int e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v3(double x, int* e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v4(double x, Dummy e) { double result = func_name(x, e); } \ + __global__ void func_name##f_kernel_v1(float* x, int e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v2(Dummy x, int e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v3(float x, int* e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy e) { float result = func_name##f(x, e); } + + +NEGATIVE_KERNELS_SHELL_EXP(exp) +NEGATIVE_KERNELS_SHELL_EXP(exp2) +NEGATIVE_KERNELS_SHELL_EXP(exp10) +NEGATIVE_KERNELS_SHELL_EXP(expm1) + +__global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); } +__global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v9(float x, Dummy* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); } + +NEGATIVE_KERNELS_SHELL_INT_2ND(ldexp) + +__global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); } +__global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); } +__global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); } +__global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); } +__global__ void powf_kernel_v1(float* x, float e) { float result = powf(x, e); } +__global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); } +__global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); } +__global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); } + +NEGATIVE_KERNELS_SHELL_INT_2ND(powi) +NEGATIVE_KERNELS_SHELL_INT_2ND(scalbn) + +__global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); } +__global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); } diff --git a/projects/hip-tests/catch/unit/math/math_pow_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/math/math_pow_negative_kernels_rtc.hh new file mode 100644 index 0000000000..7c48640bec --- /dev/null +++ b/projects/hip-tests/catch/unit/math/math_pow_negative_kernels_rtc.hh @@ -0,0 +1,150 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math pow negative Test Cases that are using RTC. +*/ + +static constexpr auto kExp{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp_kernel_v1(double* x) { double result = exp(x); } + __global__ void exp_kernel_v2(Dummy x) { double result = exp(x); } + __global__ void expf_kernel_v1(float* x) { float result = expf(x); } + __global__ void expf_kernel_v2(Dummy x) { float result = expf(x); } +)"}; + +static constexpr auto kExp2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp2_kernel_v1(double* x) { double result = exp2(x); } + __global__ void exp2_kernel_v2(Dummy x) { double result = exp2(x); } + __global__ void exp2f_kernel_v1(float* x) { float result = exp2f(x); } + __global__ void exp2f_kernel_v2(Dummy x) { float result = exp2f(x); } +)"}; + +static constexpr auto kExp10{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp10_kernel_v1(double* x) { double result = exp10(x); } + __global__ void exp10_kernel_v2(Dummy x) { double result = exp10(x); } + __global__ void exp10f_kernel_v1(float* x) { float result = exp10f(x); } + __global__ void exp10f_kernel_v2(Dummy x) { float result = exp10f(x); } +)"}; + +static constexpr auto kExpm1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void expm1_kernel_v1(double* x) { double result = expm1(x); } + __global__ void expm1_kernel_v2(Dummy x) { double result = expm1(x); } + __global__ void expm1f_kernel_v1(float* x) { float result = expm1f(x); } + __global__ void expm1f_kernel_v2(Dummy x) { float result = expm1f(x); } +)"}; + +static constexpr auto kFrexp{R"( + __global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); } + __global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v9(float x, Dummy* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); } +)"}; + +static constexpr auto kLdexp{R"( + __global__ void ldexp_kernel_v1(double* x, int e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v2(Dummy x, int e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v3(double x, int* e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v4(double x, Dummy e) { double result = ldexp(x, e); } + __global__ void ldexpf_kernel_v1(float* x, int e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v2(Dummy x, int e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v3(float x, int* e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v4(float x, Dummy e) { float result = ldexpf(x, e); } +)"}; + +static constexpr auto kPow{R"( + __global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); } + __global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); } + __global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); } + __global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); } + __global__ void powf_kernel_v1(float* x, float e) { float result = powf(x, e); } + __global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); } + __global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); } + __global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); } +)"}; + +static constexpr auto kPowi{R"( + __global__ void powi_kernel_v1(double* x, int e) { double result = powi(x, e); } + __global__ void powi_kernel_v2(Dummy x, int e) { double result = powi(x, e); } + __global__ void powi_kernel_v3(double x, int* e) { double result = powi(x, e); } + __global__ void powi_kernel_v4(double x, Dummy e) { double result = powi(x, e); } + __global__ void powif_kernel_v1(float* x, int e) { float result = powif(x, e); } + __global__ void powif_kernel_v2(Dummy x, int e) { float result = powif(x, e); } + __global__ void powif_kernel_v3(float x, int* e) { float result = powif(x, e); } + __global__ void powif_kernel_v4(float x, Dummy e) { float result = powif(x, e); } +)"}; + +static constexpr auto kScalbn{R"( + __global__ void scalbn_kernel_v1(double* x, int e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v2(Dummy x, int e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v3(double x, int* e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v4(double x, Dummy e) { double result = scalbn(x, e); } + __global__ void scalbnf_kernel_v1(float* x, int e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v2(Dummy x, int e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v3(float x, int* e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v4(float x, Dummy e) { float result = scalbnf(x, e); } +)"}; + +static constexpr auto kScalbln{R"( + __global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); } + __global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); } +)"}; diff --git a/projects/hip-tests/catch/unit/math/math_special_values.hh b/projects/hip-tests/catch/unit/math/math_special_values.hh index bc5488fc31..d68a246aca 100644 --- a/projects/hip-tests/catch/unit/math/math_special_values.hh +++ b/projects/hip-tests/catch/unit/math/math_special_values.hh @@ -277,6 +277,12 @@ inline constexpr std::array kSpecialValuesFloat{ +0.0f, }; +inline constexpr std::array kSpecialValuesInt{ + 0, 1, 2, 3, 126, 127, 128, 1022, 1023, 1024, 0x02000001, 0x04000001, 1465264071, 1488522147, + std::numeric_limits::max(), -1, -2, -3, -126, -127, -128, -1022, -1023, -11024, -0x02000001, + -0x04000001, -1465264071, -1488522147, std::numeric_limits::min(), -std::numeric_limits::max() +}; + template struct SpecialVals { const T* const data; const size_t size; @@ -284,4 +290,5 @@ template struct SpecialVals { inline constexpr auto kSpecialValRegistry = std::make_tuple(SpecialVals{kSpecialValuesFloat.data(), kSpecialValuesFloat.size()}, - SpecialVals{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()}); + SpecialVals{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()}, + SpecialVals{kSpecialValuesInt.data(), kSpecialValuesInt.size()}); diff --git a/projects/hip-tests/catch/unit/math/pow_common.hh b/projects/hip-tests/catch/unit/math/pow_common.hh new file mode 100644 index 0000000000..95402c72d1 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/pow_common.hh @@ -0,0 +1,134 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_POW_INT_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T1* const x1s, \ + T2* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } \ + } + +template +using kernel_pow_int_sig = void (*)(T1*, const size_t, T1*, T2*); + +template using ref_pow_int_sig = T1 (*)(T1, T2); + +template +void PowIntFloatingPointBruteForceTest(kernel_pow_int_sig kernel, + ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) * 2 + sizeof(T2)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T1)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s] { + const auto generator1 = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(std::numeric_limits::lowest(), + std::numeric_limits::max()); + return static_cast(unif_dist(rng)); + }; + const auto generator2 = [] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_int_distribution unif_dist(std::numeric_limits::lowest(), + std::numeric_limits::max()); + return unif_dist(rng); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator1); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator2); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr()); + } +} + +template +void PowIntFloatingPointSpecialValuesTest(kernel_pow_int_sig kernel, + ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values1 = std::get>(kSpecialValRegistry); + const auto values2 = std::get>(kSpecialValRegistry); + + const auto size = values1.size * values2.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(T1)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(T2)}; + + for (auto i = 0u; i < values1.size; ++i) { + for (auto j = 0u; j < values2.size; ++j) { + x1s.ptr()[i * values2.size + j] = values1.data[i]; + x2s.ptr()[i * values2.size + j] = static_cast(values2.data[j]); + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr()); +} + +template +void PowIntFloatingPointTest(kernel_pow_int_sig kernel, ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + PowIntFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { PowIntFloatingPointBruteForceTest(kernel, ref_func, validator_builder); } +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/pow_funcs.cc b/projects/hip-tests/catch/unit/math/pow_funcs.cc new file mode 100644 index 0000000000..1722a26db5 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/pow_funcs.cc @@ -0,0 +1,455 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "pow_common.hh" +#include "math_pow_negative_kernels_rtc.hh" + +/** + * @addtogroup PowMathFuncs PowMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `expf(x)` for all possible inputs and `exp(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::exp(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp, 2, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for expf and exp. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp_expf_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp2f(x)` for all possible inputs and `exp2(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::exp2(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp2, 2, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for exp2f and exp2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp2_exp2f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp2); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `expm1f(x)` for all possible inputs and `expm1(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::exp(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(expm1, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for expm1f and expm1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_expm1_expm1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExpm1); } + +MATH_UNARY_KERNEL_DEF(exp10) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp10f(x)` for all possible inputs. The maximum ulp error + * is 2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10f_Accuracy_Positive") { + auto exp10_ref = [](double arg) -> double { return std::pow(10, arg); }; + double (*ref)(double) = exp10_ref; + UnarySinglePrecisionTest(exp10_kernel, ref, ULPValidatorBuilderFactory(2)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp10(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10_Accuracy_Positive") { + auto exp10_ref = [](long double arg) -> long double { return std::pow(10, arg); }; + long double (*ref)(long double) = exp10_ref; + UnaryDoublePrecisionTest(exp10_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for exp10f and exp10. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10_exp10f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp10); } + +template +__global__ void frexp_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i].first = frexpf(xs[i], &ys[i].second); + } else if constexpr (std::is_same_v) { + ys[i].first = frexp(xs[i], &ys[i].second); + } + } +} + +template std::pair frexp_ref(T arg) { + int exp_v; + T res = std::frexp(arg, &exp_v); + return {res, exp_v}; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `frexpf(x, exp)` for all possible inputs. The results are + * compared against reference function `double std::frexp(double, int*)`. The maximum ulp error is + * 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexpf_Accuracy_Positive") { + UnarySinglePrecisionTest( + frexp_kernel, frexp_ref, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0), + EqValidatorBuilderFactory())); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `frexp(x, exp)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double std::frexp(long double, int*)`. The maximum ulp + * error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexp_Accuracy_Positive") { + UnaryDoublePrecisionTest( + frexp_kernel, frexp_ref, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0), + EqValidatorBuilderFactory())); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for frexpf and frexp. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexp_frexpf_Negative_RTC") { NegativeTestRTCWrapper<20>(kFrexp); } + + +/********** Binary Functions **********/ + +MATH_BINARY_KERNEL_DEF(pow) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `powf(x, y)` and `pow(x, y)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::pow(T, T)`. The maximum ulp error + * for single precision is 4 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_pow_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto pow_ref = [](RT arg1, RT arg2) -> RT { + if (std::isinf(arg1) && arg2 < 0) return 0; + return std::pow(arg1, arg2); + }; + RT (*ref)(RT, RT) = pow_ref; + const auto ulp = std::is_same_v ? 4 : 2; + BinaryFloatingPointTest(pow_kernel, ref, ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for powf and pow. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_pow_powf_Negative_RTC") { NegativeTestRTCWrapper<8>(kPow); } + +MATH_POW_INT_KERNEL_DEF(ldexp) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ldexpf(x, exp)` and `ldexp(x, exp)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::ldexp(T, int)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_ldexp_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, int) = std::ldexp; + PowIntFloatingPointTest(ldexp_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for ldexpf and ldexp. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ldexp_ldexpf_Negative_RTC") { NegativeTestRTCWrapper<8>(kLdexp); } + +MATH_POW_INT_KERNEL_DEF(powi) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `powi(x, exp)` and `powi(x, exp)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::pow(T, T)`. The maximum ulp error + * for single precision is 4 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_powi_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto pow_ref = [](RT arg1, int arg2) -> RT { + if (std::isinf(arg1) && arg2 < 0) return 0; + return std::pow(arg1, static_cast(arg2)); + }; + RT (*ref)(RT, int) = pow_ref; + const auto ulp = std::is_same_v ? 4 : 2; + PowIntFloatingPointTest(powi_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for powif and powi. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_powi_powif_Negative_RTC") { NegativeTestRTCWrapper<8>(kPowi); } + +MATH_POW_INT_KERNEL_DEF(scalbn) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `scalbnf(x, n)` and `scalbn(x, n)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::scalbn(T, int)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_scalbn_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, int) = std::scalbn; + PowIntFloatingPointTest(scalbn_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for scalbnf and scalbn. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_scalbn_scalbnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbn); } + +MATH_POW_INT_KERNEL_DEF(scalbln) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `scalblnf(x, l)` and `scalbln(x, l)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::scalbn(T, long int)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_scalbln_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, long int) = std::scalbln; + PowIntFloatingPointTest(scalbln_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for scalblnf and scalbln. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_scalbln_scalblnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbln); }