diff --git a/projects/hip-tests/catch/unit/math/CMakeLists.txt b/projects/hip-tests/catch/unit/math/CMakeLists.txt index 844cbfa8d3..2ebf11063d 100644 --- a/projects/hip-tests/catch/unit/math/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/math/CMakeLists.txt @@ -19,8 +19,19 @@ # THE SOFTWARE. set(TEST_SRC + trig_funcs.cc ) hip_add_exe_to_target(NAME MathsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) + +add_test(NAME Unit_Device_Single_Precision_Trig_Functions_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + trig_single_precision_negative_kernels.cc 66) + +add_test(NAME Unit_Device_Double_Precision_Trig_Functions_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + trig_double_precision_negative_kernels.cc 66) \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/binary_common.hh b/projects/hip-tests/catch/unit/math/binary_common.hh new file mode 100644 index 0000000000..72de23096e --- /dev/null +++ b/projects/hip-tests/catch/unit/math/binary_common.hh @@ -0,0 +1,136 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_BINARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(RT* const ys, const size_t num_xs, T* const x1s, \ + T* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } \ + } + +template +void BinaryFloatingPointBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 2 + sizeof(T)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr()); + } +} + +template +void BinaryFloatingPointSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + + const auto size = values.size * values.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + + for (auto i = 0u; i < values.size; ++i) { + for (auto j = 0u; j < values.size; ++j) { + x1s.ptr()[i * values.size + j] = values.data[i]; + x2s.ptr()[i * values.size + j] = values.data[j]; + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr()); +} + +template +void BinaryFloatingPointTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + BinaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { BinaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); } +} + + +#define MATH_BINARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_BINARY_KERNEL_DEF(kern_name) \ + \ + TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \ + using RT = RefType_t; \ + RT (*ref)(RT, RT) = ref_func; \ + const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \ + \ + BinaryFloatingPointTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(ulp)); \ + } \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/math_common.hh b/projects/hip-tests/catch/unit/math/math_common.hh new file mode 100644 index 0000000000..8b59558389 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/math_common.hh @@ -0,0 +1,240 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include + +#include + +#include "thread_pool.hh" +#include "validators.hh" + +namespace cg = cooperative_groups; + +template +std::enable_if_t, std::is_arithmetic>, std::ostream&> +operator<<(std::ostream& os, const std::pair& p) { + const auto default_prec = os.precision(); + return os << "<" << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.first << ", " + << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.second << ">" + << std::setprecision(default_prec); +} + +// This class represents a generic numerical accuracy math test. Template parameter T is the output +// type of the function being tested, and template parameter pack Ts represents the input types. The +// constructor takes a kernel with the signature void(T*, const size_t, Ts*...). The first kernel +// parameter is the output array, the second parameter is the number of outputs, and the rest of the +// parameters are arrays containing input values. The number of input arrays depends on the arity of +// the function being tested e.g. one input array for unary functions, two input arrays for binary +// functions, etc. The kernel threads take one element from each input array at the index +// corresponding to that thread, feed the input elements to the testee function, and store the +// result in the output array at the corresponding index. +// +// E.g. for a binary function the kernel would have the following signature: +// void kernel(float* y, const size_t n, float* x1, float* x2) +// +// The outputs would be calculated in parallel the following way: +// y[0] = testee(x1[0], x2[0]) +// y[1] = testee(x1[1], x2[1]) +// y[2] = testee(x1[2], x2[2]) +// ... +// +// The constructor also takes max_num_args, which represents the maximum number of input values used +// for one kernel launch. The device memory for the input and output arrays is allocated based on +// that number. +template class MathTest { + public: + MathTest(void (*kernel)(T*, const size_t, Ts*...), const size_t max_num_args) + : kernel_{kernel}, + xss_dev_(LinearAllocGuard(LinearAllocs::hipMalloc, max_num_args * sizeof(Ts))...), + y_dev_{LinearAllocs::hipMalloc, max_num_args * sizeof(T)}, + y_{LinearAllocs::hipHostMalloc, max_num_args * sizeof(T)} {} + + // This method runs the test with the following steps: + // 1. Copy the values from the input arrays provided in the parameter pack xss to device memory + // 2. Launch the kernel using the configuration provided in grid_dims and block_dims + // 3. Copy the outputs back to host memory + // 4. Generate the reference values using ref_func and compare against the outputs using the + // validator provided by validator_builder + // 5. If non-type template parameter parallel is true, then step 4 is broken up into chunks of + // work that are done in parallel on the host. + template + void Run(const ValidatorBuilder& validator_builder, const size_t grid_dims, + const size_t block_dims, RT (*const ref_func)(RTs...), const size_t num_args, + const Ts*... xss) { + fail_flag_.store(false); + error_info_.clear(); + RunImpl(validator_builder, grid_dims, block_dims, ref_func, num_args, + std::index_sequence_for{}, xss...); + } + + private: + void (*kernel_)(T*, const size_t, Ts*...); + std::tuple...> xss_dev_; + LinearAllocGuard y_dev_; + LinearAllocGuard y_; + std::atomic fail_flag_{false}; + std::mutex mtx_; + std::string error_info_; + + template + void RunImpl(const ValidatorBuilder& validator_builder, const size_t grid_dim, + const size_t block_dim, RT (*const ref_func)(RTs...), const size_t num_args, + std::index_sequence is, const Ts*... xss) { + const auto xss_tup = std::make_tuple(xss...); + + constexpr auto f = [](auto dst, auto src, size_t size) { + HIP_CHECK(hipMemcpy(dst, src, size, hipMemcpyHostToDevice)) + }; + + ((f(std::get(xss_dev_).ptr(), std::get(xss_tup), + num_args * sizeof(*std::get(xss_tup)))), + ...); + + kernel_<<>>(y_dev_.ptr(), num_args, std::get(xss_dev_).ptr()...); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(y_.ptr(), y_dev_.ptr(), num_args * sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipStreamSynchronize(nullptr)); + + if constexpr (!parallel) { + for (auto i = 0u; i < num_args; ++i) { + const auto actual_val = y_.ptr()[i]; + const auto ref_val = static_cast(ref_func(xss[i]...)); + const auto validator = validator_builder(ref_val, xss[i]...); + + if (!validator->match(actual_val)) { + const auto log = MakeLogMessage(actual_val, xss[i]...) + validator->describe() + "\n"; + INFO(log); + REQUIRE(false); + } + } + + return; + } + + const auto task = [&, this](size_t iters, size_t base_idx) { + for (auto i = 0u; i < iters; ++i) { + if (fail_flag_.load(std::memory_order_relaxed)) return; + + const auto actual_val = y_.ptr()[base_idx + i]; + const auto ref_val = static_cast(ref_func(xss[base_idx + i]...)); + const auto validator = validator_builder(ref_val, xss[base_idx + i]...); + + if (!validator->match(actual_val)) { + fail_flag_.store(true, std::memory_order_relaxed); + // Several threads might have passed the first check, but failed validation. On the + // chance of this happening, access to the string stream must be serialized. + const auto log = + MakeLogMessage(actual_val, xss[base_idx + i]...) + validator->describe() + "\n"; + { + std::lock_guard lg{mtx_}; + error_info_ += log; + } + return; + } + } + }; + + const auto task_count = thread_pool.thread_count(); + const auto chunk_size = num_args / task_count; + const auto tail = num_args % task_count; + + auto base_idx = 0u; + for (auto i = 0u; i < task_count; ++i) { + const auto iters = chunk_size + (i < tail); + thread_pool.Post([=, &task] { task(iters, base_idx); }); + base_idx += iters; + } + + thread_pool.Wait(); + + INFO(error_info_); + REQUIRE(!fail_flag_); + } + + template std::string MakeLogMessage(T actual_val, Args... args) { + std::stringstream ss; + ss << "Input value(s): " << std::scientific + << std::setprecision(std::numeric_limits::max_digits10 - 1); + ((ss << " " << args), ...) << "\n" << actual_val << " "; + + return ss.str(); + } +}; + +template struct RefType {}; + +template <> struct RefType { using type = double; }; + +template <> struct RefType { using type = long double; }; + +template using RefType_t = typename RefType::type; + +template auto GetOccupancyMaxPotentialBlockSize(F kernel) { + int grid_size = 0, block_size = 0; + HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&grid_size, &block_size, kernel, 0, 0)); + return std::make_tuple(grid_size, block_size); +} + +inline size_t GetMaxAllowedDeviceMemoryUsage() { + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, 0)); + return props.totalGlobalMem * (cmd_options.accuracy_max_memory * 0.01f); +} + +inline uint64_t GetTestIterationCount() { return cmd_options.accuracy_iterations; } + +template using kernel_sig = void (*)(T*, const size_t, Ts*...); + +template using ref_sig = T (*)(Ts...); + +template void NegativeTestRTCWrapper(const char* program_source) { + hiprtcProgram program{}; + + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "math_test_rtc.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{error_num}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/projects/hip-tests/catch/unit/math/math_special_values.hh b/projects/hip-tests/catch/unit/math/math_special_values.hh new file mode 100644 index 0000000000..bc5488fc31 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/math_special_values.hh @@ -0,0 +1,287 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Disclaimer: +// This code is based on the work found in OpenCL-CTS authored by The Khronos Group. +// The original code can be found at https://github.com/KhronosGroup/OpenCL-CTS. +// We acknowledge the contributions of The Khronos Group to the development of this code. + +#pragma once + +#include +#include + +/*----------------------------------------------------------------------------- + HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type + float, double, long double respectively. Arguments: + + sm -- sign of number, + int -- integer part of mantissa (without `0x' prefix), + fract -- fractional part of mantissa (without decimal point and `L' or + `LL' suffixes), + se -- sign of exponent, + exp -- absolute value of (binary) exponent. + + Example: + + double yhi = HEX_DBL(+, 1, 5555555555555, -, 2); // 0x1.5555555555555p-2 + + Note: + + We have to pass signs as separate arguments because gcc pass negative + integer values (e. g. `-2') into a macro as two separate tokens, so + `HEX_FLT(1, 0, -2)' produces result `0x1.0p- 2' (note a space between minus + and two) which is not a correct floating point literal. +-----------------------------------------------------------------------------*/ +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +// If compiler does not support hex floating point literals: +#define HEX_FLT(sm, int, fract, se, exp) \ + sm ldexpf((float)(0x##int##fract##UL), \ + se exp + ilogbf((float)0x##int) - ilogbf((float)(0x##int##fract##UL))) +#define HEX_DBL(sm, int, fract, se, exp) \ + sm ldexp((double)(0x##int##fract##ULL), \ + se exp + ilogb((double)0x##int) - ilogb((double)(0x##int##fract##ULL))) +#define HEX_LDBL(sm, int, fract, se, exp) \ + sm ldexpl((long double)(0x##int##fract##ULL), \ + se exp + ilogbl((long double)0x##int) - ilogbl((long double)(0x##int##fract##ULL))) +#else +// If compiler supports hex floating point literals: just concatenate all the +// parts into a literal. +#define HEX_FLT(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##F +#define HEX_DBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp +#define HEX_LDBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##L +#endif + +inline constexpr std::array kSpecialValuesDouble{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_DBL(-, 1, 0000000000001, +, 64), + HEX_DBL(-, 1, 0, +, 64), + HEX_DBL(-, 1, fffffffffffff, +, 63), + HEX_DBL(-, 1, 0000000000001, +, 63), + HEX_DBL(-, 1, 0, +, 63), + HEX_DBL(-, 1, fffffffffffff, +, 62), + HEX_DBL(-, 1, 000002, +, 32), + HEX_DBL(-, 1, 0, +, 32), + HEX_DBL(-, 1, fffffffffffff, +, 31), + HEX_DBL(-, 1, 0000000000001, +, 31), + HEX_DBL(-, 1, 0, +, 31), + HEX_DBL(-, 1, fffffffffffff, +, 30), + -1000.0, + -100.0, + -4.0, + -3.5, + -3.0, + HEX_DBL(-, 1, 8000000000001, +, 1), + -2.5, + HEX_DBL(-, 1, 7ffffffffffff, +, 1), + -2.0, + HEX_DBL(-, 1, 8000000000001, +, 0), + -1.5, + HEX_DBL(-, 1, 7ffffffffffff, +, 0), + HEX_DBL(-, 1, 0000000000001, +, 0), + -1.0, + HEX_DBL(-, 1, fffffffffffff, -, 1), + HEX_DBL(-, 1, 0000000000001, -, 1), + -0.5, + HEX_DBL(-, 1, fffffffffffff, -, 2), + HEX_DBL(-, 1, 0000000000001, -, 2), + -0.25, + HEX_DBL(-, 1, fffffffffffff, -, 3), + HEX_DBL(-, 1, 0000000000001, -, 1022), + -std::numeric_limits::min(), + HEX_DBL(-, 0, fffffffffffff, -, 1022), + HEX_DBL(-, 0, 0000000000fff, -, 1022), + HEX_DBL(-, 0, 00000000000fe, -, 1022), + HEX_DBL(-, 0, 000000000000e, -, 1022), + HEX_DBL(-, 0, 000000000000c, -, 1022), + HEX_DBL(-, 0, 000000000000a, -, 1022), + HEX_DBL(-, 0, 0000000000008, -, 1022), + HEX_DBL(-, 0, 0000000000007, -, 1022), + HEX_DBL(-, 0, 0000000000006, -, 1022), + HEX_DBL(-, 0, 0000000000005, -, 1022), + HEX_DBL(-, 0, 0000000000004, -, 1022), + HEX_DBL(-, 0, 0000000000003, -, 1022), + HEX_DBL(-, 0, 0000000000002, -, 1022), + HEX_DBL(-, 0, 0000000000001, -, 1022), + -0.0, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_DBL(+, 1, 0000000000001, +, 64), + HEX_DBL(+, 1, 0, +, 64), + HEX_DBL(+, 1, fffffffffffff, +, 63), + HEX_DBL(+, 1, 0000000000001, +, 63), + HEX_DBL(+, 1, 0, +, 63), + HEX_DBL(+, 1, fffffffffffff, +, 62), + HEX_DBL(+, 1, 000002, +, 32), + HEX_DBL(+, 1, 0, +, 32), + HEX_DBL(+, 1, fffffffffffff, +, 31), + HEX_DBL(+, 1, 0000000000001, +, 31), + HEX_DBL(+, 1, 0, +, 31), + HEX_DBL(+, 1, fffffffffffff, +, 30), + +1000.0, + +100.0, + +4.0, + +3.5, + +3.0, + HEX_DBL(+, 1, 8000000000001, +, 1), + +2.5, + HEX_DBL(+, 1, 7ffffffffffff, +, 1), + +2.0, + HEX_DBL(+, 1, 8000000000001, +, 0), + +1.5, + HEX_DBL(+, 1, 7ffffffffffff, +, 0), + HEX_DBL(+, 1, 0000000000001, +, 0), + +1.0, + HEX_DBL(+, 1, fffffffffffff, -, 1), + HEX_DBL(+, 1, 0000000000001, -, 1), + +0.5, + HEX_DBL(+, 1, fffffffffffff, -, 2), + HEX_DBL(+, 1, 0000000000001, -, 2), + +0.25, + HEX_DBL(+, 1, fffffffffffff, -, 3), + HEX_DBL(+, 1, 0000000000001, -, 1022), + +std::numeric_limits::min(), + HEX_DBL(+, 0, fffffffffffff, -, 1022), + HEX_DBL(+, 0, 0000000000fff, -, 1022), + HEX_DBL(+, 0, 00000000000fe, -, 1022), + HEX_DBL(+, 0, 000000000000e, -, 1022), + HEX_DBL(+, 0, 000000000000c, -, 1022), + HEX_DBL(+, 0, 000000000000a, -, 1022), + HEX_DBL(+, 0, 0000000000008, -, 1022), + HEX_DBL(+, 0, 0000000000007, -, 1022), + HEX_DBL(+, 0, 0000000000006, -, 1022), + HEX_DBL(+, 0, 0000000000005, -, 1022), + HEX_DBL(+, 0, 0000000000004, -, 1022), + HEX_DBL(+, 0, 0000000000003, -, 1022), + HEX_DBL(+, 0, 0000000000002, -, 1022), + HEX_DBL(+, 0, 0000000000001, -, 1022), + +0.0, +}; + +inline constexpr std::array kSpecialValuesFloat{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_FLT(-, 1, 000002, +, 64), + HEX_FLT(-, 1, 0, +, 64), + HEX_FLT(-, 1, fffffe, +, 63), + HEX_FLT(-, 1, 000002, +, 63), + HEX_FLT(-, 1, 0, +, 63), + HEX_FLT(-, 1, fffffe, +, 62), + HEX_FLT(-, 1, 000002, +, 32), + HEX_FLT(-, 1, 0, +, 32), + HEX_FLT(-, 1, fffffe, +, 31), + HEX_FLT(-, 1, 000002, +, 31), + HEX_FLT(-, 1, 0, +, 31), + HEX_FLT(-, 1, fffffe, +, 30), + -1000.f, + -100.f, + -4.0f, + -3.5f, + -3.0f, + HEX_FLT(-, 1, 800002, +, 1), + -2.5f, + HEX_FLT(-, 1, 7ffffe, +, 1), + -2.0f, + HEX_FLT(-, 1, 800002, +, 0), + -1.5f, + HEX_FLT(-, 1, 7ffffe, +, 0), + HEX_FLT(-, 1, 000002, +, 0), + -1.0f, + HEX_FLT(-, 1, fffffe, -, 1), + HEX_FLT(-, 1, 000002, -, 1), + -0.5f, + HEX_FLT(-, 1, fffffe, -, 2), + HEX_FLT(-, 1, 000002, -, 2), + -0.25f, + HEX_FLT(-, 1, fffffe, -, 3), + HEX_FLT(-, 1, 000002, -, 126), + -std::numeric_limits::min(), + HEX_FLT(-, 0, fffffe, -, 126), + HEX_FLT(-, 0, 000ffe, -, 126), + HEX_FLT(-, 0, 0000fe, -, 126), + HEX_FLT(-, 0, 00000e, -, 126), + HEX_FLT(-, 0, 00000c, -, 126), + HEX_FLT(-, 0, 00000a, -, 126), + HEX_FLT(-, 0, 000008, -, 126), + HEX_FLT(-, 0, 000006, -, 126), + HEX_FLT(-, 0, 000004, -, 126), + HEX_FLT(-, 0, 000002, -, 126), + -0.0f, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_FLT(+, 1, 000002, +, 64), + HEX_FLT(+, 1, 0, +, 64), + HEX_FLT(+, 1, fffffe, +, 63), + HEX_FLT(+, 1, 000002, +, 63), + HEX_FLT(+, 1, 0, +, 63), + HEX_FLT(+, 1, fffffe, +, 62), + HEX_FLT(+, 1, 000002, +, 32), + HEX_FLT(+, 1, 0, +, 32), + HEX_FLT(+, 1, fffffe, +, 31), + HEX_FLT(+, 1, 000002, +, 31), + HEX_FLT(+, 1, 0, +, 31), + HEX_FLT(+, 1, fffffe, +, 30), + +1000.f, + +100.f, + +4.0f, + +3.5f, + +3.0f, + HEX_FLT(+, 1, 800002, +, 1), + 2.5f, + HEX_FLT(+, 1, 7ffffe, +, 1), + +2.0f, + HEX_FLT(+, 1, 800002, +, 0), + 1.5f, + HEX_FLT(+, 1, 7ffffe, +, 0), + HEX_FLT(+, 1, 000002, +, 0), + +1.0f, + HEX_FLT(+, 1, fffffe, -, 1), + HEX_FLT(+, 1, 000002, -, 1), + +0.5f, + HEX_FLT(+, 1, fffffe, -, 2), + HEX_FLT(+, 1, 000002, -, 2), + +0.25f, + HEX_FLT(+, 1, fffffe, -, 3), + HEX_FLT(+, 1, 000002, -, 126), + +std::numeric_limits::min(), + HEX_FLT(+, 0, fffffe, -, 126), + HEX_FLT(+, 0, 000ffe, -, 126), + HEX_FLT(+, 0, 0000fe, -, 126), + HEX_FLT(+, 0, 00000e, -, 126), + HEX_FLT(+, 0, 00000c, -, 126), + HEX_FLT(+, 0, 00000a, -, 126), + HEX_FLT(+, 0, 000008, -, 126), + HEX_FLT(+, 0, 000006, -, 126), + HEX_FLT(+, 0, 000004, -, 126), + HEX_FLT(+, 0, 000002, -, 126), + +0.0f, +}; + +template struct SpecialVals { + const T* const data; + const size_t size; +}; + +inline constexpr auto kSpecialValRegistry = + std::make_tuple(SpecialVals{kSpecialValuesFloat.data(), kSpecialValuesFloat.size()}, + SpecialVals{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()}); diff --git a/projects/hip-tests/catch/unit/math/quaternary_common.hh b/projects/hip-tests/catch/unit/math/quaternary_common.hh new file mode 100644 index 0000000000..b29eb52ef4 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/quaternary_common.hh @@ -0,0 +1,246 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_QUATERNARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, T* const x1s, T* const x2s, \ + T* const x3s, T* const x4s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i], x3s[i], x4s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i], x4s[i]); \ + } \ + } \ + } + +inline constexpr std::array kSpecialValuesReducedDouble{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_DBL(-, 1, 0000000000001, +, 64), + HEX_DBL(-, 1, fffffffffffff, +, 63), + HEX_DBL(-, 1, fffffffffffff, +, 62), + HEX_DBL(-, 1, 0, +, 32), + HEX_DBL(-, 1, 0000000000001, +, 31), + HEX_DBL(-, 1, fffffffffffff, +, 30), + -1000.0, + -3.5, + HEX_DBL(-, 1, 8000000000001, +, 1), + -2.5, + HEX_DBL(-, 1, 8000000000001, +, 0), + -1.5, + -0.5, + -0.25, + HEX_DBL(-, 1, fffffffffffff, -, 3), + -std::numeric_limits::min(), + HEX_DBL(-, 0, fffffffffffff, -, 1022), + HEX_DBL(-, 0, 0000000000001, -, 1022), + -0.0, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_DBL(+, 1, 0, +, 64), + HEX_DBL(+, 1, 0000000000001, +, 63), + HEX_DBL(+, 1, 000002, +, 32), + HEX_DBL(+, 1, fffffffffffff, +, 31), + HEX_DBL(+, 1, 0, +, 31), + HEX_DBL(+, 1, fffffffffffff, +, 30), + +100.0, + +3.0, + HEX_DBL(+, 1, 7ffffffffffff, +, 1), + +2.0, + HEX_DBL(+, 1, 7ffffffffffff, +, 0), + +1.0, + HEX_DBL(+, 1, fffffffffffff, -, 2), + +std::numeric_limits::min(), + HEX_DBL(+, 0, 0000000000fff, -, 1022), + HEX_DBL(+, 0, 0000000000007, -, 1022), + +0.0, +}; + +inline constexpr std::array kSpecialValuesReducedFloat{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_FLT(-, 1, 000002, +, 64), + HEX_FLT(-, 1, fffffe, +, 63), + HEX_FLT(-, 1, fffffe, +, 62), + HEX_FLT(-, 1, 0, +, 32), + HEX_FLT(-, 1, fffffe, +, 31), + HEX_FLT(-, 1, fffffe, +, 30), + -1000.f, + -3.5f, + HEX_FLT(-, 1, 800002, +, 1), + -2.5f, + HEX_FLT(-, 1, 800002, +, 0), + -1.5f, + -0.5f, + -0.25f, + HEX_FLT(-, 1, fffffe, -, 3), + -std::numeric_limits::min(), + HEX_FLT(-, 0, fffffe, -, 126), + HEX_FLT(-, 0, 000002, -, 126), + -0.0f, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_FLT(+, 1, 0, +, 64), + HEX_FLT(+, 1, 000002, +, 63), + HEX_FLT(+, 1, 000002, +, 32), + HEX_FLT(+, 1, 000002, +, 31), + HEX_FLT(+, 1, fffffe, +, 30), + +100.f, + +4.0f, + HEX_FLT(+, 1, 7ffffe, +, 1), + +2.0f, + HEX_FLT(+, 1, 7ffffe, +, 0), + +1.0f, + HEX_FLT(+, 1, fffffe, -, 2), + +std::numeric_limits::min(), + HEX_FLT(+, 0, 000ffe, -, 126), + HEX_FLT(+, 0, 000006, -, 126), + +0.0f, +}; + +inline constexpr auto kSpecialValReducedRegistry = std::make_tuple( + SpecialVals{kSpecialValuesReducedFloat.data(), kSpecialValuesReducedFloat.size()}, + SpecialVals{kSpecialValuesReducedDouble.data(), kSpecialValuesReducedDouble.size()}); + +template +void QuaternaryFloatingPointBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 4 + sizeof(T)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x4s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s, &x3s, &x4s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x3s.ptr() + base_idx, x3s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x4s.ptr() + base_idx, x4s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr(), x3s.ptr(), x4s.ptr()); + } +} + +template +void QuaternaryFloatingPointSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValReducedRegistry); + + const auto size = values.size * values.size * values.size * values.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x4s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + + for (auto i = 0u; i < values.size; ++i) { + for (auto j = 0u; j < values.size; ++j) { + for (auto k = 0u; k < values.size; ++k) { + for (auto l = 0u; l < values.size; ++l) { + x1s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[i]; + x2s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[j]; + x3s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[k]; + x4s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[l]; + } + } + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr(), x3s.ptr(), x4s.ptr()); +} + +template +void QuaternaryFloatingPointTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + QuaternaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { + QuaternaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); + } +} + + +#define MATH_QUATERNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_QUATERNARY_KERNEL_DEF(kern_name) \ + \ + TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \ + using RT = RefType_t; \ + RT (*ref)(RT, RT, RT, RT) = ref_func; \ + const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \ + \ + QuaternaryFloatingPointTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(ulp)); \ + } \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/ternary_common.hh b/projects/hip-tests/catch/unit/math/ternary_common.hh new file mode 100644 index 0000000000..53b28c6b5a --- /dev/null +++ b/projects/hip-tests/catch/unit/math/ternary_common.hh @@ -0,0 +1,142 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_TERNARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, T* const x1s, T* const x2s, \ + T* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i], x3s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } \ + } + +template +void TernaryFloatingPointBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 3 + sizeof(T)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s, &x3s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x3s.ptr() + base_idx, x3s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr(), x3s.ptr()); + } +} + +template +void TernaryFloatingPointSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + + const auto size = values.size * values.size * values.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + + for (auto i = 0u; i < values.size; ++i) { + for (auto j = 0u; j < values.size; ++j) { + for (auto k = 0u; k < values.size; ++k) { + x1s.ptr()[(i * values.size + j) * values.size + k] = values.data[i]; + x2s.ptr()[(i * values.size + j) * values.size + k] = values.data[j]; + x3s.ptr()[(i * values.size + j) * values.size + k] = values.data[k]; + } + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr(), x3s.ptr()); +} + +template +void TernaryFloatingPointTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + TernaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { TernaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); } +} + + +#define MATH_TERNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_TERNARY_KERNEL_DEF(kern_name) \ + \ + TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \ + using RT = RefType_t; \ + RT (*ref)(RT, RT, RT) = ref_func; \ + const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \ + \ + TernaryFloatingPointTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(ulp)); \ + } \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/thread_pool.hh b/projects/hip-tests/catch/unit/math/thread_pool.hh new file mode 100644 index 0000000000..d45e5e8b1b --- /dev/null +++ b/projects/hip-tests/catch/unit/math/thread_pool.hh @@ -0,0 +1,64 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +#include +#include + +// This is a simple wrapper around boost::asio::thread_pool that keeps track of the number of +// currently active tasks using an atomic counter. +class ThreadPool { + public: + ThreadPool(size_t thread_count = std::thread::hardware_concurrency()) + : thread_count_(thread_count) {} + + ~ThreadPool() { thread_pool_.join(); } + + // Submits a task to the thread pool and increments the number of active tasks. The task is + // wrapped in a lambda that decrements the number of active tasks upon completion. + template void Post(T&& task) { + ++active_tasks_; + auto&& task_wrapper = [task, this] { + task(); + --active_tasks_; + }; + boost::asio::post(thread_pool_, task_wrapper); + } + + // Busy waits for the number of active tasks to reach zero. + void Wait() const { + while (active_tasks_.load(std::memory_order_relaxed)) + ; + } + + size_t thread_count() const { return thread_count_; } + + private: + const size_t thread_count_; + boost::asio::thread_pool thread_pool_{thread_count_}; + std::atomic active_tasks_; +}; + +inline ThreadPool thread_pool{}; diff --git a/projects/hip-tests/catch/unit/math/trig_double_precision_negative_kernels.cc b/projects/hip-tests/catch/unit/math/trig_double_precision_negative_kernels.cc new file mode 100644 index 0000000000..2008837fd4 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/trig_double_precision_negative_kernels.cc @@ -0,0 +1,108 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define TRIG_DP_UNARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +/*Expecting 2 errors per macro invocation - 26 total*/ +TRIG_DP_UNARY_NEGATIVE_KERNELS(sin) +TRIG_DP_UNARY_NEGATIVE_KERNELS(cos) +TRIG_DP_UNARY_NEGATIVE_KERNELS(tan) +TRIG_DP_UNARY_NEGATIVE_KERNELS(asin) +TRIG_DP_UNARY_NEGATIVE_KERNELS(acos) +TRIG_DP_UNARY_NEGATIVE_KERNELS(atan) +TRIG_DP_UNARY_NEGATIVE_KERNELS(sinh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(cosh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(tanh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(asinh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(atanh) +TRIG_DP_UNARY_NEGATIVE_KERNELS(sinpi) +TRIG_DP_UNARY_NEGATIVE_KERNELS(cospi) + +/*Expecting 4 errors*/ +__global__ void atan2_kernel_v1(double* x, double y) { double result = atan2(x, y); } +__global__ void atan2_kernel_v2(double x, double* y) { double result = atan2(x, y); } +__global__ void atan2_kernel_v3(Dummy x, double y) { double result = atan2(x, y); } +__global__ void atan2_kernel_v4(double x, Dummy y) { double result = atan2(x, y); } + +/*Expecting 18 errors*/ +__global__ void sincos_kernel_v1(double* x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v2(Dummy x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v3(double x, char* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v4(double x, short* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v5(double x, int* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v6(double x, long* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v7(double x, long long* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v8(double x, float* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v9(double x, Dummy* sptr, double* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v10(double x, const double* sptr, double* cptr) { + sincos(x, sptr, cptr); +} +__global__ void sincos_kernel_v11(double x, double* sptr, char* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v12(double x, double* sptr, short* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v13(double x, double* sptr, int* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v14(double x, double* sptr, long* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v15(double x, double* sptr, long long* cptr) { + sincos(x, sptr, cptr); +} +__global__ void sincos_kernel_v16(double x, double* sptr, float* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v17(double x, double* sptr, Dummy* cptr) { sincos(x, sptr, cptr); } +__global__ void sincos_kernel_v18(double x, double* sptr, const double* cptr) { + sincos(x, sptr, cptr); +} + +/*Expecting 18 errors*/ +__global__ void sincospi_kernel_v1(float* x, float* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v3(float x, char* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v4(float x, short* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v5(float x, int* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v6(float x, long* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v7(float x, long long* sptr, float* cptr) { + sincospi(x, sptr, cptr); +} +__global__ void sincospi_kernel_v8(float x, double* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v9(float x, Dummy* sptr, float* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v10(float x, const float* sptr, float* cptr) { + sincospi(x, sptr, cptr); +} +__global__ void sincospi_kernel_v11(float x, float* sptr, char* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v12(float x, float* sptr, short* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v13(float x, float* sptr, int* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v14(float x, float* sptr, long* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v15(float x, float* sptr, long long* cptr) { + sincospi(x, sptr, cptr); +} +__global__ void sincospi_kernel_v16(float x, float* sptr, double* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v17(float x, float* sptr, Dummy* cptr) { sincospi(x, sptr, cptr); } +__global__ void sincospi_kernel_v18(float x, float* sptr, const float* cptr) { + sincospi(x, sptr, cptr); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/trig_funcs.cc b/projects/hip-tests/catch/unit/math/trig_funcs.cc new file mode 100644 index 0000000000..9671b94ab9 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/trig_funcs.cc @@ -0,0 +1,137 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "trig_negative_kernels_rtc.hh" + +#include "unary_common.hh" +#include "binary_common.hh" + +#include + + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sin, std::sin, 2, 2); +TEST_CASE("Unit_Device_sin_sinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSin); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cos, std::cos, 2, 2) +TEST_CASE("Unit_Device_cos_cosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCos); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(tan, std::tan, 4, 2) +TEST_CASE("Unit_Device_tan_tanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTan); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(asin, std::asin, 2, 2) +TEST_CASE("Unit_Device_asin_asinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAsin); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(acos, std::acos, 2, 2) +TEST_CASE("Unit_Device_acos_acosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAcos); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(atan, std::atan, 2, 2) +TEST_CASE("Unit_Device_atan_atanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAtan); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sinh, std::sinh, 3, 2) +TEST_CASE("Unit_Device_sinh_sinhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSinh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cosh, std::cosh, 2, 1) +TEST_CASE("Unit_Device_cosh_coshf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCosh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(tanh, std::tanh, 2, 1) +TEST_CASE("Unit_Device_tanh_tanhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTanh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(asinh, std::asinh, 3, 2) +TEST_CASE("Unit_Device_asinh_asinhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAsinh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(acosh, std::acosh, 4, 2) +TEST_CASE("Unit_Device_acosh_acoshf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAcosh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(atanh, std::atanh, 3, 2) +TEST_CASE("Unit_Device_atanh_atanhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAtanh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sinpi, boost::math::sin_pi, 2, 2); +TEST_CASE("Unit_Device_sinpi_sinpif_Negative_RTC") { NegativeTestRTCWrapper<4>(kSinpi); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cospi, boost::math::cos_pi, 2, 2); +TEST_CASE("Unit_Device_cospi_cospif_Negative_RTC") { NegativeTestRTCWrapper<4>(kCospi); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(atan2, std::atan2, 3, 2); +TEST_CASE("Unit_Device_atan2_atan2f_Negative_RTC") { NegativeTestRTCWrapper<8>(kAtan2); } + + +template +__global__ void sincos_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + sincosf(xs[i], &ys[i].first, &ys[i].second); + } else if constexpr (std::is_same_v) { + sincos(xs[i], &ys[i].first, &ys[i].second); + } + } +} + +template std::pair sincos(T x) { return {std::sin(x), std::cos(x)}; } + +TEST_CASE("Unit_Device_sincos_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + sincos_kernel, sincos, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2))); +} + +TEST_CASE("Unit_Device_sincos_Accuracy_Positive - double") { + const auto validator_builder = + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2)); + UnaryDoublePrecisionTest(sincos_kernel, sincos, validator_builder); +} + +TEST_CASE("Unit_Device_sincos_sincosf_Negative_RTC") { NegativeTestRTCWrapper<36>(kSincos); } + + +template +__global__ void sincospi_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + sincospif(xs[i], &ys[i].first, &ys[i].second); + } else if constexpr (std::is_same_v) { + sincospi(xs[i], &ys[i].first, &ys[i].second); + } + } +} + +template std::pair sincospi(T x) { + return {boost::math::sin_pi(x), boost::math::cos_pi(x)}; +} + +TEST_CASE("Unit_Device_sincospi_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + sincospi_kernel, sincospi, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2))); +} + +TEST_CASE("Unit_Device_sincospi_Accuracy_Positive - double") { + const auto validator_builder = + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2)); + UnaryDoublePrecisionTest(sincospi_kernel, sincospi, validator_builder); +} + +TEST_CASE("Unit_Device_sincospi_sincospif_Negative_RTC") { NegativeTestRTCWrapper<36>(kSincospi); } \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/trig_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/math/trig_negative_kernels_rtc.hh new file mode 100644 index 0000000000..1c855d755e --- /dev/null +++ b/projects/hip-tests/catch/unit/math/trig_negative_kernels_rtc.hh @@ -0,0 +1,320 @@ +// #define TRIG_UNARY_NEGATIVE_KERNELS(func_name) +// class Dummy { +// public: +// __device__ Dummy() {} +// __device__ ~Dummy() {} +// }; +// __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } +// __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } +// __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } +// __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +static constexpr auto kSin{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinf_kernel_v1(float* x) { float result = sinf(x); } + __global__ void sinf_kernel_v2(Dummy x) { float result = sinf(x); } + __global__ void sin_kernel_v1(double* x) { double result = sin(x); } + __global__ void sin_kernel_v2(Dummy x) { double result = sin(x); } + )"}; + +static constexpr auto kCos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cosf_kernel_v1(float* x) { float result = cosf(x); } + __global__ void cosf_kernel_v2(Dummy x) { float result = cosf(x); } + __global__ void cos_kernel_v1(double* x) { double result = cos(x); } + __global__ void cos_kernel_v2(Dummy x) { double result = cos(x); } + )"}; + +static constexpr auto kTan{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tanf_kernel_v1(float* x) { float result = tanf(x); } + __global__ void tanf_kernel_v2(Dummy x) { float result = tanf(x); } + __global__ void tan_kernel_v1(double* x) { double result = tan(x); } + __global__ void tan_kernel_v2(Dummy x) { double result = tan(x); } + )"}; + +static constexpr auto kAsin{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void asinf_kernel_v1(float* x) { float result = asinf(x); } + __global__ void asinf_kernel_v2(Dummy x) { float result = asinf(x); } + __global__ void asin_kernel_v1(double* x) { double result = asin(x); } + __global__ void asin_kernel_v2(Dummy x) { double result = asin(x); } + )"}; + +static constexpr auto kAcos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void acosf_kernel_v1(float* x) { float result = acosf(x); } + __global__ void acosf_kernel_v2(Dummy x) { float result = acosf(x); } + __global__ void acos_kernel_v1(double* x) { double result = acos(x); } + __global__ void acos_kernel_v2(Dummy x) { double result = acos(x); } + )"}; + +static constexpr auto kAtan{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atanf_kernel_v1(float* x) { float result = atanf(x); } + __global__ void atanf_kernel_v2(Dummy x) { float result = atanf(x); } + __global__ void atan_kernel_v1(double* x) { double result = atan(x); } + __global__ void atan_kernel_v2(Dummy x) { double result = atan(x); } + )"}; + +static constexpr auto kSinh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinhf_kernel_v1(float* x) { float result = sinhf(x); } + __global__ void sinhf_kernel_v2(Dummy x) { float result = sinhf(x); } + __global__ void sinh_kernel_v1(double* x) { double result = sinh(x); } + __global__ void sinh_kernel_v2(Dummy x) { double result = sinh(x); } + )"}; + +static constexpr auto kCosh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void coshf_kernel_v1(float* x) { float result = coshf(x); } + __global__ void coshf_kernel_v2(Dummy x) { float result = coshf(x); } + __global__ void cosh_kernel_v1(double* x) { double result = cosh(x); } + __global__ void cosh_kernel_v2(Dummy x) { double result = cosh(x); } + )"}; + +static constexpr auto kTanh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tanhf_kernel_v1(float* x) { float result = tanhf(x); } + __global__ void tanhf_kernel_v2(Dummy x) { float result = tanhf(x); } + __global__ void tanh_kernel_v1(double* x) { double result = tanh(x); } + __global__ void tanh_kernel_v2(Dummy x) { double result = tanh(x); } + )"}; + +static constexpr auto kAsinh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void asinhf_kernel_v1(float* x) { float result = asinhf(x); } + __global__ void asinhf_kernel_v2(Dummy x) { float result = asinhf(x); } + __global__ void asinh_kernel_v1(double* x) { double result = asinh(x); } + __global__ void asinh_kernel_v2(Dummy x) { double result = asinh(x); } + )"}; + +static constexpr auto kAcosh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void acoshf_kernel_v1(float* x) { float result = acoshf(x); } + __global__ void acoshf_kernel_v2(Dummy x) { float result = acoshf(x); } + __global__ void acosh_kernel_v1(double* x) { double result = acosh(x); } + __global__ void acosh_kernel_v2(Dummy x) { double result = acosh(x); } + )"}; + +static constexpr auto kAtanh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atanhf_kernel_v1(float* x) { float result = atanhf(x); } + __global__ void atanhf_kernel_v2(Dummy x) { float result = atanhf(x); } + __global__ void atanh_kernel_v1(double* x) { double result = atanh(x); } + __global__ void atanh_kernel_v2(Dummy x) { double result = atanh(x); } + )"}; + +static constexpr auto kSinpi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinpif_kernel_v1(float* x) { float result = sinpif(x); } + __global__ void sinpif_kernel_v2(Dummy x) { float result = sinpif(x); } + __global__ void sinpi_kernel_v1(double* x) { double result = sinpi(x); } + __global__ void sinpi_kernel_v2(Dummy x) { double result = sinpi(x); } + )"}; + +static constexpr auto kCospi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cospif_kernel_v1(float* x) { float result = cospif(x); } + __global__ void cospif_kernel_v2(Dummy x) { float result = cospif(x); } + __global__ void cospi_kernel_v1(double* x) { double result = cospi(x); } + __global__ void cospi_kernel_v2(Dummy x) { double result = cospi(x); } + )"}; + +static constexpr auto kAtan2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atan2f_kernel_v1(float* x, float y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v2(float x, float* y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v3(Dummy x, float y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v4(float x, Dummy y) { float result = atan2f(x, y); } + __global__ void atan2_kernel_v1(double* x, double y) { double result = atan2(x, y); } + __global__ void atan2_kernel_v2(double x, double* y) { double result = atan2(x, y); } + __global__ void atan2_kernel_v3(Dummy x, double y) { double result = atan2(x, y); } + __global__ void atan2_kernel_v4(double x, Dummy y) { double result = atan2(x, y); } + )"}; + +static constexpr auto kSincos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sincosf_kernel_v1(float* x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v2(Dummy x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v3(float x, char* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v4(float x, short* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v5(float x, int* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v6(float x, long* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v7(float x, long long* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v8(float x, double* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v9(float x, Dummy* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v10(float x, const float* sptr, float* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincosf_kernel_v11(float x, float* sptr, char* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v12(float x, float* sptr, short* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v13(float x, float* sptr, int* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v14(float x, float* sptr, long* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v15(float x, float* sptr, long long* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincosf_kernel_v16(float x, float* sptr, double* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v17(float x, float* sptr, Dummy* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v18(float x, float* sptr, const float* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincos_kernel_v1(double* x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v2(Dummy x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v3(double x, char* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v4(double x, short* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v5(double x, int* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v6(double x, long* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v7(double x, long long* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v8(double x, float* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v9(double x, Dummy* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v10(double x, const double* sptr, double* cptr) { + sincos(x, sptr, cptr); + } + __global__ void sincos_kernel_v11(double x, double* sptr, char* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v12(double x, double* sptr, short* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v13(double x, double* sptr, int* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v14(double x, double* sptr, long* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v15(double x, double* sptr, long long* cptr) { + sincos(x, sptr, cptr); + } + __global__ void sincos_kernel_v16(double x, double* sptr, float* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v17(double x, double* sptr, Dummy* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v18(double x, double* sptr, const double* cptr) { + sincos(x, sptr, cptr); + } + )"}; + +static constexpr auto kSincospi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sincospif_kernel_v1(float* x, float* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v3(float x, char* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v4(float x, short* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v5(float x, int* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v6(float x, long* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v7(float x, long long* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v8(float x, double* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v9(float x, Dummy* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v10(float x, const float* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v11(float x, float* sptr, char* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v12(float x, float* sptr, short* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v13(float x, float* sptr, int* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v14(float x, float* sptr, long* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v15(float x, float* sptr, long long* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v16(float x, float* sptr, double* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v17(float x, float* sptr, Dummy* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v18(float x, float* sptr, const float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospi_kernel_v1(float* x, float* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v3(float x, char* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v4(float x, short* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v5(float x, int* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v6(float x, long* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v7(float x, long long* sptr, float* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v8(float x, double* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v9(float x, Dummy* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v10(float x, const float* sptr, float* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v11(float x, float* sptr, char* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v12(float x, float* sptr, short* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v13(float x, float* sptr, int* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v14(float x, float* sptr, long* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v15(float x, float* sptr, long long* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v16(float x, float* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v17(float x, float* sptr, Dummy* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v18(float x, float* sptr, const float* cptr) { + sincospi(x, sptr, cptr); + } + )"}; \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/trig_single_precision_negative_kernels.cc b/projects/hip-tests/catch/unit/math/trig_single_precision_negative_kernels.cc new file mode 100644 index 0000000000..5e66d386be --- /dev/null +++ b/projects/hip-tests/catch/unit/math/trig_single_precision_negative_kernels.cc @@ -0,0 +1,118 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define TRIG_SP_UNARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +/*Expecting 2 errors per macro invocation - 26 total*/ +TRIG_SP_UNARY_NEGATIVE_KERNELS(sin) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cos) +TRIG_SP_UNARY_NEGATIVE_KERNELS(tan) +TRIG_SP_UNARY_NEGATIVE_KERNELS(asin) +TRIG_SP_UNARY_NEGATIVE_KERNELS(acos) +TRIG_SP_UNARY_NEGATIVE_KERNELS(atan) +TRIG_SP_UNARY_NEGATIVE_KERNELS(sinh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cosh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(tanh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(asinh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(atanh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(sinpi) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cospi) + +/*Expecting 4 errors*/ +__global__ void atan2f_kernel_v1(float* x, float y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v2(float x, float* y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v3(Dummy x, float y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v4(float x, Dummy y) { float result = atan2f(x, y); } + +/*Expecting 18 errors*/ +__global__ void sincosf_kernel_v1(float* x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v2(Dummy x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v3(float x, char* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v4(float x, short* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v5(float x, int* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v6(float x, long* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v7(float x, long long* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v8(float x, double* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v9(float x, Dummy* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v10(float x, const float* sptr, float* cptr) { + sincosf(x, sptr, cptr); +} +__global__ void sincosf_kernel_v11(float x, float* sptr, char* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v12(float x, float* sptr, short* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v13(float x, float* sptr, int* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v14(float x, float* sptr, long* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v15(float x, float* sptr, long long* cptr) { + sincosf(x, sptr, cptr); +} +__global__ void sincosf_kernel_v16(float x, float* sptr, double* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v17(float x, float* sptr, Dummy* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v18(float x, float* sptr, const float* cptr) { + sincosf(x, sptr, cptr); +} + +/*Expecting 18 errors*/ +__global__ void sincospif_kernel_v1(float* x, float* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v3(float x, char* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v4(float x, short* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v5(float x, int* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v6(float x, long* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v7(float x, long long* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v8(float x, double* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v9(float x, Dummy* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v10(float x, const float* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v11(float x, float* sptr, char* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v12(float x, float* sptr, short* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v13(float x, float* sptr, int* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v14(float x, float* sptr, long* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v15(float x, float* sptr, long long* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v16(float x, float* sptr, double* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v17(float x, float* sptr, Dummy* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v18(float x, float* sptr, const float* cptr) { + sincospif(x, sptr, cptr); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/unary_common.hh b/projects/hip-tests/catch/unit/math/unary_common.hh new file mode 100644 index 0000000000..d80ffd1bbc --- /dev/null +++ b/projects/hip-tests/catch/unit/math/unary_common.hh @@ -0,0 +1,198 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_UNARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(RT* const ys, const size_t num_xs, T* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(xs[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(xs[i]); \ + } \ + } \ + } + +template +void UnarySinglePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)), stop); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + + for (uint64_t v = 0u; v < stop;) { + batch_size = std::min(max_batch_size, stop - v); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + + thread_pool.Post([=, &values] { + auto t = v; + uint32_t val; + for (auto j = 0u; j < sub_batch_size; ++j) { + val = static_cast(t++); + values.ptr()[base_idx + j] = *reinterpret_cast(&val); + } + }); + + v += sub_batch_size; + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void UnarySinglePrecisionRangeTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, const float a, + const float b) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + + uint32_t val = 0u; + const auto num_threads = thread_pool.thread_count(); + + size_t inserted = 0u; + for (float v = a; v != b; v = std::nextafter(v, b)) { + values.ptr()[inserted++] = v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr()); + inserted = 0u; + } +} + +template +void UnaryDoublePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const double a = std::numeric_limits::lowest(), + const double b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(double) + sizeof(T)), num_iterations); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(double)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &values] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(values.ptr() + base_idx, values.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void UnaryDoublePrecisionSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + + MathTest math_test(kernel, values.size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, values.size, + values.data); +} + +template +void UnarySinglePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Brute force") { UnarySinglePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +template +void UnaryDoublePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + UnaryDoublePrecisionSpecialValuesTest(kernel, ref, validator_builder); + } + + SECTION("Brute force") { UnaryDoublePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +#define MATH_UNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_UNARY_KERNEL_DEF(kern_name) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - float") { \ + double (*ref)(double) = ref_func; \ + UnarySinglePrecisionTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(sp_ulp)); \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - double") { \ + long double (*ref)(long double) = ref_func; \ + UnaryDoublePrecisionTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(dp_ulp)); \ + } + +#define MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(func_name, sp_ulp, dp_ulp) \ + MATH_UNARY_WITHIN_ULP_TEST_DEF(func_name, std::func_name, sp_ulp, dp_ulp) diff --git a/projects/hip-tests/catch/unit/math/validators.hh b/projects/hip-tests/catch/unit/math/validators.hh new file mode 100644 index 0000000000..b732f79354 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/validators.hh @@ -0,0 +1,152 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +// Define a new MatcherBase class with a public 'describe' member function because +// Catch::MatcherBase::describe is protected and thus can't be used via a pointer to +// Catch::MatcherBase. +template class MatcherBase : public Catch::MatcherBase { + public: + virtual std::string describe() const = 0; + virtual ~MatcherBase() = default; +}; + +template class ValidatorBase : public MatcherBase { + public: + template + ValidatorBase(T target, Ts&&... args) : matcher_{std::forward(args)...}, target_{target} {} + + bool match(const T& val) const override { + if (std::isnan(target_)) { + return std::isnan(val); + } + + return matcher_.match(val); + } + + std::string describe() const override { + if (std::isnan(target_)) { + return "is not NaN"; + } + + return matcher_.describe(); + } + + private: + Matcher matcher_; + T target_; + bool nan = false; +}; + +template auto ULPValidatorBuilderFactory(int64_t ulps) { + return [=](T target, auto&&... args) { + return std::make_unique>( + target, Catch::WithinULP(target, ulps)); + }; +}; + +template auto AbsValidatorBuilderFactory(double margin) { + return [=](T target, auto&&... args) { + return std::make_unique>( + target, Catch::WithinAbs(target, margin)); + }; +} + +template auto RelValidatorBuilderFactory(T margin) { + return [=](T target, auto&&... args) { + return std::make_unique>( + target, Catch::WithinRel(target, margin)); + }; +} + +template class EqValidator : public MatcherBase { + public: + EqValidator(T target) : target_{target} {} + + bool match(const T& val) const override { + if (std::isnan(target_)) { + return std::isnan(val); + } + + return target_ == val; + } + + std::string describe() const override { + std::stringstream ss; + ss << " is not equal to " << target_; + return ss.str(); + } + + private: + T target_; +}; + +template auto EqValidatorBuilderFactory() { + return [](T val, auto&&... args) { return std::make_unique>(val); }; +} + +template +class PairValidator : public MatcherBase> { + public: + PairValidator(const std::pair& target, const VBF& vbf, const VBS& vbs) + : first_matcher_{vbf(target.first)}, second_matcher_{vbs(target.second)} {} + + bool match(const std::pair& val) const override { + return first_matcher_->match(val.first) && second_matcher_->match(val.second); + } + + std::string describe() const override { + return "<" + first_matcher_->describe() + ", " + second_matcher_->describe() + ">"; + } + + private: + decltype(std::declval()(std::declval())) first_matcher_; + decltype(std::declval()(std::declval())) second_matcher_; +}; + +template +auto PairValidatorBuilderFactory(const ValidatorBuilder& vb) { + return [=](const std::pair& t, auto&&... args) { + return std::make_unique>(t, vb, vb); + }; +} + +template +auto PairValidatorBuilderFactory(const VBF& vbf, const VBS& vbs) { + return [=](const std::pair& t, auto&&... args) { + return std::make_unique>(t, vbf, vbs); + }; +} + +template class NopValidator : public MatcherBase { + public: + bool match(const T& val) const override { return true; } + + std::string describe() const override { return ""; } +}; + +template auto NopValidatorBuilderFactory() { + return [](auto&&... args) { return std::make_unique>(); }; +}