From e4b44cc41351aa4e100903ad05070f52e5ec8054 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Mon, 22 Jan 2024 22:12:44 +0530 Subject: [PATCH] EXSWHTEC-291 - Implement tests for floating-point and integer math intrinsics #227 Change-Id: I55a3cbf5ce15cd93a280af295b88c28e51246148 [ROCm/hip-tests commit: 17291543418b487f6f42e6aa7b4a46adfa48f3b3] --- .../hip-tests/catch/unit/math/CMakeLists.txt | 18 + .../unit/math/double_precision_intrinsics.cc | 243 ++++++++ ...e_precision_intrinsics_negative_kernels.cc | 46 ++ .../catch/unit/math/integer_intrinsics.cc | 320 +++++++++++ .../integer_intrinsics_negative_kernels.cc | 67 +++ .../unit/math/single_precision_intrinsics.cc | 530 ++++++++++++++++++ ...e_precision_intrinsics_negative_kernels.cc | 56 ++ 7 files changed, 1280 insertions(+) create mode 100644 projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc create mode 100644 projects/hip-tests/catch/unit/math/double_precision_intrinsics_negative_kernels.cc create mode 100644 projects/hip-tests/catch/unit/math/integer_intrinsics.cc create mode 100644 projects/hip-tests/catch/unit/math/integer_intrinsics_negative_kernels.cc create mode 100644 projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc create mode 100644 projects/hip-tests/catch/unit/math/single_precision_intrinsics_negative_kernels.cc diff --git a/projects/hip-tests/catch/unit/math/CMakeLists.txt b/projects/hip-tests/catch/unit/math/CMakeLists.txt index b0a2b5d00c..e552b9a8a8 100644 --- a/projects/hip-tests/catch/unit/math/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/math/CMakeLists.txt @@ -22,6 +22,9 @@ set(TEST_SRC trig_funcs.cc misc_funcs.cc remainder_and_rounding_funcs.cc + single_precision_intrinsics.cc + double_precision_intrinsics.cc + integer_intrinsics.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -58,3 +61,18 @@ add_test(NAME Unit_Device_rounding_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} math_rounding_negative_kernels.cc 40) + +add_test(NAME Unit_Single_Precision_Intrinsics_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + single_precision_intrinsics_negative_kernels.cc 42) + +add_test(NAME Unit_Double_Precision_Intrinsics_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + double_precision_intrinsics_negative_kernels.cc 18) + +add_test(NAME Unit_Integer_Intrinsics_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + integer_intrinsics_negative_kernels.cc 20) diff --git a/projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc b/projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc new file mode 100644 index 0000000000..69e5e2a8d0 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/double_precision_intrinsics.cc @@ -0,0 +1,243 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +/********** Unary Functions **********/ + +#define MATH_UNARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnaryDoublePrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_DP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(double target, double x) + + +static double __drcp_rn_ref(double x) { return 1.0 / x; } + +MATH_UNARY_DP_KERNEL_DEF(__drcp_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__drcp_rn(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_DP_TEST_DEF_IMPL(__drcp_rn, __drcp_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_DP_KERNEL_DEF(__dsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dsqrt_rn(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `double std::sqrt(double)`. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_DP_TEST_DEF_IMPL(__dsqrt_rn, static_cast(std::sqrt), + EqValidatorBuilderFactory()); + + +/********** Binary Functions **********/ + +#define MATH_BINARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s, \ + double* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define MATH_BINARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_BINARY_DP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_DP_TEST_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_BINARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(double target, \ + double x1, double x2) + + +static double __dadd_rn_ref(double x1, double x2) { return x1 + x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dadd_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dadd_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dadd_rn, __dadd_rn_ref, EqValidatorBuilderFactory()); + + +static double __dsub_rn_ref(double x1, double x2) { return x1 - x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dsub_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dsub_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dsub_rn, __dsub_rn_ref, EqValidatorBuilderFactory()); + + +static double __dmul_rn_ref(double x1, double x2) { return x1 * x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dmul_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dmul_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dmul_rn, __dmul_rn_ref, EqValidatorBuilderFactory()); + + +static double __ddiv_rn_ref(double x1, double x2) { return x1 / x2; } + +MATH_BINARY_DP_KERNEL_DEF(__ddiv_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__ddiv_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__ddiv_rn, __ddiv_rn_ref, EqValidatorBuilderFactory()); + + +/********** Ternary Functions **********/ + +#define MATH_TERNARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s, \ + double* const x2s, double* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_DP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder( \ + double target, double x1, double x2, double x3) + + +MATH_TERNARY_DP_KERNEL_DEF(__fma_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fma(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_DP_TEST_DEF_IMPL(__fma_rn, static_cast(std::fma), + EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/double_precision_intrinsics_negative_kernels.cc b/projects/hip-tests/catch/unit/math/double_precision_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..4ea26ae102 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/double_precision_intrinsics_negative_kernels.cc @@ -0,0 +1,46 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +#define INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } + + +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dadd_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dsub_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dmul_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__ddiv_rn) +INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(__dsqrt_rn) \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/integer_intrinsics.cc b/projects/hip-tests/catch/unit/math/integer_intrinsics.cc new file mode 100644 index 0000000000..d851577831 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/integer_intrinsics.cc @@ -0,0 +1,320 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +__global__ void __brev_kernel(unsigned int* y, unsigned int x) { y[0] = __brev(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__brev(x)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___brev_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + __brev_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAA); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0x55555555); +} + +__global__ void __brevll_kernel(unsigned long long int* y, unsigned long long int x) { + y[0] = __brevll(x); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__brevll(x)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___brevll_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, + sizeof(unsigned long long int)); + + __brevll_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAAAAAAAAAA); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0x5555555555555555); +} + +template __global__ void __clz_kernel(T* y, T x) { y[0] = __clz(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__clz(x)`. Run for `int` and `unsigned int` overloads. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___clz_Sanity_Positive", "", int, unsigned int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __clz_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 32); + + TestType x = 1; + for (int i = 0; i < 32; ++i) { + __clz_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 31 - i); + } +} + +template __global__ void __clzll_kernel(T* y, T x) { y[0] = __clzll(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__clzll(x)`. Run for `long long int` and `unsigned long long int` + * overloads. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___clzll_Sanity_Positive", "", long long int, + unsigned long long int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __clzll_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 64); + + TestType x = 1; + for (int i = 0; i < 64; ++i) { + __clzll_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 63 - i); + } +} + +template __global__ void __ffs_kernel(T* y, T x) { y[0] = __ffs(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__ffs(x)`. Run for `int` and `unsigned int` overloads. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___ffs_Sanity_Positive", "", int, unsigned int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __ffs_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + TestType x = 1; + for (int i = 0; i < 32; ++i) { + __ffs_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +template __global__ void __ffsll_kernel(T* y, T x) { y[0] = __ffsll(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__ffsll(x)`. Run for `long long int` and `unsigned long long int` + * overloads. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___ffsll_Sanity_Positive", "", long long int, + unsigned long long int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __ffsll_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + TestType x = 1; + for (int i = 0; i < 64; ++i) { + __ffsll_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +__global__ void __popc_kernel(unsigned int* y, unsigned int x) { y[0] = __popc(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__popc(x)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___popc_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + __popc_kernel<<<1, 1>>>(y.ptr(), 0); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + unsigned int x = 0; + for (int i = 0; i < 32; ++i) { + __popc_kernel<<<1, 1>>>(y.ptr(), x |= (1u << i)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +__global__ void __popcll_kernel(unsigned long long int* y, unsigned long long int x) { + y[0] = __popcll(x); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__popcll(x)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___popcll_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, + sizeof(unsigned long long int)); + + __popcll_kernel<<<1, 1>>>(y.ptr(), 0); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + unsigned long long int x = 0; + for (int i = 0; i < 64; ++i) { + __popcll_kernel<<<1, 1>>>(y.ptr(), x |= (1ull << i)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +__global__ void __mul24_kernel(int* y, int x1, int x2) { y[0] = __mul24(x1, x2); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__mul24(x,y)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___mul24_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __mul24_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == x1 * x2); +} + +__global__ void __umul24_kernel(unsigned int* y, unsigned int x1, unsigned int x2) { + y[0] = __umul24(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__umul24(x,y)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___umul24_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFF); + + __umul24_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == x1 * x2); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/integer_intrinsics_negative_kernels.cc b/projects/hip-tests/catch/unit/math/integer_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..ec5ac98fe3 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/integer_intrinsics_negative_kernels.cc @@ -0,0 +1,67 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(int* x) { int result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { int result = func_name(x); } + +#define INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(long long int* x) { long long int result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { long long int result = func_name(x); } + +#define INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(int* x, int y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(int x, int* y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, int y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(int x, Dummy y) { int result = func_name(x, y); } + +#define INTRINSIC_BINARY_LONGLONG_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(long long int* x, long long int y) { \ + long long int result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(long long int x, long long int* y) { \ + long long int result = func_name##(x, y); \ + } \ + __global__ void func_name##_kernel_v3(Dummy x, long long int y) { \ + long long int result = func_name##(x, y); \ + } \ + __global__ void func_name##_kernel_v4(long long int x, Dummy y) { \ + long long int result = func_name##(x, y); \ + } + +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__brev) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__clz) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__ffs) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__popc) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__brevll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__clzll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__ffsll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__popcll) +INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(__mul24) \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc b/projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc new file mode 100644 index 0000000000..1d9d340c0f --- /dev/null +++ b/projects/hip-tests/catch/unit/math/single_precision_intrinsics.cc @@ -0,0 +1,530 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +/********** Unary Functions **********/ + +#define MATH_UNARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnarySinglePrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_SP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x) + + +static float __frcp_rn_ref(float x) { return 1.0f / x; } + +MATH_UNARY_SP_KERNEL_DEF(__frcp_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__frcp_rn(x)` for all possible inputs. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__frcp_rn, __frcp_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_SP_KERNEL_DEF(__fsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fsqrt_rn(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__fsqrt_rn, static_cast(std::sqrt), + EqValidatorBuilderFactory()); + + +static float __frsqrt_rn_ref(float x) { return 1.0f / std::sqrt(x); } + +MATH_UNARY_SP_KERNEL_DEF(__frsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__frsqrt_rn(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__frsqrt_rn, __frsqrt_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__expf) { + const int64_t ulp_err = 2 + static_cast(std::floor(std::abs(1.16f * x))); + return ULPValidatorBuilderFactory(ulp_err)(target); +} + +MATH_UNARY_SP_KERNEL_DEF(__expf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__expf(x)` for all possible inputs. The results are + * compared against reference function `double std::exp(double)`. The maximum ulp error is `2 + + * floor(abs(1.16 * x))`. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__expf, static_cast(std::exp)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__exp10f) { + const int64_t ulp_err = 2 + static_cast(std::floor(std::abs(2.95f * x))); + return ULPValidatorBuilderFactory(ulp_err)(target); +} + +MATH_UNARY_SP_KERNEL_DEF(__exp10f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__exp10f(x)` for all possible inputs. The results are + * compared against reference function `double exp10(double)`. The maximum ulp error is `2 + + * floor(abs(2.95 * x))`. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__exp10f, static_cast(exp10)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__logf) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -21.41); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(3)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__logf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__logf(x)` for all possible inputs. The results are + * compared against reference function `double std::log(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-21.41, otherwise, the maximum ulp error is 3. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__logf, static_cast(std::log)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log2f) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -22.0); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(2)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__log2f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__log2f(x)` for all possible inputs. The results are + * compared against reference function `double std::log2(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-22, otherwise, the maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__log2f, static_cast(std::log2)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log10f) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -24.0); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(3)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__log10f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__log10f(x)` for all possible inputs. The results are + * compared against reference function `double std::log10(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-24, otherwise, the maximum ulp error is 3. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__log10f, static_cast(std::log10)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__sinf) { + if (-M_PI <= x && x <= M_PI) { + const auto abs_err = std::pow(2.0, -21.41); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__sinf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sinf(x)` for all possible inputs. The results are + * compared against reference function `double std::sin(double)`. For `x` in [-PI, PI], the maximum + * absolute error is 2^-21.41, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__sinf, static_cast(std::sin)); + + +__device__ float __sincosf_sin(float x) { + float sin, cos; + __sincosf(x, &sin, &cos); + return sin; +} + +MATH_UNARY_SP_KERNEL_DEF(__sincosf_sin); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The + * results in `sptr` are compared against reference function `double std::sin(double)`. For `x` in + * [-PI, PI], the maximum absolute error is 2^-21.41, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_sin, static_cast(std::sin), + __sinf_validator_builder); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__cosf) { + if (-M_PI <= x && x <= M_PI) { + const auto abs_err = std::pow(2.0, -21.19); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__cosf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__cosf(x)` for all possible inputs. The results are + * compared against reference function `double std::cos(double)`. For `x` in [-PI, PI], the maximum + * absolute error is 2^-21.19, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__cosf, static_cast(std::cos)); + + +__device__ float __sincosf_cos(float x) { + float sin, cos; + __sincosf(x, &sin, &cos); + return cos; +} + +MATH_UNARY_SP_KERNEL_DEF(__sincosf_cos); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The + * results in `cptr` are compared against reference function `double std::cos(double)`. For `x` in + * [-PI, PI], the maximum absolute error is 2^-21.19, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_cos, static_cast(std::cos), + __cosf_validator_builder); + + +/********** Binary Functions **********/ + +#define MATH_BINARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s, \ + float* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_BINARY_SP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2) + + +static float __fadd_rn_ref(float x1, float x2) { return x1 + x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fadd_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fadd_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fadd_rn, __fadd_rn_ref, EqValidatorBuilderFactory()); + + +static float __fsub_rn_ref(float x1, float x2) { return x1 - x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fsub_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fsub_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fsub_rn, __fsub_rn_ref, EqValidatorBuilderFactory()); + + +static float __fmul_rn_ref(float x1, float x2) { return x1 * x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fmul_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fmul_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fmul_rn, __fmul_rn_ref, EqValidatorBuilderFactory()); + + +static float __fdiv_rn_ref(float x1, float x2) { return x1 / x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fdiv_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fdiv_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fdiv_rn, __fdiv_rn_ref, EqValidatorBuilderFactory()); + + +MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(__fdividef) { + const auto abs_x2 = std::abs(x2); + if (std::pow(2.0f, -126.0f) <= abs_x2 && abs_x2 <= std::pow(2.0f, 126.0f)) { + return ULPValidatorBuilderFactory(2)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_BINARY_SP_KERNEL_DEF(__fdividef); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fdividef(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. For `|y|` in [2^-126, 2^126], the + * maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF(__fdividef, __fdiv_rn_ref); + + +/********** Ternary Functions **********/ + +#define MATH_TERNARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s, \ + float* const x2s, float* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_SP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2, float x3) + + +MATH_TERNARY_SP_KERNEL_DEF(__fmaf_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fmaf(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_SP_TEST_DEF_IMPL(__fmaf_rn, static_cast(std::fma), + EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/math/single_precision_intrinsics_negative_kernels.cc b/projects/hip-tests/catch/unit/math/single_precision_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..f293894f83 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/single_precision_intrinsics_negative_kernels.cc @@ -0,0 +1,56 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x) { float result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { float result = func_name(x); } + +#define INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x, float y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(float x, float* y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, float y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(float x, Dummy y) { float result = func_name(x, y); } + +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__fsqrt_rn) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__expf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__exp10f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__logf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log2f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log10f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__sinf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__cosf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__tanf) + +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fadd_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fsub_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fmul_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdiv_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdividef) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__powf) \ No newline at end of file