EXSWHTEC-291 - Implement tests for floating-point and integer math intrinsics #227

Change-Id: I55a3cbf5ce15cd93a280af295b88c28e51246148 [ROCm/hip-tests commit: 1729154341]
2024-01-22 22:12:44 +05:30
@@ -22,6 +22,9 @@ set(TEST_SRC
  trig_funcs.cc
  misc_funcs.cc
  remainder_and_rounding_funcs.cc
+  single_precision_intrinsics.cc
+  double_precision_intrinsics.cc
+  integer_intrinsics.cc
 )

 if(HIP_PLATFORM MATCHES "nvidia")
@@ -58,3 +61,18 @@ add_test(NAME Unit_Device_rounding_Negative
         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
         math_rounding_negative_kernels.cc 40)
+
+add_test(NAME Unit_Single_Precision_Intrinsics_Negative
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         single_precision_intrinsics_negative_kernels.cc 42)
+
+add_test(NAME Unit_Double_Precision_Intrinsics_Negative
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         double_precision_intrinsics_negative_kernels.cc 18)
+
+add_test(NAME Unit_Integer_Intrinsics_Negative
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         integer_intrinsics_negative_kernels.cc 20)
@@ -0,0 +1,243 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+#include "unary_common.hh"
+#include "binary_common.hh"
+#include "ternary_common.hh"
+
+/********** Unary Functions **********/
+
+#define MATH_UNARY_DP_KERNEL_DEF(func_name)                                                        \
+  __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const xs) {    \
+    const auto tid = cg::this_grid().thread_rank();                                                \
+    const auto stride = cg::this_grid().size();                                                    \
+                                                                                                   \
+    for (auto i = tid; i < num_xs; i += stride) {                                                  \
+      ys[i] = func_name(xs[i]);                                                                    \
+    }                                                                                              \
+  }
+
+#define MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder)                        \
+  TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") {                                      \
+    UnaryDoublePrecisionTest(func_name##_kernel, ref_func, validator_builder);                     \
+  }
+
+#define MATH_UNARY_DP_TEST_DEF(func_name, ref_func)                                                \
+  MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder)
+
+#define MATH_UNARY_DP_VALIDATOR_BUILDER_DEF(func_name)                                             \
+  static std::unique_ptr<MatcherBase<double>> func_name##_validator_builder(double target, double x)
+
+
+static double __drcp_rn_ref(double x) { return 1.0 / x; }
+
+MATH_UNARY_DP_KERNEL_DEF(__drcp_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__drcp_rn(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are
+ * IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/double_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_DP_TEST_DEF_IMPL(__drcp_rn, __drcp_rn_ref, EqValidatorBuilderFactory<double>());
+
+
+MATH_UNARY_DP_KERNEL_DEF(__dsqrt_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__dsqrt_rn(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The results are
+ * compared against reference function `double std::sqrt(double)`. The error bounds are
+ * IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/double_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_DP_TEST_DEF_IMPL(__dsqrt_rn, static_cast<double (*)(double)>(std::sqrt),
+                            EqValidatorBuilderFactory<double>());
+
+
+/********** Binary Functions **********/
+
+#define MATH_BINARY_DP_KERNEL_DEF(func_name)                                                       \
+  __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s,     \
+                                     double* const x2s) {                                          \
+    const auto tid = cg::this_grid().thread_rank();                                                \
+    const auto stride = cg::this_grid().size();                                                    \
+                                                                                                   \
+    for (auto i = tid; i < num_xs; i += stride) {                                                  \
+      ys[i] = func_name(x1s[i], x2s[i]);                                                           \
+    }                                                                                              \
+  }
+
+#define MATH_BINARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder)                       \
+  TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") {                                      \
+    BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder);                      \
+  }
+
+#define MATH_BINARY_DP_TEST_DEF(func_name, ref_func)                                               \
+  MATH_BINARY_DP_TEST_IMPL(func_name, ref_func, func_name##_validator_builder)
+
+#define MATH_BINARY_DP_VALIDATOR_BUILDER_DEF(func_name)                                            \
+  static std::unique_ptr<MatcherBase<double>> func_name##_validator_builder(double target,         \
+                                                                            double x1, double x2)
+
+
+static double __dadd_rn_ref(double x1, double x2) { return x1 + x2; }
+
+MATH_BINARY_DP_KERNEL_DEF(__dadd_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__dadd_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/double_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_DP_TEST_DEF_IMPL(__dadd_rn, __dadd_rn_ref, EqValidatorBuilderFactory<double>());
+
+
+static double __dsub_rn_ref(double x1, double x2) { return x1 - x2; }
+
+MATH_BINARY_DP_KERNEL_DEF(__dsub_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__dsub_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/double_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_DP_TEST_DEF_IMPL(__dsub_rn, __dsub_rn_ref, EqValidatorBuilderFactory<double>());
+
+
+static double __dmul_rn_ref(double x1, double x2) { return x1 * x2; }
+
+MATH_BINARY_DP_KERNEL_DEF(__dmul_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__dmul_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/double_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_DP_TEST_DEF_IMPL(__dmul_rn, __dmul_rn_ref, EqValidatorBuilderFactory<double>());
+
+
+static double __ddiv_rn_ref(double x1, double x2) { return x1 / x2; }
+
+MATH_BINARY_DP_KERNEL_DEF(__ddiv_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__ddiv_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/double_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_DP_TEST_DEF_IMPL(__ddiv_rn, __ddiv_rn_ref, EqValidatorBuilderFactory<double>());
+
+
+/********** Ternary Functions **********/
+
+#define MATH_TERNARY_DP_KERNEL_DEF(func_name)                                                      \
+  __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s,     \
+                                     double* const x2s, double* const x3s) {                       \
+    const auto tid = cg::this_grid().thread_rank();                                                \
+    const auto stride = cg::this_grid().size();                                                    \
+                                                                                                   \
+    for (auto i = tid; i < num_xs; i += stride) {                                                  \
+      ys[i] = func_name(x1s[i], x2s[i], x3s[i]);                                                   \
+    }                                                                                              \
+  }
+
+#define MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder)                      \
+  TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") {                                      \
+    TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder);                     \
+  }
+
+#define MATH_TERNARY_DP_TEST_DEF(func_name, ref_func, validator_builder)                           \
+  MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder)
+
+#define MATH_TERNARY_DP_VALIDATOR_BUILDER_DEF(func_name)                                           \
+  static std::unique_ptr<MatcherBase<double>> func_name##_validator_builder(                       \
+      double target, double x1, double x2, double x3)
+
+
+MATH_TERNARY_DP_KERNEL_DEF(__fma_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fma(x,y,z)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/double_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_TERNARY_DP_TEST_DEF_IMPL(__fma_rn, static_cast<double (*)(double, double, double)>(std::fma),
+                              EqValidatorBuilderFactory<double>());
@@ -0,0 +1,46 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+class Dummy {
+ public:
+  __device__ Dummy() {}
+  __device__ ~Dummy() {}
+};
+
+#define INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(func_name)                                         \
+  __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); }               \
+  __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); }
+
+#define INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(func_name)                                        \
+  __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); }  \
+  __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); }  \
+  __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); }    \
+  __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); }
+
+
+INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dadd_rn)
+INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dsub_rn)
+INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dmul_rn)
+INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__ddiv_rn)
+INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(__dsqrt_rn)
@@ -0,0 +1,320 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+#include <resource_guards.hh>
+
+__global__ void __brev_kernel(unsigned int* y, unsigned int x) { y[0] = __brev(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__brev(x)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___brev_Sanity_Positive") {
+  LinearAllocGuard<unsigned int> y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  __brev_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAA);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0x55555555);
+}
+
+__global__ void __brevll_kernel(unsigned long long int* y, unsigned long long int x) {
+  y[0] = __brevll(x);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__brevll(x)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___brevll_Sanity_Positive") {
+  LinearAllocGuard<unsigned long long int> y(LinearAllocs::hipMallocManaged,
+                                             sizeof(unsigned long long int));
+
+  __brevll_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAAAAAAAAAA);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0x5555555555555555);
+}
+
+template <typename T> __global__ void __clz_kernel(T* y, T x) { y[0] = __clz(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__clz(x)`. Run for `int` and `unsigned int` overloads.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___clz_Sanity_Positive", "", int, unsigned int) {
+  LinearAllocGuard<TestType> y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __clz_kernel<<<1, 1>>>(y.ptr(), static_cast<TestType>(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 32);
+
+  TestType x = 1;
+  for (int i = 0; i < 32; ++i) {
+    __clz_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == 31 - i);
+  }
+}
+
+template <typename T> __global__ void __clzll_kernel(T* y, T x) { y[0] = __clzll(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__clzll(x)`. Run for `long long int` and `unsigned long long int`
+ * overloads.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___clzll_Sanity_Positive", "", long long int,
+                   unsigned long long int) {
+  LinearAllocGuard<TestType> y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __clzll_kernel<<<1, 1>>>(y.ptr(), static_cast<TestType>(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 64);
+
+  TestType x = 1;
+  for (int i = 0; i < 64; ++i) {
+    __clzll_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == 63 - i);
+  }
+}
+
+template <typename T> __global__ void __ffs_kernel(T* y, T x) { y[0] = __ffs(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__ffs(x)`. Run for `int` and `unsigned int` overloads.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___ffs_Sanity_Positive", "", int, unsigned int) {
+  LinearAllocGuard<TestType> y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __ffs_kernel<<<1, 1>>>(y.ptr(), static_cast<TestType>(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);
+
+  TestType x = 1;
+  for (int i = 0; i < 32; ++i) {
+    __ffs_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);
+  }
+}
+
+template <typename T> __global__ void __ffsll_kernel(T* y, T x) { y[0] = __ffsll(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__ffsll(x)`. Run for `long long int` and `unsigned long long int`
+ * overloads.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device___ffsll_Sanity_Positive", "", long long int,
+                   unsigned long long int) {
+  LinearAllocGuard<TestType> y(LinearAllocs::hipMallocManaged, sizeof(TestType));
+
+  __ffsll_kernel<<<1, 1>>>(y.ptr(), static_cast<TestType>(0));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);
+
+  TestType x = 1;
+  for (int i = 0; i < 64; ++i) {
+    __ffsll_kernel<<<1, 1>>>(y.ptr(), x << i);
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);
+  }
+}
+
+__global__ void __popc_kernel(unsigned int* y, unsigned int x) { y[0] = __popc(x); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__popc(x)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___popc_Sanity_Positive") {
+  LinearAllocGuard<unsigned int> y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  __popc_kernel<<<1, 1>>>(y.ptr(), 0);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);
+
+  unsigned int x = 0;
+  for (int i = 0; i < 32; ++i) {
+    __popc_kernel<<<1, 1>>>(y.ptr(), x |= (1u << i));
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);
+  }
+}
+
+__global__ void __popcll_kernel(unsigned long long int* y, unsigned long long int x) {
+  y[0] = __popcll(x);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__popcll(x)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___popcll_Sanity_Positive") {
+  LinearAllocGuard<unsigned long long int> y(LinearAllocs::hipMallocManaged,
+                                             sizeof(unsigned long long int));
+
+  __popcll_kernel<<<1, 1>>>(y.ptr(), 0);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == 0);
+
+  unsigned long long int x = 0;
+  for (int i = 0; i < 64; ++i) {
+    __popcll_kernel<<<1, 1>>>(y.ptr(), x |= (1ull << i));
+    HIP_CHECK(hipDeviceSynchronize());
+
+    REQUIRE(y.ptr()[0] == i + 1);
+  }
+}
+
+__global__ void __mul24_kernel(int* y, int x1, int x2) { y[0] = __mul24(x1, x2); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__mul24(x,y)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___mul24_Sanity_Positive") {
+  LinearAllocGuard<int> y(LinearAllocs::hipMallocManaged, sizeof(int));
+
+  int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF);
+  int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF);
+
+  __mul24_kernel<<<1, 1>>>(y.ptr(), x1, x2);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == x1 * x2);
+}
+
+__global__ void __umul24_kernel(unsigned int* y, unsigned int x1, unsigned int x2) {
+  y[0] = __umul24(x1, x2);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `__umul24(x,y)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/integer_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___umul24_Sanity_Positive") {
+  LinearAllocGuard<unsigned int> y(LinearAllocs::hipMallocManaged, sizeof(unsigned int));
+
+  unsigned int x1 = GENERATE(0, 42, 0xFFFFFF);
+  unsigned int x2 = GENERATE(0, 42, 0xFFFFFF);
+
+  __umul24_kernel<<<1, 1>>>(y.ptr(), x1, x2);
+  HIP_CHECK(hipDeviceSynchronize());
+
+  REQUIRE(y.ptr()[0] == x1 * x2);
+}
@@ -0,0 +1,67 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+class Dummy {
+ public:
+  __device__ Dummy() {}
+  __device__ ~Dummy() {}
+};
+
+#define INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(func_name)                                            \
+  __global__ void func_name##_kernel_v1(int* x) { int result = func_name(x); }                     \
+  __global__ void func_name##_kernel_v2(Dummy x) { int result = func_name(x); }
+
+#define INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(func_name)                                       \
+  __global__ void func_name##_kernel_v1(long long int* x) { long long int result = func_name(x); } \
+  __global__ void func_name##_kernel_v2(Dummy x) { long long int result = func_name(x); }
+
+#define INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(func_name)                                           \
+  __global__ void func_name##_kernel_v1(int* x, int y) { int result = func_name(x, y); }           \
+  __global__ void func_name##_kernel_v2(int x, int* y) { int result = func_name(x, y); }           \
+  __global__ void func_name##_kernel_v3(Dummy x, int y) { int result = func_name(x, y); }          \
+  __global__ void func_name##_kernel_v4(int x, Dummy y) { int result = func_name(x, y); }
+
+#define INTRINSIC_BINARY_LONGLONG_NEGATIVE_KERNELS(func_name)                                      \
+  __global__ void func_name##_kernel_v1(long long int* x, long long int y) {                       \
+    long long int result = func_name(x, y);                                                        \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v2(long long int x, long long int* y) {                       \
+    long long int result = func_name##(x, y);                                                      \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v3(Dummy x, long long int y) {                                \
+    long long int result = func_name##(x, y);                                                      \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v4(long long int x, Dummy y) {                                \
+    long long int result = func_name##(x, y);                                                      \
+  }
+
+INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__brev)
+INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__clz)
+INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__ffs)
+INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__popc)
+INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__brevll)
+INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__clzll)
+INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__ffsll)
+INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__popcll)
+INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(__mul24)
@@ -0,0 +1,530 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+#include "unary_common.hh"
+#include "binary_common.hh"
+#include "ternary_common.hh"
+
+/********** Unary Functions **********/
+
+#define MATH_UNARY_SP_KERNEL_DEF(func_name)                                                        \
+  __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const xs) {      \
+    const auto tid = cg::this_grid().thread_rank();                                                \
+    const auto stride = cg::this_grid().size();                                                    \
+                                                                                                   \
+    for (auto i = tid; i < num_xs; i += stride) {                                                  \
+      ys[i] = func_name(xs[i]);                                                                    \
+    }                                                                                              \
+  }
+
+#define MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder)                        \
+  TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") {                                      \
+    UnarySinglePrecisionTest(func_name##_kernel, ref_func, validator_builder);                     \
+  }
+
+#define MATH_UNARY_SP_TEST_DEF(func_name, ref_func)                                                \
+  MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder)
+
+#define MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(func_name)                                             \
+  static std::unique_ptr<MatcherBase<float>> func_name##_validator_builder(float target, float x)
+
+
+static float __frcp_rn_ref(float x) { return 1.0f / x; }
+
+MATH_UNARY_SP_KERNEL_DEF(__frcp_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__frcp_rn(x)` for all possible inputs. The error bounds are
+ * IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF_IMPL(__frcp_rn, __frcp_rn_ref, EqValidatorBuilderFactory<float>());
+
+
+MATH_UNARY_SP_KERNEL_DEF(__fsqrt_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fsqrt_rn(x)` for all possible inputs. The results are
+ * compared against reference function `float std::sqrt(float)`. The error bounds are
+ * IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF_IMPL(__fsqrt_rn, static_cast<float (*)(float)>(std::sqrt),
+                            EqValidatorBuilderFactory<float>());
+
+
+static float __frsqrt_rn_ref(float x) { return 1.0f / std::sqrt(x); }
+
+MATH_UNARY_SP_KERNEL_DEF(__frsqrt_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__frsqrt_rn(x)` for all possible inputs. The results are
+ * compared against reference function `float std::sqrt(float)`. The error bounds are
+ * IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF_IMPL(__frsqrt_rn, __frsqrt_rn_ref, EqValidatorBuilderFactory<float>());
+
+
+MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__expf) {
+  const int64_t ulp_err = 2 + static_cast<int64_t>(std::floor(std::abs(1.16f * x)));
+  return ULPValidatorBuilderFactory<float>(ulp_err)(target);
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__expf);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__expf(x)` for all possible inputs. The results are
+ * compared against reference function `double std::exp(double)`. The maximum ulp error is `2 +
+ * floor(abs(1.16 * x))`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF(__expf, static_cast<double (*)(double)>(std::exp));
+
+
+MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__exp10f) {
+  const int64_t ulp_err = 2 + static_cast<int64_t>(std::floor(std::abs(2.95f * x)));
+  return ULPValidatorBuilderFactory<float>(ulp_err)(target);
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__exp10f);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__exp10f(x)` for all possible inputs. The results are
+ * compared against reference function `double exp10(double)`. The maximum ulp error is `2 +
+ * floor(abs(2.95 * x))`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF(__exp10f, static_cast<double (*)(double)>(exp10));
+
+
+MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__logf) {
+  if (0.5f <= x && x <= 2.0f) {
+    const auto abs_err = std::pow(2.0, -21.41);
+    return AbsValidatorBuilderFactory<float>(abs_err)(target);
+  } else {
+    return ULPValidatorBuilderFactory<float>(3)(target);
+  }
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__logf);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__logf(x)` for all possible inputs. The results are
+ * compared against reference function `double std::log(double)`. For `x` in [0.5, 2], the maximum
+ * absolute error is 2^-21.41, otherwise, the maximum ulp error is 3.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF(__logf, static_cast<double (*)(double)>(std::log));
+
+
+MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log2f) {
+  if (0.5f <= x && x <= 2.0f) {
+    const auto abs_err = std::pow(2.0, -22.0);
+    return AbsValidatorBuilderFactory<float>(abs_err)(target);
+  } else {
+    return ULPValidatorBuilderFactory<float>(2)(target);
+  }
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__log2f);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__log2f(x)` for all possible inputs. The results are
+ * compared against reference function `double std::log2(double)`. For `x` in [0.5, 2], the maximum
+ * absolute error is 2^-22, otherwise, the maximum ulp error is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF(__log2f, static_cast<double (*)(double)>(std::log2));
+
+
+MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log10f) {
+  if (0.5f <= x && x <= 2.0f) {
+    const auto abs_err = std::pow(2.0, -24.0);
+    return AbsValidatorBuilderFactory<float>(abs_err)(target);
+  } else {
+    return ULPValidatorBuilderFactory<float>(3)(target);
+  }
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__log10f);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__log10f(x)` for all possible inputs. The results are
+ * compared against reference function `double std::log10(double)`. For `x` in [0.5, 2], the maximum
+ * absolute error is 2^-24, otherwise, the maximum ulp error is 3.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF(__log10f, static_cast<double (*)(double)>(std::log10));
+
+
+MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__sinf) {
+  if (-M_PI <= x && x <= M_PI) {
+    const auto abs_err = std::pow(2.0, -21.41);
+    return AbsValidatorBuilderFactory<float>(abs_err)(target);
+  } else {
+    return NopValidatorBuilderFactory<float>()();
+  }
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__sinf);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__sinf(x)` for all possible inputs. The results are
+ * compared against reference function `double std::sin(double)`. For `x` in [-PI, PI], the maximum
+ * absolute error is 2^-21.41, and larger otherwise.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF(__sinf, static_cast<double (*)(double)>(std::sin));
+
+
+__device__ float __sincosf_sin(float x) {
+  float sin, cos;
+  __sincosf(x, &sin, &cos);
+  return sin;
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__sincosf_sin);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The
+ * results in `sptr` are compared against reference function `double std::sin(double)`. For `x` in
+ * [-PI, PI], the maximum absolute error is 2^-21.41, and larger otherwise.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_sin, static_cast<double (*)(double)>(std::sin),
+                            __sinf_validator_builder);
+
+
+MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__cosf) {
+  if (-M_PI <= x && x <= M_PI) {
+    const auto abs_err = std::pow(2.0, -21.19);
+    return AbsValidatorBuilderFactory<float>(abs_err)(target);
+  } else {
+    return NopValidatorBuilderFactory<float>()();
+  }
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__cosf);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__cosf(x)` for all possible inputs. The results are
+ * compared against reference function `double std::cos(double)`. For `x` in [-PI, PI], the maximum
+ * absolute error is 2^-21.19, and larger otherwise.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF(__cosf, static_cast<double (*)(double)>(std::cos));
+
+
+__device__ float __sincosf_cos(float x) {
+  float sin, cos;
+  __sincosf(x, &sin, &cos);
+  return cos;
+}
+
+MATH_UNARY_SP_KERNEL_DEF(__sincosf_cos);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The
+ * results in `cptr` are compared against reference function `double std::cos(double)`. For `x` in
+ * [-PI, PI], the maximum absolute error is 2^-21.19, and larger otherwise.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_cos, static_cast<double (*)(double)>(std::cos),
+                            __cosf_validator_builder);
+
+
+/********** Binary Functions **********/
+
+#define MATH_BINARY_SP_KERNEL_DEF(func_name)                                                       \
+  __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s,       \
+                                     float* const x2s) {                                           \
+    const auto tid = cg::this_grid().thread_rank();                                                \
+    const auto stride = cg::this_grid().size();                                                    \
+                                                                                                   \
+    for (auto i = tid; i < num_xs; i += stride) {                                                  \
+      ys[i] = func_name(x1s[i], x2s[i]);                                                           \
+    }                                                                                              \
+  }
+
+#define MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder)                       \
+  TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") {                                      \
+    BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder);                      \
+  }
+
+#define MATH_BINARY_SP_TEST_DEF(func_name, ref_func)                                               \
+  MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder)
+
+#define MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(func_name)                                            \
+  static std::unique_ptr<MatcherBase<float>> func_name##_validator_builder(float target, float x1, \
+                                                                           float x2)
+
+
+static float __fadd_rn_ref(float x1, float x2) { return x1 + x2; }
+
+MATH_BINARY_SP_KERNEL_DEF(__fadd_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fadd_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_SP_TEST_DEF_IMPL(__fadd_rn, __fadd_rn_ref, EqValidatorBuilderFactory<float>());
+
+
+static float __fsub_rn_ref(float x1, float x2) { return x1 - x2; }
+
+MATH_BINARY_SP_KERNEL_DEF(__fsub_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fsub_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_SP_TEST_DEF_IMPL(__fsub_rn, __fsub_rn_ref, EqValidatorBuilderFactory<float>());
+
+
+static float __fmul_rn_ref(float x1, float x2) { return x1 * x2; }
+
+MATH_BINARY_SP_KERNEL_DEF(__fmul_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fmul_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_SP_TEST_DEF_IMPL(__fmul_rn, __fmul_rn_ref, EqValidatorBuilderFactory<float>());
+
+
+static float __fdiv_rn_ref(float x1, float x2) { return x1 / x2; }
+
+MATH_BINARY_SP_KERNEL_DEF(__fdiv_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fdiv_rn(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_SP_TEST_DEF_IMPL(__fdiv_rn, __fdiv_rn_ref, EqValidatorBuilderFactory<float>());
+
+
+MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(__fdividef) {
+  const auto abs_x2 = std::abs(x2);
+  if (std::pow(2.0f, -126.0f) <= abs_x2 && abs_x2 <= std::pow(2.0f, 126.0f)) {
+    return ULPValidatorBuilderFactory<float>(2)(target);
+  } else {
+    return NopValidatorBuilderFactory<float>()();
+  }
+}
+
+MATH_BINARY_SP_KERNEL_DEF(__fdividef);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fdividef(x,y)` against a table of difficult values,
+ * followed by a large number of randomly generated values. For `|y|` in [2^-126, 2^126], the
+ * maximum ulp error is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_SP_TEST_DEF(__fdividef, __fdiv_rn_ref);
+
+
+/********** Ternary Functions **********/
+
+#define MATH_TERNARY_SP_KERNEL_DEF(func_name)                                                      \
+  __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s,       \
+                                     float* const x2s, float* const x3s) {                         \
+    const auto tid = cg::this_grid().thread_rank();                                                \
+    const auto stride = cg::this_grid().size();                                                    \
+                                                                                                   \
+    for (auto i = tid; i < num_xs; i += stride) {                                                  \
+      ys[i] = func_name(x1s[i], x2s[i], x3s[i]);                                                   \
+    }                                                                                              \
+  }
+
+#define MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder)                      \
+  TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") {                                      \
+    TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder);                     \
+  }
+
+#define MATH_TERNARY_SP_TEST_DEF(func_name, ref_func, validator_builder)                           \
+  MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder)
+
+#define MATH_TERNARY_SP_VALIDATOR_BUILDER_DEF(func_name)                                           \
+  static std::unique_ptr<MatcherBase<float>> func_name##_validator_builder(float target, float x1, \
+                                                                           float x2, float x3)
+
+
+MATH_TERNARY_SP_KERNEL_DEF(__fmaf_rn);
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `__fmaf(x,y,z)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/single_precision_intrinsics.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_TERNARY_SP_TEST_DEF_IMPL(__fmaf_rn, static_cast<float (*)(float, float, float)>(std::fma),
+                              EqValidatorBuilderFactory<float>());
@@ -0,0 +1,56 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+class Dummy {
+ public:
+  __device__ Dummy() {}
+  __device__ ~Dummy() {}
+};
+
+#define INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(func_name)                                          \
+  __global__ void func_name##_kernel_v1(float* x) { float result = func_name(x); }                 \
+  __global__ void func_name##_kernel_v2(Dummy x) { float result = func_name(x); }
+
+#define INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(func_name)                                         \
+  __global__ void func_name##_kernel_v1(float* x, float y) { float result = func_name(x, y); }     \
+  __global__ void func_name##_kernel_v2(float x, float* y) { float result = func_name(x, y); }     \
+  __global__ void func_name##_kernel_v3(Dummy x, float y) { float result = func_name(x, y); }      \
+  __global__ void func_name##_kernel_v4(float x, Dummy y) { float result = func_name(x, y); }
+
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__fsqrt_rn)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__expf)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__exp10f)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__logf)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log2f)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log10f)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__sinf)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__cosf)
+INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__tanf)
+
+INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fadd_rn)
+INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fsub_rn)
+INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fmul_rn)
+INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdiv_rn)
+INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdividef)
+INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__powf)