EXSWHTEC-285 - Implement tests for exponential and power device math functions #229

Change-Id: I34ad7ee92960500bcd14dfd7d230ca8f8f77c172 [ROCm/hip-tests commit: 87d601411b]
2024-01-22 23:41:01 +05:30
@@ -26,6 +26,7 @@ set(TEST_SRC
  double_precision_intrinsics.cc
  integer_intrinsics.cc
  root_funcs.cc
+  pow_funcs.cc
 )

 if(HIP_PLATFORM MATCHES "nvidia")
@@ -86,3 +87,7 @@ add_test(NAME Unit_Device_root_3Dand4D_Negative
         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
         math_root_negative_kernels_3Dand4D.cc 56)
+add_test(NAME Unit_Device_pow_Negative
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         math_pow_negative_kernels.cc 76)
@@ -0,0 +1,92 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+class Dummy {
+ public:
+  __device__ Dummy() {}
+  __device__ ~Dummy() {}
+};
+
+#define NEGATIVE_KERNELS_SHELL_EXP(func_name)                                                      \
+  __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); }               \
+  __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); }                 \
+  __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); }             \
+  __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); }
+
+#define NEGATIVE_KERNELS_SHELL_INT_2ND(func_name)                                                  \
+  __global__ void func_name##_kernel_v1(double* x, int e) { double result = func_name(x, e); }     \
+  __global__ void func_name##_kernel_v2(Dummy x, int e) { double result = func_name(x, e); }       \
+  __global__ void func_name##_kernel_v3(double x, int* e) { double result = func_name(x, e); }     \
+  __global__ void func_name##_kernel_v4(double x, Dummy e) { double result = func_name(x, e); }    \
+  __global__ void func_name##f_kernel_v1(float* x, int e) { float result = func_name##f(x, e); }   \
+  __global__ void func_name##f_kernel_v2(Dummy x, int e) { float result = func_name##f(x, e); }    \
+  __global__ void func_name##f_kernel_v3(float x, int* e) { float result = func_name##f(x, e); }   \
+  __global__ void func_name##f_kernel_v4(float x, Dummy e) { float result = func_name##f(x, e); }
+
+
+NEGATIVE_KERNELS_SHELL_EXP(exp)
+NEGATIVE_KERNELS_SHELL_EXP(exp2)
+NEGATIVE_KERNELS_SHELL_EXP(exp10)
+NEGATIVE_KERNELS_SHELL_EXP(expm1)
+
+__global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); }
+__global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); }
+__global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v9(float x, Dummy* nptr) { float result = frexpf(x, nptr); }
+__global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); }
+
+NEGATIVE_KERNELS_SHELL_INT_2ND(ldexp)
+
+__global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); }
+__global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); }
+__global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); }
+__global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); }
+__global__ void powf_kernel_v1(float* x, float e) { float result = powf(x, e); }
+__global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); }
+__global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); }
+__global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); }
+
+NEGATIVE_KERNELS_SHELL_INT_2ND(powi)
+NEGATIVE_KERNELS_SHELL_INT_2ND(scalbn)
+
+__global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); }
+__global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); }
+__global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); }
+__global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); }
+__global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); }
+__global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); }
+__global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); }
+__global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); }
@@ -0,0 +1,150 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+/*
+Negative kernels used for the math pow negative Test Cases that are using RTC.
+*/
+
+static constexpr auto kExp{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void exp_kernel_v1(double* x) { double result = exp(x); }
+  __global__ void exp_kernel_v2(Dummy x) { double result = exp(x); }
+  __global__ void expf_kernel_v1(float* x) { float result = expf(x); }
+  __global__ void expf_kernel_v2(Dummy x) { float result = expf(x); }
+)"};
+
+static constexpr auto kExp2{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void exp2_kernel_v1(double* x) { double result = exp2(x); }
+  __global__ void exp2_kernel_v2(Dummy x) { double result = exp2(x); }
+  __global__ void exp2f_kernel_v1(float* x) { float result = exp2f(x); }
+  __global__ void exp2f_kernel_v2(Dummy x) { float result = exp2f(x); }
+)"};
+
+static constexpr auto kExp10{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void exp10_kernel_v1(double* x) { double result = exp10(x); }
+  __global__ void exp10_kernel_v2(Dummy x) { double result = exp10(x); }
+  __global__ void exp10f_kernel_v1(float* x) { float result = exp10f(x); }
+  __global__ void exp10f_kernel_v2(Dummy x) { float result = exp10f(x); }
+)"};
+
+static constexpr auto kExpm1{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void expm1_kernel_v1(double* x) { double result = expm1(x); }
+  __global__ void expm1_kernel_v2(Dummy x) { double result = expm1(x); }
+  __global__ void expm1f_kernel_v1(float* x) { float result = expm1f(x); }
+  __global__ void expm1f_kernel_v2(Dummy x) { float result = expm1f(x); }
+)"};
+
+static constexpr auto kFrexp{R"(
+  __global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); }
+  __global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v9(float x, Dummy* nptr) { float result = frexpf(x, nptr); }
+  __global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); }
+)"};
+
+static constexpr auto kLdexp{R"(
+  __global__ void ldexp_kernel_v1(double* x, int e) { double result = ldexp(x, e); }
+  __global__ void ldexp_kernel_v2(Dummy x, int e) { double result = ldexp(x, e); }
+  __global__ void ldexp_kernel_v3(double x, int* e) { double result = ldexp(x, e); }
+  __global__ void ldexp_kernel_v4(double x, Dummy e) { double result = ldexp(x, e); }
+  __global__ void ldexpf_kernel_v1(float* x, int e) { float result = ldexpf(x, e); }
+  __global__ void ldexpf_kernel_v2(Dummy x, int e) { float result = ldexpf(x, e); }
+  __global__ void ldexpf_kernel_v3(float x, int* e) { float result = ldexpf(x, e); }
+  __global__ void ldexpf_kernel_v4(float x, Dummy e) { float result = ldexpf(x, e); }
+)"};
+
+static constexpr auto kPow{R"(
+  __global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); }
+  __global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); }
+  __global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); }
+  __global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); }
+  __global__ void powf_kernel_v1(float* x, float e) { float result = powf(x, e); }
+  __global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); }
+  __global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); }
+  __global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); }
+)"};
+
+static constexpr auto kPowi{R"(
+  __global__ void powi_kernel_v1(double* x, int e) { double result = powi(x, e); }
+  __global__ void powi_kernel_v2(Dummy x, int e) { double result = powi(x, e); }
+  __global__ void powi_kernel_v3(double x, int* e) { double result = powi(x, e); }
+  __global__ void powi_kernel_v4(double x, Dummy e) { double result = powi(x, e); }
+  __global__ void powif_kernel_v1(float* x, int e) { float result = powif(x, e); }
+  __global__ void powif_kernel_v2(Dummy x, int e) { float result = powif(x, e); }
+  __global__ void powif_kernel_v3(float x, int* e) { float result = powif(x, e); }
+  __global__ void powif_kernel_v4(float x, Dummy e) { float result = powif(x, e); }
+)"};
+
+static constexpr auto kScalbn{R"(
+  __global__ void scalbn_kernel_v1(double* x, int e) { double result = scalbn(x, e); }
+  __global__ void scalbn_kernel_v2(Dummy x, int e) { double result = scalbn(x, e); }
+  __global__ void scalbn_kernel_v3(double x, int* e) { double result = scalbn(x, e); }
+  __global__ void scalbn_kernel_v4(double x, Dummy e) { double result = scalbn(x, e); }
+  __global__ void scalbnf_kernel_v1(float* x, int e) { float result = scalbnf(x, e); }
+  __global__ void scalbnf_kernel_v2(Dummy x, int e) { float result = scalbnf(x, e); }
+  __global__ void scalbnf_kernel_v3(float x, int* e) { float result = scalbnf(x, e); }
+  __global__ void scalbnf_kernel_v4(float x, Dummy e) { float result = scalbnf(x, e); }
+)"};
+
+static constexpr auto kScalbln{R"(
+  __global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); }
+  __global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); }
+  __global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); }
+  __global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); }
+  __global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); }
+  __global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); }
+  __global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); }
+  __global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); }
+)"};
@@ -277,6 +277,12 @@ inline constexpr std::array kSpecialValuesFloat{
    +0.0f,
 };

+inline constexpr std::array kSpecialValuesInt{
+    0, 1, 2, 3, 126, 127, 128, 1022, 1023, 1024, 0x02000001, 0x04000001, 1465264071, 1488522147,
+    std::numeric_limits<int>::max(), -1, -2, -3, -126, -127, -128, -1022, -1023, -11024, -0x02000001,
+    -0x04000001, -1465264071, -1488522147, std::numeric_limits<int>::min(), -std::numeric_limits<int>::max()
+};
+
 template <typename T> struct SpecialVals {
  const T* const data;
  const size_t size;
@@ -284,4 +290,5 @@ template <typename T> struct SpecialVals {

 inline constexpr auto kSpecialValRegistry =
    std::make_tuple(SpecialVals<float>{kSpecialValuesFloat.data(), kSpecialValuesFloat.size()},
-                    SpecialVals<double>{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()});
+                    SpecialVals<double>{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()},
+                    SpecialVals<int>{kSpecialValuesInt.data(), kSpecialValuesInt.size()});
@@ -0,0 +1,134 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+#include "math_common.hh"
+#include "math_special_values.hh"
+
+#include <hip/hip_cooperative_groups.h>
+
+namespace cg = cooperative_groups;
+
+#define MATH_POW_INT_KERNEL_DEF(func_name)                                                         \
+  template <typename T1, typename T2>                                                              \
+  __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T1* const x1s,             \
+                                     T2* const x2s) {                                              \
+    const auto tid = cg::this_grid().thread_rank();                                                \
+    const auto stride = cg::this_grid().size();                                                    \
+                                                                                                   \
+    for (auto i = tid; i < num_xs; i += stride) {                                                  \
+      if constexpr (std::is_same_v<float, T1>) {                                                   \
+        ys[i] = func_name##f(x1s[i], x2s[i]);                                                      \
+      } else if constexpr (std::is_same_v<double, T1>) {                                           \
+        ys[i] = func_name(x1s[i], x2s[i]);                                                         \
+      }                                                                                            \
+    }                                                                                              \
+  }
+
+template <typename T1, typename T2>
+using kernel_pow_int_sig = void (*)(T1*, const size_t, T1*, T2*);
+
+template <typename T1, typename T2> using ref_pow_int_sig = T1 (*)(T1, T2);
+
+template <typename T1, typename T2, typename RT1, typename RT2, typename ValidatorBuilder>
+void PowIntFloatingPointBruteForceTest(kernel_pow_int_sig<T1, T2> kernel,
+                                       ref_pow_int_sig<RT1, RT2> ref_func,
+                                       const ValidatorBuilder& validator_builder) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const uint64_t num_iterations = GetTestIterationCount();
+  const auto max_batch_size =
+      std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) * 2 + sizeof(T2)), num_iterations);
+  LinearAllocGuard<T1> x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T1)};
+  LinearAllocGuard<T2> x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)};
+
+  MathTest math_test(kernel, max_batch_size);
+
+  auto batch_size = max_batch_size;
+  const auto num_threads = thread_pool.thread_count();
+  for (uint64_t i = 0ul; i < num_iterations; i += batch_size) {
+    batch_size = std::min<uint64_t>(max_batch_size, num_iterations - i);
+
+    const auto min_sub_batch_size = batch_size / num_threads;
+    const auto tail = batch_size % num_threads;
+
+    auto base_idx = 0u;
+    for (auto i = 0u; i < num_threads; ++i) {
+      const auto sub_batch_size = min_sub_batch_size + (i < tail);
+      thread_pool.Post([=, &x1s, &x2s] {
+        const auto generator1 = [=] {
+          static thread_local std::mt19937 rng(std::random_device{}());
+          std::uniform_real_distribution<RefType_t<T1>> unif_dist(std::numeric_limits<T1>::lowest(),
+                                                                  std::numeric_limits<T1>::max());
+          return static_cast<T1>(unif_dist(rng));
+        };
+        const auto generator2 = [] {
+          static thread_local std::mt19937 rng(std::random_device{}());
+          std::uniform_int_distribution<T2> unif_dist(std::numeric_limits<T2>::lowest(),
+                                                      std::numeric_limits<T2>::max());
+          return unif_dist(rng);
+        };
+        std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator1);
+        std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator2);
+      });
+      base_idx += sub_batch_size;
+    }
+
+    thread_pool.Wait();
+
+    math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(),
+                  x2s.ptr());
+  }
+}
+
+template <typename T1, typename T2, typename RT1, typename RT2, typename ValidatorBuilder>
+void PowIntFloatingPointSpecialValuesTest(kernel_pow_int_sig<T1, T2> kernel,
+                                          ref_pow_int_sig<RT1, RT2> ref_func,
+                                          const ValidatorBuilder& validator_builder) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const auto values1 = std::get<SpecialVals<T1>>(kSpecialValRegistry);
+  const auto values2 = std::get<SpecialVals<int>>(kSpecialValRegistry);
+
+  const auto size = values1.size * values2.size;
+  LinearAllocGuard<T1> x1s{LinearAllocs::hipHostMalloc, size * sizeof(T1)};
+  LinearAllocGuard<T2> x2s{LinearAllocs::hipHostMalloc, size * sizeof(T2)};
+
+  for (auto i = 0u; i < values1.size; ++i) {
+    for (auto j = 0u; j < values2.size; ++j) {
+      x1s.ptr()[i * values2.size + j] = values1.data[i];
+      x2s.ptr()[i * values2.size + j] = static_cast<T2>(values2.data[j]);
+    }
+  }
+
+  MathTest math_test(kernel, size);
+  math_test.template Run<false>(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(),
+                                x2s.ptr());
+}
+
+template <typename T1, typename T2, typename RT1, typename RT2, typename ValidatorBuilder>
+void PowIntFloatingPointTest(kernel_pow_int_sig<T1, T2> kernel, ref_pow_int_sig<RT1, RT2> ref_func,
+                             const ValidatorBuilder& validator_builder) {
+  SECTION("Special values") {
+    PowIntFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder);
+  }
+
+  SECTION("Brute force") { PowIntFloatingPointBruteForceTest(kernel, ref_func, validator_builder); }
+}
@@ -0,0 +1,455 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "unary_common.hh"
+#include "binary_common.hh"
+#include "pow_common.hh"
+#include "math_pow_negative_kernels_rtc.hh"
+
+/**
+ * @addtogroup PowMathFuncs PowMathFuncs
+ * @{
+ * @ingroup MathTest
+ */
+
+/********** Unary Functions **********/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `expf(x)` for all possible inputs and `exp(x)` against a
+ * table of difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::exp(T)`. The maximum ulp error for single
+ * precision is 2 and for double precision is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp, 2, 1)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for expf and exp.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_exp_expf_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `exp2f(x)` for all possible inputs and `exp2(x)` against a
+ * table of difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::exp2(T)`. The maximum ulp error for single
+ * precision is 2 and for double precision is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp2, 2, 1)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for exp2f and exp2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_exp2_exp2f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp2); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `expm1f(x)` for all possible inputs and `expm1(x)` against a
+ * table of difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::exp(T)`. The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(expm1, 1, 1)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for expm1f and expm1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_expm1_expm1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExpm1); }
+
+MATH_UNARY_KERNEL_DEF(exp10)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `exp10f(x)` for all possible inputs. The maximum ulp error
+ * is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_exp10f_Accuracy_Positive") {
+  auto exp10_ref = [](double arg) -> double { return std::pow(10, arg); };
+  double (*ref)(double) = exp10_ref;
+  UnarySinglePrecisionTest(exp10_kernel<float>, ref, ULPValidatorBuilderFactory<float>(2));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `exp10(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_exp10_Accuracy_Positive") {
+  auto exp10_ref = [](long double arg) -> long double { return std::pow(10, arg); };
+  long double (*ref)(long double) = exp10_ref;
+  UnaryDoublePrecisionTest(exp10_kernel<double>, ref, ULPValidatorBuilderFactory<double>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for exp10f and exp10.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_exp10_exp10f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp10); }
+
+template <typename T>
+__global__ void frexp_kernel(std::pair<T, int>* const ys, const size_t num_xs, T* const xs) {
+  const auto tid = cg::this_grid().thread_rank();
+  const auto stride = cg::this_grid().size();
+
+  for (auto i = tid; i < num_xs; i += stride) {
+    if constexpr (std::is_same_v<float, T>) {
+      ys[i].first = frexpf(xs[i], &ys[i].second);
+    } else if constexpr (std::is_same_v<double, T>) {
+      ys[i].first = frexp(xs[i], &ys[i].second);
+    }
+  }
+}
+
+template <typename T> std::pair<T, int> frexp_ref(T arg) {
+  int exp_v;
+  T res = std::frexp(arg, &exp_v);
+  return {res, exp_v};
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `frexpf(x, exp)` for all possible inputs. The results are
+ * compared against reference function `double std::frexp(double, int*)`. The maximum ulp error is
+ * 0.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_frexpf_Accuracy_Positive") {
+  UnarySinglePrecisionTest(
+      frexp_kernel<float>, frexp_ref<double>,
+      PairValidatorBuilderFactory<float, int>(ULPValidatorBuilderFactory<float>(0),
+                                              EqValidatorBuilderFactory<int>()));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `frexp(x, exp)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The results are
+ * compared against reference function `long double std::frexp(long double, int*)`. The maximum ulp
+ * error is 0.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_frexp_Accuracy_Positive") {
+  UnaryDoublePrecisionTest(
+      frexp_kernel<double>, frexp_ref<long double>,
+      PairValidatorBuilderFactory<double, int>(ULPValidatorBuilderFactory<double>(0),
+                                               EqValidatorBuilderFactory<int>()));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for frexpf and frexp.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_frexp_frexpf_Negative_RTC") { NegativeTestRTCWrapper<20>(kFrexp); }
+
+
+/********** Binary Functions **********/
+
+MATH_BINARY_KERNEL_DEF(pow)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `powf(x, y)` and `pow(x, y)`against a table of
+ * difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::pow(T, T)`. The maximum ulp error
+ * for single precision is 4 and for double precision is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_pow_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto pow_ref = [](RT arg1, RT arg2) -> RT {
+    if (std::isinf(arg1) && arg2 < 0) return 0;
+    return std::pow(arg1, arg2);
+  };
+  RT (*ref)(RT, RT) = pow_ref;
+  const auto ulp = std::is_same_v<float, TestType> ? 4 : 2;
+  BinaryFloatingPointTest(pow_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for powf and pow.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_pow_powf_Negative_RTC") { NegativeTestRTCWrapper<8>(kPow); }
+
+MATH_POW_INT_KERNEL_DEF(ldexp)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `ldexpf(x, exp)` and `ldexp(x, exp)`against a table of
+ * difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::ldexp(T, int)`. The maximum ulp error is 0.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_ldexp_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  RT (*ref)(RT, int) = std::ldexp;
+  PowIntFloatingPointTest(ldexp_kernel<TestType, int>, ref,
+                          ULPValidatorBuilderFactory<TestType>(0));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for ldexpf and ldexp.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_ldexp_ldexpf_Negative_RTC") { NegativeTestRTCWrapper<8>(kLdexp); }
+
+MATH_POW_INT_KERNEL_DEF(powi)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `powi(x, exp)` and `powi(x, exp)`against a table of
+ * difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::pow(T, T)`. The maximum ulp error
+ * for single precision is 4 and for double precision is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_powi_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto pow_ref = [](RT arg1, int arg2) -> RT {
+    if (std::isinf(arg1) && arg2 < 0) return 0;
+    return std::pow(arg1, static_cast<RT>(arg2));
+  };
+  RT (*ref)(RT, int) = pow_ref;
+  const auto ulp = std::is_same_v<float, TestType> ? 4 : 2;
+  PowIntFloatingPointTest(powi_kernel<TestType, int>, ref,
+                          ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for powif and powi.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_powi_powif_Negative_RTC") { NegativeTestRTCWrapper<8>(kPowi); }
+
+MATH_POW_INT_KERNEL_DEF(scalbn)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `scalbnf(x, n)` and `scalbn(x, n)`against a table of
+ * difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::scalbn(T, int)`. The maximum ulp error is 0.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_scalbn_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  RT (*ref)(RT, int) = std::scalbn;
+  PowIntFloatingPointTest(scalbn_kernel<TestType, int>, ref,
+                          ULPValidatorBuilderFactory<TestType>(0));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for scalbnf and scalbn.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_scalbn_scalbnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbn); }
+
+MATH_POW_INT_KERNEL_DEF(scalbln)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `scalblnf(x, l)` and `scalbln(x, l)`against a table of
+ * difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::scalbn(T, long int)`. The maximum ulp error is 0.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_scalbln_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  RT (*ref)(RT, long int) = std::scalbln;
+  PowIntFloatingPointTest(scalbln_kernel<TestType, long int>, ref,
+                          ULPValidatorBuilderFactory<TestType>(0));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for scalblnf and scalbln.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/pow_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_scalbln_scalblnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbln); }