EXSWHTEC-305 - Implement tests for double type casting intrinsics #283

Change-Id: Iacf67376949eed4a84f7e9e95bb51fd31b5ec6a4 [ROCm/hip-tests commit: 59d6807cdb]
2024-01-24 00:57:39 +05:30
@@ -29,6 +29,7 @@ set(TEST_SRC
  pow_funcs.cc
  log_funcs.cc
  special_funcs.cc
+  casting_double_funcs.cc
 )

 if(HIP_PLATFORM MATCHES "nvidia")
@@ -101,3 +102,7 @@ add_test(NAME Unit_Device_special_funcs_Negative
         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
         math_special_func_kernels.cc 76)
+add_test(NAME Unit_Device_casting_double_Negative
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         casting_double_negative_kernels.cc 69)  # NOTE(review): 69 is presumably the expected compile-error count; confirm against compileAndCaptureOutput.py
@@ -0,0 +1,195 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+#include "unary_common.hh"
+#include <fenv.h>
+
+namespace cg = cooperative_groups;
+
+// Defines `func_name##_kernel`, a __global__ kernel that applies the device function `func_name`
+// element-wise: ys[i] = func_name(xs[i]) for every i in [0, num_xs). Each thread starts at its
+// rank within the grid and advances by the number of threads in the grid.
+//   func_name - device function under test; also the prefix of the generated kernel name
+//   T1        - element type of the output buffer `ys`
+//   T2        - element type of the input buffer `xs`
+#define CAST_KERNEL_DEF(func_name, T1, T2)                                                         \
+  __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T2* const xs) {            \
+    const auto grid = cg::this_grid();                                                             \
+    const auto first = grid.thread_rank();                                                         \
+    const auto step = grid.size();                                                                 \
+                                                                                                   \
+    for (auto idx = first; idx < num_xs; idx += step) {                                            \
+      ys[idx] = func_name(xs[idx]);                                                                \
+    }                                                                                              \
+  }
+
+// Defines the host reference `func_name##_ref` for a float-to-integer conversion that saturates:
+// inputs at or above T1's maximum yield the maximum, inputs at or below T1's minimum yield the
+// minimum, and everything else is rounded with `ref_func` and converted to T1.
+//   func_name - prefix of the generated reference function
+//   T1        - integer result type
+//   T2        - floating-point argument type
+//   ref_func  - rounding function applied to in-range inputs (e.g. std::floor)
+#define CAST_F2I_REF_DEF(func_name, T1, T2, ref_func)                                              \
+  T1 func_name##_ref(T2 arg) {                                                                     \
+    constexpr T1 kUpper = std::numeric_limits<T1>::max();                                          \
+    constexpr T1 kLower = std::numeric_limits<T1>::min();                                          \
+    if (arg >= static_cast<T2>(kUpper)) return kUpper;                                             \
+    if (arg <= static_cast<T2>(kLower)) return kLower;                                             \
+    const T2 rounded = ref_func(arg);                                                              \
+    return rounded;                                                                                \
+  }
+
+// Defines the host reference `func_name##_ref` for a round-toward-zero float-to-integer
+// conversion that saturates: inputs at or above T1's maximum yield the maximum, inputs at or below
+// T1's minimum yield the minimum, and everything else goes through static_cast<T1>.
+//   func_name - prefix of the generated reference function
+//   T1        - integer result type
+//   T2        - floating-point argument type (the limits are compared as double)
+#define CAST_F2I_RZ_REF_DEF(func_name, T1, T2)                                                     \
+  T1 func_name##_ref(T2 arg) {                                                                     \
+    constexpr T1 kUpper = std::numeric_limits<T1>::max();                                          \
+    constexpr T1 kLower = std::numeric_limits<T1>::min();                                          \
+    if (arg >= static_cast<double>(kUpper)) return kUpper;                                         \
+    if (arg <= static_cast<double>(kLower)) return kLower;                                         \
+    return static_cast<T1>(arg);                                                                   \
+  }
+
+// Defines the host reference `func_name##_ref`, which converts `arg` to T1 with static_cast while
+// the floating-point rounding mode is temporarily set to `round_dir`; the previous mode is
+// restored before returning.
+//   func_name - prefix of the generated reference function
+//   T1        - result type
+//   T2        - argument type
+//   round_dir - <fenv.h> rounding-mode constant (e.g. FE_DOWNWARD)
+// NOTE(review): no FENV_ACCESS pragma is visible here, so this presumably relies on the compiler
+// not moving the conversion across the fesetround calls - confirm for the toolchains in use.
+#define CAST_RND_REF_DEF(func_name, T1, T2, round_dir)                                             \
+  T1 func_name##_ref(T2 arg) {                                                                     \
+    const int saved_mode = fegetround();                                                           \
+    fesetround(round_dir);                                                                         \
+    const T1 converted = static_cast<T1>(arg);                                                     \
+    fesetround(saved_mode);                                                                        \
+    return converted;                                                                              \
+  }
+
+// Defines the host reference `func_name##_ref`, a plain static_cast from T2 to T1.
+//   func_name - prefix of the generated reference function
+//   T1        - result type
+//   T2        - argument type
+#define CAST_REF_DEF(func_name, T1, T2)                                                            \
+  T1 func_name##_ref(T2 arg) { return static_cast<T1>(arg); }
+
+
+/**
+ * Host reference for the bit-reinterpreting intrinsics: returns a T1 whose bytes are copied from
+ * the first sizeof(T1) bytes of `arg`. No numeric conversion takes place.
+ *
+ * @tparam T1 Result type.
+ * @tparam T2 Argument type; must be at least as large as T1.
+ * @param arg Value whose object representation is copied.
+ * @return The copied bytes interpreted as T1.
+ */
+template <typename T1, typename T2> T1 type2_as_type1_ref(T2 arg) {
+  // The copy length is sizeof(T1); reject instantiations that would read past the end of `arg`.
+  static_assert(sizeof(T1) <= sizeof(T2),
+                "type2_as_type1_ref: T1 must not be larger than T2 (copy would overrun the source)");
+  T1 tmp;
+  memcpy(&tmp, &arg, sizeof(tmp));
+  return tmp;
+}
+
+/**
+ * Runs `kernel` on the double-precision special-value table taken from kSpecialValRegistry and
+ * validates every result against `ref_func`.
+ *
+ * For non-floating-point T, NaN and infinite inputs are removed from the table before the run.
+ * For floating-point T the whole table is used. Previously the table was left empty in that case,
+ * so the run covered zero inputs.
+ *
+ * @param kernel            Kernel under test (double -> T).
+ * @param ref_func          Host reference function (double -> T).
+ * @param validator_builder Builds the validator used to compare device and reference results.
+ */
+template <typename T, typename ValidatorBuilder>
+void CastDoublePrecisionSpecialValuesTest(kernel_sig<T, double> kernel, ref_sig<T, double> ref_func,
+                                          const ValidatorBuilder& validator_builder) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const auto values = std::get<SpecialVals<double>>(kSpecialValRegistry);
+
+  // Non-finite inputs are only dropped when the target is not a floating-point type.
+  // NOTE(review): this assumes the validator accepts NaN results for NaN references - confirm.
+  constexpr bool kSkipNonFinite = !std::is_floating_point_v<T>;
+
+  std::vector<double> spec_values;
+  for (int i = 0; i < values.size; i++) {
+    const double value = values.data[i];
+    if (kSkipNonFinite && (std::isnan(value) || std::isinf(value))) continue;
+    spec_values.push_back(value);
+  }
+
+  MathTest math_test(kernel, spec_values.size());
+  math_test.template Run<false>(validator_builder, grid_size, block_size, ref_func,
+                                spec_values.size(), spec_values.data());
+}
+
+/**
+ * Declares the two sections of a double-precision cast test for `kernel`: "Special values" runs
+ * CastDoublePrecisionSpecialValuesTest and "Brute force" runs UnaryDoublePrecisionBruteForceTest.
+ *
+ * @param kernel            Kernel under test (double -> T).
+ * @param ref               Host reference function (double -> T).
+ * @param validator_builder Builds the validator used to compare device and reference results.
+ */
+template <typename T, typename ValidatorBuilder>
+void CastDoublePrecisionTest(kernel_sig<T, double> kernel, ref_sig<T, double> ref,
+                             const ValidatorBuilder& validator_builder) {
+  SECTION("Special values") {
+    CastDoublePrecisionSpecialValuesTest(kernel, ref, validator_builder);
+  }
+
+  SECTION("Brute force") {
+    UnaryDoublePrecisionBruteForceTest(kernel, ref, validator_builder);
+  }
+}
+
+/**
+ * Runs `kernel` over every value of TArg in the closed range [a, b], in ascending order, and
+ * validates each result against `ref_func`.
+ *
+ * Values are staged in a buffer of `max_batch_size` elements and passed to MathTest::Run one batch
+ * at a time; a final, partially filled batch is submitted as well.
+ *
+ * @param kernel            Kernel under test.
+ * @param ref_func          Host reference function.
+ * @param validator_builder Builds the validator used to compare device and reference results.
+ * @param a                 First value of the range (inclusive).
+ * @param b                 Last value of the range (inclusive); nothing is run when a > b.
+ */
+template <typename T, typename TArg, typename RT, typename RTArg, typename ValidatorBuilder>
+void CastIntRangeTest(kernel_sig<T, TArg> kernel, ref_sig<RT, RTArg> ref_func,
+                      const ValidatorBuilder& validator_builder,
+                      const TArg a = std::numeric_limits<TArg>::lowest(),
+                      const TArg b = std::numeric_limits<TArg>::max()) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg));
+  LinearAllocGuard<TArg> values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+
+  MathTest math_test(kernel, max_batch_size);
+
+  if (a > b) return;
+
+  size_t inserted = 0u;
+  // Stop by comparing against b before incrementing: with b == numeric_limits<TArg>::max() a
+  // `v <= b` loop condition can never become false (unsigned wrap-around / signed overflow).
+  for (TArg v = a;; ++v) {
+    values.ptr()[inserted++] = v;
+    if (inserted >= max_batch_size) {
+      math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr());
+      inserted = 0u;
+    }
+    if (v == b) break;
+  }
+
+  // Submit the values left over after the last full batch.
+  if (inserted > 0u) {
+    math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr());
+  }
+}
+
+/**
+ * Runs `kernel` on GetTestIterationCount() integer inputs drawn with
+ * std::uniform_int_distribution<TArg>(a, b) and validates each result against `ref_func`.
+ *
+ * Inputs are produced batch by batch; filling a batch is split across the threads of
+ * `thread_pool`, each of which writes its own contiguous slice of the staging buffer.
+ *
+ * @param kernel            Kernel under test.
+ * @param ref_func          Host reference function.
+ * @param validator_builder Builds the validator used to compare device and reference results.
+ * @param a                 Lower bound passed to the distribution.
+ * @param b                 Upper bound passed to the distribution.
+ */
+template <typename T, typename TArg, typename RT, typename RTArg, typename ValidatorBuilder>
+void CastIntBruteForceTest(kernel_sig<T, TArg> kernel, ref_sig<RT, RTArg> ref_func,
+                           const ValidatorBuilder& validator_builder,
+                           const TArg a = std::numeric_limits<TArg>::lowest(),
+                           const TArg b = std::numeric_limits<TArg>::max()) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const uint64_t num_iterations = GetTestIterationCount();
+  const auto max_batch_size =
+      std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg)), num_iterations);
+  LinearAllocGuard<TArg> values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+
+  MathTest math_test(kernel, max_batch_size);
+
+  auto batch_size = max_batch_size;
+  const auto num_threads = thread_pool.thread_count();
+  for (uint64_t done = 0; done < num_iterations; done += batch_size) {
+    batch_size = std::min<uint64_t>(max_batch_size, num_iterations - done);
+
+    // Every worker gets min_sub_batch_size values; the first `tail` workers get one more.
+    const auto min_sub_batch_size = batch_size / num_threads;
+    const auto tail = batch_size % num_threads;
+
+    // 64-bit offset: a batch can hold more elements than a 32-bit index can address.
+    uint64_t base_idx = 0;
+    for (auto worker = 0u; worker < num_threads; ++worker) {
+      const auto sub_batch_size = min_sub_batch_size + (worker < tail);
+      thread_pool.Post([=, &values] {
+        const auto generator = [=] {
+          static thread_local std::mt19937 rng(std::random_device{}());
+          std::uniform_int_distribution<TArg> unif_dist(a, b);
+          return static_cast<TArg>(unif_dist(rng));
+        };
+        std::generate(values.ptr() + base_idx, values.ptr() + base_idx + sub_batch_size, generator);
+      });
+      base_idx += sub_batch_size;
+    }
+
+    thread_pool.Wait();
+
+    math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr());
+  }
+}
+
+/**
+ * Runs the two-argument `kernel` on the operand pairs (v, b - v) for every v in the closed range
+ * [a, b], in ascending order, and validates each result against `ref_func`.
+ *
+ * Pairs are staged in two buffers of `max_batch_size` elements and passed to MathTest::Run one
+ * batch at a time; a final, partially filled batch is submitted as well.
+ *
+ * @param kernel            Kernel under test.
+ * @param ref_func          Host reference function.
+ * @param validator_builder Builds the validator used to compare device and reference results.
+ * @param a                 First value of the range (inclusive).
+ * @param b                 Last value of the range (inclusive); nothing is run when a > b.
+ */
+template <typename T1, typename T2, typename ValidatorBuilder>
+void CastBinaryIntRangeTest(kernel_sig<T1, T2, T2> kernel, ref_sig<T1, T2, T2> ref_func,
+                            const ValidatorBuilder& validator_builder,
+                            const T2 a = std::numeric_limits<T2>::lowest(),
+                            const T2 b = std::numeric_limits<T2>::max()) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) + 2 * sizeof(T2));
+  LinearAllocGuard<T2> values1{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)};
+  LinearAllocGuard<T2> values2{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)};
+
+  MathTest math_test(kernel, max_batch_size);
+
+  if (a > b) return;
+
+  size_t inserted = 0u;
+  // Stop by comparing against b before incrementing: with b == numeric_limits<T2>::max() a
+  // `v <= b` loop condition can never become false (unsigned wrap-around / signed overflow).
+  for (T2 v = a;; ++v) {
+    values1.ptr()[inserted] = v;
+    // NOTE(review): for signed T2 with negative v and large b, `b - v` can overflow; left as-is
+    // so the generated operand pairs stay unchanged.
+    values2.ptr()[inserted++] = b - v;
+    if (inserted >= max_batch_size) {
+      math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values1.ptr(),
+                    values2.ptr());
+      inserted = 0u;
+    }
+    if (v == b) break;
+  }
+
+  // Submit the pairs left over after the last full batch.
+  if (inserted > 0u) {
+    math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values1.ptr(),
+                  values2.ptr());
+  }
+}
@@ -0,0 +1,597 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "casting_common.hh"
+#include "casting_double_negative_kernels_rtc.hh"
+
+/**
+ * @addtogroup CastingDoubleType CastingDoubleType
+ * @{
+ * @ingroup MathTest
+ */
+
+// Generates everything needed to test one double -> T intrinsic: the kernel (CAST_KERNEL_DEF),
+// the saturating host reference built on `ref_func` (CAST_F2I_REF_DEF), and a Catch2 test case
+// named "Unit_Device_<kern_name>_Positive" that runs CastDoublePrecisionTest with exact-equality
+// validation.
+#define CAST_DOUBLE2INT_TEST_DEF(kern_name, T, ref_func)                                           \
+  CAST_KERNEL_DEF(kern_name, T, double)                                                            \
+  CAST_F2I_REF_DEF(kern_name, T, double, ref_func)                                                 \
+                                                                                                   \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") {                                               \
+    T (*host_ref)(double) = &kern_name##_ref;                                                      \
+    CastDoublePrecisionTest(kern_name##_kernel, host_ref, EqValidatorBuilderFactory<T>());         \
+  }
+
+// Round-toward-zero counterpart of CAST_DOUBLE2INT_TEST_DEF: generates the kernel, the saturating
+// static_cast-based host reference (CAST_F2I_RZ_REF_DEF), and a Catch2 test case named
+// "Unit_Device_<kern_name>_Positive" that runs CastDoublePrecisionTest with exact-equality
+// validation.
+#define CAST_DOUBLE2INT_RZ_TEST_DEF(kern_name, T)                                                  \
+  CAST_KERNEL_DEF(kern_name, T, double)                                                            \
+  CAST_F2I_RZ_REF_DEF(kern_name, T, double)                                                        \
+                                                                                                   \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") {                                               \
+    T (*host_ref)(double) = &kern_name##_ref;                                                      \
+    CastDoublePrecisionTest(kern_name##_kernel, host_ref, EqValidatorBuilderFactory<T>());         \
+  }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2int_rd` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against reference function
+ * `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2int_rd, int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2int_rn` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against reference function
+ * `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2int_rn, int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2int_ru` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against reference function
+ * `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2int_ru, int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2int_rz` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against reference function which
+ * performs cast to int.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_RZ_TEST_DEF(__double2int_rz, int)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double2int_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2int_Negative_RTC") {
+  // NOTE(review): 12 is presumably the expected compile-error count (4 intrinsics x 3 invalid
+  // kernels each) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<12>(kDouble2Int);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2uint_rd` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2uint_rd, unsigned int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2uint_rn` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2uint_rn, unsigned int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2uint_ru` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_TEST_DEF(__double2uint_ru, unsigned int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2uint_rz` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * which performs cast to unsigned int.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2INT_RZ_TEST_DEF(__double2uint_rz, unsigned int)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double2uint_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2uint_Negative_RTC") {
+  // NOTE(review): 12 is presumably the expected compile-error count (4 intrinsics x 3 invalid
+  // kernels each) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<12>(kDouble2Uint);
+}
+
+// Generates the kernel, the saturating host reference built on `ref_func`, and a Catch2 test case
+// named "Unit_Device_<kern_name>_Positive" for one double -> T intrinsic. Unlike
+// CAST_DOUBLE2INT_TEST_DEF, the test case only calls UnaryDoublePrecisionBruteForceTest, passing
+// T's minimum and maximum (converted to double) as its last two arguments.
+#define CAST_DOUBLE2LL_TEST_DEF(kern_name, T, ref_func)                                            \
+  CAST_KERNEL_DEF(kern_name, T, double)                                                            \
+  CAST_F2I_REF_DEF(kern_name, T, double, ref_func)                                                 \
+                                                                                                   \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") {                                               \
+    T (*host_ref)(double) = &kern_name##_ref;                                                      \
+    const double range_lo = static_cast<double>(std::numeric_limits<T>::min());                    \
+    const double range_hi = static_cast<double>(std::numeric_limits<T>::max());                    \
+    UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, host_ref,                               \
+                                       EqValidatorBuilderFactory<T>(), range_lo, range_hi);        \
+  }
+
+// Round-toward-zero counterpart of CAST_DOUBLE2LL_TEST_DEF: generates the kernel, the saturating
+// static_cast-based host reference, and a Catch2 test case named
+// "Unit_Device_<kern_name>_Positive" that calls UnaryDoublePrecisionBruteForceTest, passing T's
+// minimum and maximum (converted to double) as its last two arguments.
+#define CAST_DOUBLE2LL_RZ_TEST_DEF(kern_name, T)                                                   \
+  CAST_KERNEL_DEF(kern_name, T, double)                                                            \
+  CAST_F2I_RZ_REF_DEF(kern_name, T, double)                                                        \
+                                                                                                   \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") {                                               \
+    T (*host_ref)(double) = &kern_name##_ref;                                                      \
+    const double range_lo = static_cast<double>(std::numeric_limits<T>::min());                    \
+    const double range_hi = static_cast<double>(std::numeric_limits<T>::max());                    \
+    UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, host_ref,                               \
+                                       EqValidatorBuilderFactory<T>(), range_lo, range_hi);        \
+  }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ll_rd` against a large number of randomly generated values
+ * drawn from the range of `long long int`. The results are compared against a reference function
+ * built on `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ll_rd, long long int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ll_rn` against a large number of randomly generated values
+ * drawn from the range of `long long int`. The results are compared against a reference function
+ * built on `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ll_rn, long long int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ll_ru` against a large number of randomly generated values
+ * drawn from the range of `long long int`. The results are compared against a reference function
+ * built on `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ll_ru, long long int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ll_rz` against a large number of randomly generated values
+ * drawn from the range of `long long int`. The results are compared against a reference function
+ * which performs a cast to long long int.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_RZ_TEST_DEF(__double2ll_rz, long long int)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double2ll_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2ll_Negative_RTC") {
+  // NOTE(review): 12 is presumably the expected compile-error count (4 intrinsics x 3 invalid
+  // kernels each) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<12>(kDouble2LL);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ull_rd` against a large number of randomly generated values
+ * drawn from the range of `unsigned long long int`. The results are compared against a reference
+ * function built on `std::floor`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ull_rd, unsigned long long int, std::floor)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ull_rn` against a large number of randomly generated values
+ * drawn from the range of `unsigned long long int`. The results are compared against a reference
+ * function built on `std::rint`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ull_rn, unsigned long long int, std::rint)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ull_ru` against a large number of randomly generated values
+ * drawn from the range of `unsigned long long int`. The results are compared against a reference
+ * function built on `std::ceil`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_TEST_DEF(__double2ull_ru, unsigned long long int, std::ceil)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that check `__double2ull_rz` against a large number of randomly generated values
+ * drawn from the range of `unsigned long long int`. The results are compared against a reference
+ * function which performs a cast to unsigned long long int.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2LL_RZ_TEST_DEF(__double2ull_rz, unsigned long long int)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double2ull_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2ull_Negative_RTC") {
+  // NOTE(review): 12 is presumably the expected compile-error count (4 intrinsics x 3 invalid
+  // kernels each) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<12>(kDouble2ULL);
+}
+
+// Generates the kernel, a host reference that casts to float under the <fenv.h> rounding mode
+// `round_dir` (CAST_RND_REF_DEF), and a Catch2 test case named "Unit_Device_<kern_name>_Positive"
+// that runs CastDoublePrecisionTest with exact-equality validation.
+#define CAST_DOUBLE2FLOAT_TEST_DEF(kern_name, round_dir)                                           \
+  CAST_KERNEL_DEF(kern_name, float, double)                                                        \
+  CAST_RND_REF_DEF(kern_name, float, double, round_dir)                                            \
+                                                                                                   \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") {                                               \
+    float (*host_ref)(double) = &kern_name##_ref;                                                  \
+    CastDoublePrecisionTest(kern_name##_kernel, host_ref, EqValidatorBuilderFactory<float>());     \
+  }
+
+// Generates the kernel, a host reference that is a plain static_cast to float (CAST_REF_DEF, no
+// rounding-mode change), and a Catch2 test case named "Unit_Device_<kern_name>_Positive" that runs
+// CastDoublePrecisionTest with exact-equality validation.
+#define CAST_DOUBLE2FLOAT_RN_TEST_DEF(kern_name)                                                   \
+  CAST_KERNEL_DEF(kern_name, float, double)                                                        \
+  CAST_REF_DEF(kern_name, float, double)                                                           \
+                                                                                                   \
+  TEST_CASE("Unit_Device_" #kern_name "_Positive") {                                               \
+    float (*host_ref)(double) = &kern_name##_ref;                                                  \
+    CastDoublePrecisionTest(kern_name##_kernel, host_ref, EqValidatorBuilderFactory<float>());     \
+  }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2float_rd` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * which performs cast to float with rounding mode FE_DOWNWARD.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_rd, FE_DOWNWARD)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2float_rn` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * which performs cast to float.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2FLOAT_RN_TEST_DEF(__double2float_rn)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2float_ru` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * which performs cast to float with rounding mode FE_UPWARD.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_ru, FE_UPWARD)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2float_rz` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * which performs cast to float with rounding mode FE_TOWARDZERO.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_rz, FE_TOWARDZERO)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double2float_[rd,rn,ru,rz].
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2float_Negative_RTC") {
+  // NOTE(review): 12 is presumably the expected compile-error count (4 intrinsics x 3 invalid
+  // kernels each) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<12>(kDouble2Float);
+}
+
+CAST_KERNEL_DEF(__double2hiint, int, double)  // defines __double2hiint_kernel
+
+// Host reference for __double2hiint: returns the second int-sized word of the object
+// representation of `arg` (the bytes starting at offset sizeof(int)).
+int __double2hiint_ref(double arg) {
+  int second_word;
+  const auto* const bytes = reinterpret_cast<const unsigned char*>(&arg);
+  memcpy(&second_word, bytes + sizeof(int), sizeof(second_word));
+  return second_word;
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2hiint` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against reference function which
+ * performs copy of higher part of double value to int variable.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2hiint_Positive") {
+  // Bind the reference to an explicitly typed pointer before handing it to the test driver.
+  int (*host_ref)(double) = &__double2hiint_ref;
+  CastDoublePrecisionTest(__double2hiint_kernel, host_ref, EqValidatorBuilderFactory<int>());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double2hiint.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2hiint_Negative_RTC") {
+  // NOTE(review): 3 is presumably the expected compile-error count (one per invalid kernel
+  // variant) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<3>(kDouble2Hiint);
+}
+
+CAST_KERNEL_DEF(__double2loint, int, double)  // defines __double2loint_kernel
+
+// Host reference for __double2loint: returns the first int-sized word of the object
+// representation of `arg` (the bytes starting at offset 0).
+int __double2loint_ref(double arg) {
+  int first_word;
+  memcpy(&first_word, &arg, sizeof(first_word));
+  return first_word;
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double2loint` against a table of difficult values, followed by a large
+ * number of randomly generated values. The results are compared against reference function which
+ * performs copy of lower part of double value to int variable.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2loint_Positive") {
+  // Bind the reference to an explicitly typed pointer before handing it to the test driver.
+  int (*host_ref)(double) = &__double2loint_ref;
+  CastDoublePrecisionTest(__double2loint_kernel, host_ref, EqValidatorBuilderFactory<int>());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double2loint.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double2loint_Negative_RTC") {
+  // NOTE(review): 3 is presumably the expected compile-error count (one per invalid kernel
+  // variant) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<3>(kDouble2Loint);
+}
+
+CAST_KERNEL_DEF(__double_as_longlong, long long int, double)  // defines __double_as_longlong_kernel
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests that checks `__double_as_longlong` against a table of difficult values, followed by a
+ * large number of randomly generated values. The results are compared against reference function
+ * which performs copy of double value to long long int variable.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double_as_longlong_Positive") {
+  // The reference is the byte-wise copy helper instantiated for double -> long long int.
+  long long int (*host_ref)(double) = &type2_as_type1_ref<long long int, double>;
+  CastDoublePrecisionTest(__double_as_longlong_kernel, host_ref,
+                          EqValidatorBuilderFactory<long long int>());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for __double_as_longlong.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/casting_double_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device___double_as_longlong_Negative_RTC") {
+  // NOTE(review): 3 is presumably the expected compile-error count (one per invalid kernel
+  // variant) - confirm against casting_double_negative_kernels_rtc.hh.
+  NegativeTestRTCWrapper<3>(kDoubleAsLonglong);
+}
@@ -0,0 +1,55 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+// Placeholder user-defined type: it declares only a device constructor and destructor, so it
+// offers no conversion to or from the arithmetic types used by the casting intrinsics.
+struct Dummy {
+  __device__ Dummy() {}
+  __device__ ~Dummy() {}
+};
+
+// Emits three kernels per intrinsic, each containing a single call with mismatched types:
+//   <intrinsic>_kernel_v1 passes a double* argument,
+//   <intrinsic>_kernel_v2 passes a Dummy argument,
+//   <intrinsic>_kernel_v3 stores the result through a Dummy*.
+// `intrinsic` is the function under test; `ResultT` is the result type used by _v1 and _v2.
+#define NEGATIVE_KERNELS_SHELL(intrinsic, ResultT)                                                 \
+  __global__ void intrinsic##_kernel_v1(ResultT* out, double* in) { *out = intrinsic(in); }        \
+  __global__ void intrinsic##_kernel_v2(ResultT* out, Dummy in) { *out = intrinsic(in); }          \
+  __global__ void intrinsic##_kernel_v3(Dummy* out, double in) { *out = intrinsic(in); }
+
+// One expansion per intrinsic under test: 23 intrinsics x 3 kernel variants = 69 kernels.
+// NOTE(review): presumably this must stay in sync with the expected error count (69) passed to
+// the Unit_Device_casting_double_Negative CTest entry - confirm when adding or removing lines.
+NEGATIVE_KERNELS_SHELL(__double2int_rd, int)
+NEGATIVE_KERNELS_SHELL(__double2int_rn, int)
+NEGATIVE_KERNELS_SHELL(__double2int_ru, int)
+NEGATIVE_KERNELS_SHELL(__double2int_rz, int)
+NEGATIVE_KERNELS_SHELL(__double2uint_rd, unsigned int)
+NEGATIVE_KERNELS_SHELL(__double2uint_rn, unsigned int)
+NEGATIVE_KERNELS_SHELL(__double2uint_ru, unsigned int)
+NEGATIVE_KERNELS_SHELL(__double2uint_rz, unsigned int)
+NEGATIVE_KERNELS_SHELL(__double2ll_rd, long long int)
+NEGATIVE_KERNELS_SHELL(__double2ll_rn, long long int)
+NEGATIVE_KERNELS_SHELL(__double2ll_ru, long long int)
+NEGATIVE_KERNELS_SHELL(__double2ll_rz, long long int)
+NEGATIVE_KERNELS_SHELL(__double2ull_rd, unsigned long long int)
+NEGATIVE_KERNELS_SHELL(__double2ull_rn, unsigned long long int)
+NEGATIVE_KERNELS_SHELL(__double2ull_ru, unsigned long long int)
+NEGATIVE_KERNELS_SHELL(__double2ull_rz, unsigned long long int)
+NEGATIVE_KERNELS_SHELL(__double2float_rd, float)
+NEGATIVE_KERNELS_SHELL(__double2float_rn, float)
+NEGATIVE_KERNELS_SHELL(__double2float_ru, float)
+NEGATIVE_KERNELS_SHELL(__double2float_rz, float)
+NEGATIVE_KERNELS_SHELL(__double2hiint, int)
+NEGATIVE_KERNELS_SHELL(__double2loint, int)
+NEGATIVE_KERNELS_SHELL(__double_as_longlong, long long int)
@@ -0,0 +1,157 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+/*
+Negative kernels used for the double type casting negative Test Cases that are using RTC.
+*/
+
+// Kernel source for the __double2int_{rd,rn,ru,rz} negative RTC tests (12 kernels). Per intrinsic:
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDouble2Int{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double2int_rd_kernel_v1(int* result, double* x) { *result = __double2int_rd(x); }
+  __global__ void double2int_rd_kernel_v2(int* result, Dummy x) { *result = __double2int_rd(x); }
+  __global__ void double2int_rd_kernel_v3(Dummy* result, double x) { *result = __double2int_rd(x); }
+  __global__ void double2int_rn_kernel_v1(int* result, double* x) { *result = __double2int_rn(x); }
+  __global__ void double2int_rn_kernel_v2(int* result, Dummy x) { *result = __double2int_rn(x); }
+  __global__ void double2int_rn_kernel_v3(Dummy* result, double x) { *result = __double2int_rn(x); }
+  __global__ void double2int_ru_kernel_v1(int* result, double* x) { *result = __double2int_ru(x); }
+  __global__ void double2int_ru_kernel_v2(int* result, Dummy x) { *result = __double2int_ru(x); }
+  __global__ void double2int_ru_kernel_v3(Dummy* result, double x) { *result = __double2int_ru(x); }
+  __global__ void double2int_rz_kernel_v1(int* result, double* x) { *result = __double2int_rz(x); }
+  __global__ void double2int_rz_kernel_v2(int* result, Dummy x) { *result = __double2int_rz(x); }
+  __global__ void double2int_rz_kernel_v3(Dummy* result, double x) { *result = __double2int_rz(x); }
+)"};
+
+// Kernel source for the __double2uint_{rd,rn,ru,rz} negative RTC tests (12 kernels). Per intrinsic:
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDouble2Uint{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double2uint_rd_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rd(x); }
+  __global__ void double2uint_rd_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rd(x); }
+  __global__ void double2uint_rd_kernel_v3(Dummy* result, double x) { *result = __double2uint_rd(x); }
+  __global__ void double2uint_rn_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rn(x); }
+  __global__ void double2uint_rn_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rn(x); }
+  __global__ void double2uint_rn_kernel_v3(Dummy* result, double x) { *result = __double2uint_rn(x); }
+  __global__ void double2uint_ru_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_ru(x); }
+  __global__ void double2uint_ru_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_ru(x); }
+  __global__ void double2uint_ru_kernel_v3(Dummy* result, double x) { *result = __double2uint_ru(x); }
+  __global__ void double2uint_rz_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rz(x); }
+  __global__ void double2uint_rz_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rz(x); }
+  __global__ void double2uint_rz_kernel_v3(Dummy* result, double x) { *result = __double2uint_rz(x); }
+)"};
+
+// Kernel source for the __double2ll_{rd,rn,ru,rz} negative RTC tests (12 kernels). Per intrinsic:
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDouble2LL{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double2ll_rd_kernel_v1(long long int* result, double* x) { *result = __double2ll_rd(x); }
+  __global__ void double2ll_rd_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_rd(x); }
+  __global__ void double2ll_rd_kernel_v3(Dummy* result, double x) { *result = __double2ll_rd(x); }
+  __global__ void double2ll_rn_kernel_v1(long long int* result, double* x) { *result = __double2ll_rn(x); }
+  __global__ void double2ll_rn_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_rn(x); }
+  __global__ void double2ll_rn_kernel_v3(Dummy* result, double x) { *result = __double2ll_rn(x); }
+  __global__ void double2ll_ru_kernel_v1(long long int* result, double* x) { *result = __double2ll_ru(x); }
+  __global__ void double2ll_ru_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_ru(x); }
+  __global__ void double2ll_ru_kernel_v3(Dummy* result, double x) { *result = __double2ll_ru(x); }
+  __global__ void double2ll_rz_kernel_v1(long long int* result, double* x) { *result = __double2ll_rz(x); }
+  __global__ void double2ll_rz_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_rz(x); }
+  __global__ void double2ll_rz_kernel_v3(Dummy* result, double x) { *result = __double2ll_rz(x); }
+)"};
+
+// Kernel source for the __double2ull_{rd,rn,ru,rz} negative RTC tests (12 kernels). Per intrinsic:
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDouble2ULL{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double2ull_rd_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rd(x); }
+  __global__ void double2ull_rd_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rd(x); }
+  __global__ void double2ull_rd_kernel_v3(Dummy* result, double x) { *result = __double2ull_rd(x); }
+  __global__ void double2ull_rn_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rn(x); }
+  __global__ void double2ull_rn_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rn(x); }
+  __global__ void double2ull_rn_kernel_v3(Dummy* result, double x) { *result = __double2ull_rn(x); }
+  __global__ void double2ull_ru_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_ru(x); }
+  __global__ void double2ull_ru_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_ru(x); }
+  __global__ void double2ull_ru_kernel_v3(Dummy* result, double x) { *result = __double2ull_ru(x); }
+  __global__ void double2ull_rz_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rz(x); }
+  __global__ void double2ull_rz_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rz(x); }
+  __global__ void double2ull_rz_kernel_v3(Dummy* result, double x) { *result = __double2ull_rz(x); }
+)"};
+
+// Kernel source for the __double2float_{rd,rn,ru,rz} negative RTC tests (12 kernels). Per intrinsic:
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDouble2Float{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double2float_rd_kernel_v1(float* result, double* x) { *result = __double2float_rd(x); }
+  __global__ void double2float_rd_kernel_v2(float* result, Dummy x) { *result = __double2float_rd(x); }
+  __global__ void double2float_rd_kernel_v3(Dummy* result, double x) { *result = __double2float_rd(x); }
+  __global__ void double2float_rn_kernel_v1(float* result, double* x) { *result = __double2float_rn(x); }
+  __global__ void double2float_rn_kernel_v2(float* result, Dummy x) { *result = __double2float_rn(x); }
+  __global__ void double2float_rn_kernel_v3(Dummy* result, double x) { *result = __double2float_rn(x); }
+  __global__ void double2float_ru_kernel_v1(float* result, double* x) { *result = __double2float_ru(x); }
+  __global__ void double2float_ru_kernel_v2(float* result, Dummy x) { *result = __double2float_ru(x); }
+  __global__ void double2float_ru_kernel_v3(Dummy* result, double x) { *result = __double2float_ru(x); }
+  __global__ void double2float_rz_kernel_v1(float* result, double* x) { *result = __double2float_rz(x); }
+  __global__ void double2float_rz_kernel_v2(float* result, Dummy x) { *result = __double2float_rz(x); }
+  __global__ void double2float_rz_kernel_v3(Dummy* result, double x) { *result = __double2float_rz(x); }
+)"};
+
+// Kernel source for the __double2hiint negative RTC test (3 kernels):
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDouble2Hiint{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double2hiint_kernel_v1(int* result, double* x) { *result = __double2hiint(x); }
+  __global__ void double2hiint_kernel_v2(int* result, Dummy x) { *result = __double2hiint(x); }
+  __global__ void double2hiint_kernel_v3(Dummy* result, double x) { *result = __double2hiint(x); }
+)"};
+
+// Kernel source for the __double2loint negative RTC test (3 kernels):
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDouble2Loint{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double2loint_kernel_v1(int* result, double* x) { *result = __double2loint(x); }
+  __global__ void double2loint_kernel_v2(int* result, Dummy x) { *result = __double2loint(x); }
+  __global__ void double2loint_kernel_v3(Dummy* result, double x) { *result = __double2loint(x); }
+)"};
+
+// Kernel source for the __double_as_longlong negative RTC test (3 kernels):
+//   _v1 passes a double*, _v2 passes a Dummy, and _v3 stores the result through a Dummy*.
+static constexpr auto kDoubleAsLonglong{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void double_as_longlong_kernel_v1(long long int* result, double* x) { *result = __double_as_longlong(x); }
+  __global__ void double_as_longlong_kernel_v2(long long int* result, Dummy x) { *result = __double_as_longlong(x); }
+  __global__ void double_as_longlong_kernel_v3(Dummy* result, double x) { *result = __double_as_longlong(x); }
+)"};