From 1ca4fdc6f778726b64cf2393134a964886165c2d Mon Sep 17 00:00:00 2001
From: Nives Vukovic <nives.vukovic@htecgroup.com>
Date: Mon, 22 Jan 2024 22:21:39 +0530
Subject: [PATCH] EXSWHTEC-284 - Implement tests for square/cube root device
 math functions #228

Change-Id: Ic19a440337cf3724f476c464125977b9b30b023e


[ROCm/hip-tests commit: 46ada257307db5e604948478b6325092121e29c1]
---
 .../hip-tests/catch/unit/math/CMakeLists.txt  |  10 +
 .../hip-tests/catch/unit/math/math_common.hh  |   2 -
 .../math_root_negative_kernels_1Dand2D.cc     | 107 ++++
 .../math_root_negative_kernels_3Dand4D.cc     | 119 ++++
 .../math/math_root_negative_kernels_rtc.hh    | 428 +++++++++++++
 .../hip-tests/catch/unit/math/root_funcs.cc   | 604 ++++++++++++++++++
 6 files changed, 1268 insertions(+), 2 deletions(-)
 create mode 100644 projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc
 create mode 100644 projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc
 create mode 100644 projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh
 create mode 100644 projects/hip-tests/catch/unit/math/root_funcs.cc

diff --git a/projects/hip-tests/catch/unit/math/CMakeLists.txt b/projects/hip-tests/catch/unit/math/CMakeLists.txt
index e552b9a8a8..33c4311038 100644
--- a/projects/hip-tests/catch/unit/math/CMakeLists.txt
+++ b/projects/hip-tests/catch/unit/math/CMakeLists.txt
@@ -25,6 +25,7 @@ set(TEST_SRC
   single_precision_intrinsics.cc
   double_precision_intrinsics.cc
   integer_intrinsics.cc
+  root_funcs.cc
 )
 
 if(HIP_PLATFORM MATCHES "nvidia")
@@ -76,3 +77,12 @@ add_test(NAME Unit_Integer_Intrinsics_Negative
          COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
          ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
          integer_intrinsics_negative_kernels.cc 20)
+add_test(NAME Unit_Device_root_1Dand2D_Negative
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         math_root_negative_kernels_1Dand2D.cc 68)  # 68: presumably the expected compile-error count - confirm against the script
+
+add_test(NAME Unit_Device_root_3Dand4D_Negative
+         COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
+         ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
+         math_root_negative_kernels_3Dand4D.cc 56)  # 56: presumably the expected compile-error count - confirm against the script
diff --git a/projects/hip-tests/catch/unit/math/math_common.hh b/projects/hip-tests/catch/unit/math/math_common.hh
index 7ebc9b8f5d..010780474f 100644
--- a/projects/hip-tests/catch/unit/math/math_common.hh
+++ b/projects/hip-tests/catch/unit/math/math_common.hh
@@ -7,10 +7,8 @@ in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
-
 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.
-
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
diff --git a/projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc
new file mode 100644
index 0000000000..688eaa95be
--- /dev/null
+++ b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc
@@ -0,0 +1,107 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+class Dummy {  // Empty class type passed to the math functions below as a wrong argument type.
+ public:
+  __device__ Dummy() {}   // user-provided device-side constructor
+  __device__ ~Dummy() {}  // user-provided device-side destructor
+};
+
+#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name) /* 4 kernels: pointer or Dummy passed as x */   \
+  __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); }               \
+  __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); }                 \
+  __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); }             \
+  __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); }
+
+#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name) /* 8 kernels: one pointer/Dummy argument each */ \
+  __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); }  \
+  __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); }  \
+  __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); }    \
+  __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); }    \
+  __global__ void func_name##f_kernel_v1(float* x, float y) { float result = func_name##f(x, y); } \
+  __global__ void func_name##f_kernel_v2(float x, float* y) { float result = func_name##f(x, y); } \
+  __global__ void func_name##f_kernel_v3(Dummy x, float y) { float result = func_name##f(x, y); }  \
+  __global__ void func_name##f_kernel_v4(float x, Dummy y) { float result = func_name##f(x, y); }
+
+#define NEGATIVE_KERNELS_SHELL_ARRAY_ARG(func_name)                                                \
+  __global__ void func_name##_kernel_v1(int* dim, const double* a) {                               \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v2(Dummy dim, const double* a) {                              \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v3(int dim, const int* a) {                                   \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v4(int dim, const char* a) {                                  \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v5(int dim, const short* a) {                                 \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v6(int dim, const long* a) {                                  \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v7(int dim, const long long* a) {                             \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v8(int dim, const float* a) {                                 \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v9(int dim, const Dummy* a) {                                 \
+    double result = func_name(dim, a);                                                             \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v1(int* dim, const float* a) {                               \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v2(Dummy dim, const float* a) {                              \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v3(int dim, const int* a) {                                  \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v4(int dim, const char* a) {                                 \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v5(int dim, const short* a) {                                \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v6(int dim, const long* a) {                                 \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v7(int dim, const long long* a) {                            \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v8(int dim, const double* a) {                               \
+    float result = func_name##f(dim, a);                                                           \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v9(int dim, const Dummy* a) {                                \
+    double result = func_name##f(dim, a);                                                          \
+  }
+
+NEGATIVE_KERNELS_SHELL_ONE_ARG(sqrt)     // 4 kernels
+NEGATIVE_KERNELS_SHELL_ONE_ARG(rsqrt)    // 4 kernels
+NEGATIVE_KERNELS_SHELL_ONE_ARG(cbrt)     // 4 kernels
+NEGATIVE_KERNELS_SHELL_ONE_ARG(rcbrt)    // 4 kernels
+NEGATIVE_KERNELS_SHELL_TWO_ARGS(hypot)   // 8 kernels
+NEGATIVE_KERNELS_SHELL_TWO_ARGS(rhypot)  // 8 kernels
+NEGATIVE_KERNELS_SHELL_ARRAY_ARG(norm)   // 18 kernels
+NEGATIVE_KERNELS_SHELL_ARRAY_ARG(rnorm)  // 18 kernels; 68 in total, the number CMakeLists.txt passes for this file
diff --git a/projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc
new file mode 100644
index 0000000000..be8d206af6
--- /dev/null
+++ b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc
@@ -0,0 +1,119 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+class Dummy {  // Empty class type passed to the math functions below as a wrong argument type.
+ public:
+  __device__ Dummy() {}   // user-provided device-side constructor
+  __device__ ~Dummy() {}  // user-provided device-side destructor
+};
+
+#define NEGATIVE_KERNELS_SHELL_THREE_ARGS(func_name) /* 12 kernels: one pointer/Dummy arg each */ \
+  __global__ void func_name##_kernel_v1(double* x, double y, double z) {                           \
+    double result = func_name(x, y, z);                                                            \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v2(double x, double* y, double z) {                           \
+    double result = func_name(x, y, z);                                                            \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v3(double x, double y, double* z) {                           \
+    double result = func_name(x, y, z);                                                            \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v4(Dummy x, double y, double z) {                             \
+    double result = func_name(x, y, z);                                                            \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v5(double x, Dummy y, double z) {                             \
+    double result = func_name(x, y, z);                                                            \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v6(double x, double y, Dummy z) {                             \
+    double result = func_name(x, y, z);                                                            \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v1(float* x, float y, float z) {                             \
+    float result = func_name##f(x, y, z);                                                          \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v2(float x, float* y, float z) {                             \
+    float result = func_name##f(x, y, z);                                                          \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v3(float x, float y, float* z) {                             \
+    float result = func_name##f(x, y, z);                                                          \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v4(Dummy x, float y, float z) {                              \
+    float result = func_name##f(x, y, z);                                                          \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v5(float x, Dummy y, float z) {                              \
+    float result = func_name##f(x, y, z);                                                          \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v6(float x, float y, Dummy z) {                              \
+    float result = func_name##f(x, y, z);                                                          \
+  }
+
+#define NEGATIVE_KERNELS_SHELL_FOUR_ARGS(func_name) /* 16 kernels: one pointer/Dummy arg each */  \
+  __global__ void func_name##_kernel_v1(double* x, double y, double z, double w) {                 \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v2(double x, double* y, double z, double w) {                 \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v3(double x, double y, double* z, double w) {                 \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v4(double x, double y, double z, double* w) {                 \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v5(Dummy x, double y, double z, double w) {                   \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v6(double x, Dummy y, double z, double w) {                   \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v7(double x, double y, Dummy z, double w) {                   \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##_kernel_v8(double x, double y, double z, Dummy w) {                   \
+    double result = func_name(x, y, z, w);                                                         \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v1(float* x, float y, float z, float w) {                    \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v2(float x, float* y, float z, float w) {                    \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v3(float x, float y, float* z, float w) {                    \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v4(float x, float y, float z, float* w) {                    \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v5(Dummy x, float y, float z, float w) {                     \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v6(float x, Dummy y, float z, float w) {                     \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v7(float x, float y, Dummy z, float w) {                     \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }                                                                                                \
+  __global__ void func_name##f_kernel_v8(float x, float y, float z, Dummy w) {                     \
+    float result = func_name##f(x, y, z, w);                                                       \
+  }
+
+NEGATIVE_KERNELS_SHELL_THREE_ARGS(norm3d)   // 12 kernels
+NEGATIVE_KERNELS_SHELL_THREE_ARGS(rnorm3d)  // 12 kernels
+NEGATIVE_KERNELS_SHELL_FOUR_ARGS(norm4d)    // 16 kernels
+NEGATIVE_KERNELS_SHELL_FOUR_ARGS(rnorm4d)   // 16 kernels; 56 in total, the number CMakeLists.txt passes for this file
diff --git a/projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh
new file mode 100644
index 0000000000..53507ee23c
--- /dev/null
+++ b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh
@@ -0,0 +1,428 @@
+/*
+Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+/*
+Kernel sources for the RTC-based negative test cases of the root math functions.
+*/
+
+static constexpr auto kSqrt{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void sqrt_kernel_v1(double* x) { double result = sqrt(x); }
+  __global__ void sqrt_kernel_v2(Dummy x) { double result = sqrt(x); }
+  __global__ void sqrtf_kernel_v1(float* x) { float result = sqrtf(x); }
+  __global__ void sqrtf_kernel_v2(Dummy x) { float result = sqrtf(x); }
+)"};  // kSqrt: source text with 4 kernels calling sqrt()/sqrtf() on a pointer or a Dummy object.
+
+static constexpr auto kRsqrt{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void rsqrt_kernel_v1(double* x) { double result = rsqrt(x); }
+  __global__ void rsqrt_kernel_v2(Dummy x) { double result = rsqrt(x); }
+  __global__ void rsqrtf_kernel_v1(float* x) { float result = rsqrtf(x); }
+  __global__ void rsqrtf_kernel_v2(Dummy x) { float result = rsqrtf(x); }
+)"};  // kRsqrt: source text with 4 kernels calling rsqrt()/rsqrtf() on a pointer or a Dummy object.
+
+static constexpr auto kCbrt{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void cbrt_kernel_v1(double* x) { double result = cbrt(x); }
+  __global__ void cbrt_kernel_v2(Dummy x) { double result = cbrt(x); }
+  __global__ void cbrtf_kernel_v1(float* x) { float result = cbrtf(x); }
+  __global__ void cbrtf_kernel_v2(Dummy x) { float result = cbrtf(x); }
+)"};  // kCbrt: source text with 4 kernels calling cbrt()/cbrtf() on a pointer or a Dummy object.
+
+static constexpr auto kRcbrt{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void rcbrt_kernel_v1(double* x) { double result = rcbrt(x); }
+  __global__ void rcbrt_kernel_v2(Dummy x) { double result = rcbrt(x); }
+  __global__ void rcbrtf_kernel_v1(float* x) { float result = rcbrtf(x); }
+  __global__ void rcbrtf_kernel_v2(Dummy x) { float result = rcbrtf(x); }
+)"};  // kRcbrt: source text with 4 kernels calling rcbrt()/rcbrtf() on a pointer or a Dummy object.
+
+static constexpr auto kHypot{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void hypot_kernel_v1(double* x, double y) { double result = hypot(x, y); }
+  __global__ void hypot_kernel_v2(double x, double* y) { double result = hypot(x, y); }
+  __global__ void hypot_kernel_v3(Dummy x, double y) { double result = hypot(x, y); }
+  __global__ void hypot_kernel_v4(double x, Dummy y) { double result = hypot(x, y); }
+  __global__ void hypotf_kernel_v1(float* x, float y) { float result = hypotf(x, y); }
+  __global__ void hypotf_kernel_v2(float x, float* y) { float result = hypotf(x, y); }
+  __global__ void hypotf_kernel_v3(Dummy x, float y) { float result = hypotf(x, y); }
+  __global__ void hypotf_kernel_v4(float x, Dummy y) { float result = hypotf(x, y); }
+)"};  // kHypot: source text with 8 kernels; each passes one pointer or Dummy to hypot()/hypotf().
+
+static constexpr auto kRhypot{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void rhypot_kernel_v1(double* x, double y) { double result = rhypot(x, y); }
+  __global__ void rhypot_kernel_v2(double x, double* y) { double result = rhypot(x, y); }
+  __global__ void rhypot_kernel_v3(Dummy x, double y) { double result = rhypot(x, y); }
+  __global__ void rhypot_kernel_v4(double x, Dummy y) { double result = rhypot(x, y); }
+  __global__ void rhypotf_kernel_v1(float* x, float y) { float result = rhypotf(x, y); }
+  __global__ void rhypotf_kernel_v2(float x, float* y) { float result = rhypotf(x, y); }
+  __global__ void rhypotf_kernel_v3(Dummy x, float y) { float result = rhypotf(x, y); }
+  __global__ void rhypotf_kernel_v4(float x, Dummy y) { float result = rhypotf(x, y); }
+)"};  // kRhypot: source text with 8 kernels; each passes one pointer or Dummy to rhypot()/rhypotf().
+
+static constexpr auto kNorm3D{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void norm3d_kernel_v1(double* x, double y, double z) {
+    double result = norm3d(x, y, z);
+  }
+  __global__ void norm3d_kernel_v2(double x, double* y, double z) {
+    double result = norm3d(x, y, z);
+  }
+  __global__ void norm3d_kernel_v3(double x, double y, double* z) {
+    double result = norm3d(x, y, z);
+  }
+  __global__ void norm3d_kernel_v4(Dummy x, double y, double z) {
+    double result = norm3d(x, y, z);
+  }
+  __global__ void norm3d_kernel_v5(double x, Dummy y, double z) {
+    double result = norm3d(x, y, z);
+  }
+  __global__ void norm3d_kernel_v6(double x, double y, Dummy z) {
+    double result = norm3d(x, y, z);
+  }
+  __global__ void norm3df_kernel_v1(float* x, float y, float z) {
+    float result = norm3df(x, y, z);
+  }
+  __global__ void norm3df_kernel_v2(float x, float* y, float z) {
+    float result = norm3df(x, y, z);
+  }
+  __global__ void norm3df_kernel_v3(float x, float y, float* z) {
+    float result = norm3df(x, y, z);
+  }
+  __global__ void norm3df_kernel_v4(Dummy x, float y, float z) {
+    float result = norm3df(x, y, z);
+  }
+  __global__ void norm3df_kernel_v5(float x, Dummy y, float z) {
+    float result = norm3df(x, y, z);
+  }
+  __global__ void norm3df_kernel_v6(float x, float y, Dummy z) {
+    float result = norm3df(x, y, z);
+  }
+)"};  // kNorm3D: source text with 12 kernels; each passes one pointer or Dummy to norm3d()/norm3df().
+
+static constexpr auto kRnorm3D{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void rnorm3d_kernel_v1(double* x, double y, double z) {
+    double result = rnorm3d(x, y, z);
+  }
+  __global__ void rnorm3d_kernel_v2(double x, double* y, double z) {
+    double result = rnorm3d(x, y, z);
+  }
+  __global__ void rnorm3d_kernel_v3(double x, double y, double* z) {
+    double result = rnorm3d(x, y, z);
+  }
+  __global__ void rnorm3d_kernel_v4(Dummy x, double y, double z) {
+    double result = rnorm3d(x, y, z);
+  }
+  __global__ void rnorm3d_kernel_v5(double x, Dummy y, double z) {
+    double result = rnorm3d(x, y, z);
+  }
+  __global__ void rnorm3d_kernel_v6(double x, double y, Dummy z) {
+    double result = rnorm3d(x, y, z);
+  }
+  __global__ void rnorm3df_kernel_v1(float* x, float y, float z) {
+    float result = rnorm3df(x, y, z);
+  }
+  __global__ void rnorm3df_kernel_v2(float x, float* y, float z) {
+    float result = rnorm3df(x, y, z);
+  }
+  __global__ void rnorm3df_kernel_v3(float x, float y, float* z) {
+    float result = rnorm3df(x, y, z);
+  }
+  __global__ void rnorm3df_kernel_v4(Dummy x, float y, float z) {
+    float result = rnorm3df(x, y, z);
+  }
+  __global__ void rnorm3df_kernel_v5(float x, Dummy y, float z) {
+    float result = rnorm3df(x, y, z);
+  }
+  __global__ void rnorm3df_kernel_v6(float x, float y, Dummy z) {
+    float result = rnorm3df(x, y, z);
+  }
+)"};  // kRnorm3D: source text with 12 kernels; each passes one pointer or Dummy to rnorm3d()/rnorm3df().
+
+static constexpr auto kNorm4D{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void norm4d_kernel_v1(double* x, double y, double z, double w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4d_kernel_v2(double x, double* y, double z, double w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4d_kernel_v3(double x, double y, double* z, double w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4d_kernel_v4(double x, double y, double z, double* w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4d_kernel_v5(Dummy x, double y, double z, double w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4d_kernel_v6(double x, Dummy y, double z, double w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4d_kernel_v7(double x, double y, Dummy z, double w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4d_kernel_v8(double x, double y, double z, Dummy w) {
+    double result = norm4d(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v1(float* x, float y, float z, float w) {
+    float result = norm4df(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v2(float x, float* y, float z, float w) {
+    float result = norm4df(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v3(float x, float y, float* z, float w) {
+    float result = norm4df(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v4(float x, float y, float z, float* w) {
+    float result = norm4df(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v5(Dummy x, float y, float z, float w) {
+    float result = norm4df(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v6(float x, Dummy y, float z, float w) {
+    float result = norm4df(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v7(float x, float y, Dummy z, float w) {
+    float result = norm4df(x, y, z, w);
+  }
+  __global__ void norm4df_kernel_v8(float x, float y, float z, Dummy w) {
+    float result = norm4df(x, y, z, w);
+  }
+)"};  // kNorm4D: source text with 16 kernels; each passes one pointer or Dummy to norm4d()/norm4df().
+// RTC source: 16 kernels, each passing one pointer or Dummy argument to rnorm4d/rnorm4df.
+static constexpr auto kRnorm4D{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void rnorm4d_kernel_v1(double* x, double y, double z, double w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4d_kernel_v2(double x, double* y, double z, double w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4d_kernel_v3(double x, double y, double* z, double w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4d_kernel_v4(double x, double y, double z, double* w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4d_kernel_v5(Dummy x, double y, double z, double w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4d_kernel_v6(double x, Dummy y, double z, double w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4d_kernel_v7(double x, double y, Dummy z, double w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4d_kernel_v8(double x, double y, double z, Dummy w) {
+    double result = rnorm4d(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v1(float* x, float y, float z, float w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v2(float x, float* y, float z, float w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v3(float x, float y, float* z, float w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v4(float x, float y, float z, float* w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v5(Dummy x, float y, float z, float w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v6(float x, Dummy y, float z, float w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v7(float x, float y, Dummy z, float w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+  __global__ void rnorm4df_kernel_v8(float x, float y, float z, Dummy w) {
+    float result = rnorm4df(x, y, z, w);
+  }
+)"};
+// RTC source: 18 kernels calling norm/normf with a mistyped `dim` or array argument.
+static constexpr auto kNorm{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void norm_kernel_v1(int* dim, const double* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v2(Dummy dim, const double* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v3(int dim, const int* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v4(int dim, const char* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v5(int dim, const short* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v6(int dim, const long* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v7(int dim, const long long* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v8(int dim, const float* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void norm_kernel_v9(int dim, const Dummy* a) {
+    double result = norm(dim, a);
+  }
+  __global__ void normf_kernel_v1(int* dim, const float* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v2(Dummy dim, const float* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v3(int dim, const int* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v4(int dim, const char* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v5(int dim, const short* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v6(int dim, const long* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v7(int dim, const long long* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v8(int dim, const double* a) {
+    float result = normf(dim, a);
+  }
+  __global__ void normf_kernel_v9(int dim, const Dummy* a) {
+    float result = normf(dim, a);
+  }
+)"};
+// RTC source: 18 kernels calling rnorm/rnormf with a mistyped `dim` or array argument.
+static constexpr auto kRnorm{R"(
+  class Dummy {
+   public:
+    __device__ Dummy() {}
+    __device__ ~Dummy() {}
+  };
+  __global__ void rnorm_kernel_v1(int* dim, const double* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v2(Dummy dim, const double* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v3(int dim, const int* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v4(int dim, const char* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v5(int dim, const short* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v6(int dim, const long* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v7(int dim, const long long* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v8(int dim, const float* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnorm_kernel_v9(int dim, const Dummy* a) {
+    double result = rnorm(dim, a);
+  }
+  __global__ void rnormf_kernel_v1(int* dim, const float* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v2(Dummy dim, const float* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v3(int dim, const int* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v4(int dim, const char* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v5(int dim, const short* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v6(int dim, const long* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v7(int dim, const long long* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v8(int dim, const double* a) {
+    float result = rnormf(dim, a);
+  }
+  __global__ void rnormf_kernel_v9(int dim, const Dummy* a) {
+    float result = rnormf(dim, a);
+  }
+)"};
diff --git a/projects/hip-tests/catch/unit/math/root_funcs.cc b/projects/hip-tests/catch/unit/math/root_funcs.cc
new file mode 100644
index 0000000000..1638ca8b04
--- /dev/null
+++ b/projects/hip-tests/catch/unit/math/root_funcs.cc
@@ -0,0 +1,604 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "unary_common.hh"
+#include "binary_common.hh"
+#include "ternary_common.hh"
+#include "quaternary_common.hh"
+#include "math_root_negative_kernels_rtc.hh"
+
+/**
+ * @addtogroup RootMathFuncs RootMathFuncs
+ * @{
+ * @ingroup MathTest
+ */
+
+/********** Unary Functions **********/
+
+MATH_UNARY_KERNEL_DEF(sqrt)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `sqrtf(x)` for all possible inputs. The results are compared
+ * against the reference function `float std::sqrt(float)`. The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_sqrtf_Accuracy_Positive") {
+  float (*host_sqrt)(float) = std::sqrt;  // selects the float overload as the host reference
+  UnarySinglePrecisionTest(sqrt_kernel<float>, host_sqrt, ULPValidatorBuilderFactory<float>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `sqrt(x)` on a table of difficult values and then on a large number
+ * of randomly generated values, against the reference function `double std::sqrt(double)`. The
+ * error bounds are IEEE-compliant: the validator is built with a tolerance of 0 ulp.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_sqrt_Accuracy_Positive") {
+  double (*host_sqrt)(double) = std::sqrt;  // selects the double overload as the host reference
+  UnaryDoublePrecisionTest<double>(sqrt_kernel<double>, host_sqrt,
+                                   ULPValidatorBuilderFactory<double>(0));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kSqrt` kernels that pass invalid-type arguments to sqrtf and sqrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_sqrt_sqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSqrt); }
+
+MATH_UNARY_KERNEL_DEF(rsqrt)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `rsqrtf(x)` for all possible inputs against a double-precision host
+ * reference computed as 1 / sqrt(x). The maximum ulp error is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rsqrtf_Accuracy_Positive") {
+  auto host_rsqrt = [](double x) -> double { return 1. / std::sqrt(x); };
+  double (*host_ref)(double) = host_rsqrt;  // captureless lambda converts to a function pointer
+  UnarySinglePrecisionTest(rsqrt_kernel<float>, host_ref, ULPValidatorBuilderFactory<float>(2));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `rsqrt(x)` on a table of difficult values and then on a large
+ * number of randomly generated values, against 1 / sqrt(x) in long double. Max ulp error: 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rsqrt_Accuracy_Positive") {
+  auto host_rsqrt = [](long double x) -> long double { return 1.L / std::sqrt(x); };
+  long double (*host_ref)(long double) = host_rsqrt;
+  UnaryDoublePrecisionTest(rsqrt_kernel<double>, host_ref, ULPValidatorBuilderFactory<double>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kRsqrt` kernels that pass invalid-type arguments to rsqrtf and rsqrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rsqrt_rsqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRsqrt); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `cbrtf(x)` for all possible inputs and `cbrt(x)` against a
+ * table of difficult values, followed by a large number of randomly generated values. The results
+ * are compared against the reference function `T std::cbrt(T)`. The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_WITHIN_ULP_TEST_DEF(cbrt, std::cbrt, 1, 1)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kCbrt` kernels that pass invalid-type arguments to cbrtf and cbrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_cbrt_cbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCbrt); }
+
+MATH_UNARY_KERNEL_DEF(rcbrt)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `rcbrtf(x)` for all possible inputs against a double-precision host
+ * reference computed as 1 / cbrt(x). The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rcbrtf_Accuracy_Positive") {
+  auto host_rcbrt = [](double x) -> double { return 1. / std::cbrt(x); };
+  double (*host_ref)(double) = host_rcbrt;  // captureless lambda converts to a function pointer
+  UnarySinglePrecisionTest(rcbrt_kernel<float>, host_ref, ULPValidatorBuilderFactory<float>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `rcbrt(x)` on a table of difficult values and then on a large
+ * number of randomly generated values, against 1 / cbrt(x) in long double. Max ulp error: 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rcbrt_Accuracy_Positive") {
+  auto host_rcbrt = [](long double x) -> long double { return 1.L / std::cbrt(x); };
+  long double (*host_ref)(long double) = host_rcbrt;
+  UnaryDoublePrecisionTest(rcbrt_kernel<double>, host_ref, ULPValidatorBuilderFactory<double>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kRcbrt` kernels that pass invalid-type arguments to rcbrtf and rcbrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rcbrt_rcbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRcbrt); }
+
+/********** Binary Functions **********/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `hypotf(x, y)` and `hypot(x, y)` against a table of
+ * difficult values, followed by a large number of randomly generated values. The results are
+ * compared against the reference function `T std::hypot(T, T)`. The maximum ulp error is 3 for
+ * single precision and 2 for double precision.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_WITHIN_ULP_TEST_DEF(hypot, std::hypot, 3, 2)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kHypot` kernels that pass invalid-type arguments to hypotf and hypot.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_hypot_hypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kHypot); }
+
+MATH_BINARY_KERNEL_DEF(rhypot)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `rhypotf(x, y)` and `rhypot(x, y)` on a table of difficult values
+ * and then on a large number of randomly generated values. The host reference is
+ * 1 / hypot(x, y). Max ulp error: 2 (float), 1 (double).
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rhypot_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto host_rhypot = [](RT x, RT y) -> RT { return 1. / std::hypot(x, y); };
+  RT (*ref)(RT, RT) = host_rhypot;
+  constexpr int ulp = std::is_same_v<float, TestType> ? 2 : 1;
+  BinaryFloatingPointTest(rhypot_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kRhypot` kernels that pass invalid-type arguments to rhypotf and rhypot.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rhypot_rhypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kRhypot); }
+
+/********** Ternary Functions **********/
+
+MATH_TERNARY_KERNEL_DEF(norm3d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `norm3df(x, y, z)` and `norm3d(x, y, z)` on a table of difficult
+ * values and then on a large number of randomly generated values. The host reference is
+ * sqrt(x^2 + y^2 + z^2), or +inf if any input is infinite. Max ulp error: 3 (float), 2 (double).
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_norm3d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  // The infinity check comes first so an infinite input wins even if another input is NaN.
+  auto host_norm3d = [](RT x, RT y, RT z) -> RT {
+    const bool any_inf = std::isinf(x) || std::isinf(y) || std::isinf(z);
+    return any_inf ? std::numeric_limits<RT>::infinity() : std::sqrt(x * x + y * y + z * z);
+  };
+  RT (*ref)(RT, RT, RT) = host_norm3d;
+  constexpr int ulp = std::is_same_v<float, TestType> ? 3 : 2;
+  TernaryFloatingPointTest(norm3d_kernel<TestType>, ref,
+                           ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kNorm3D` kernels that pass invalid-type arguments to norm3df and norm3d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_norm3d_norm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kNorm3D); }
+
+MATH_TERNARY_KERNEL_DEF(rnorm3d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `rnorm3df(x, y, z)` and `rnorm3d(x, y, z)` on a table of difficult
+ * values and then on a large number of randomly generated values. The host reference is
+ * 1 / sqrt(x^2 + y^2 + z^2), or 0 if any input is infinite. Max ulp error: 2 (float), 1 (double).
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm3d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  // The infinity check comes first so an infinite input wins even if another input is NaN.
+  auto host_rnorm3d = [](RT x, RT y, RT z) -> RT {
+    const bool any_inf = std::isinf(x) || std::isinf(y) || std::isinf(z);
+    if (any_inf) return 0;
+    return 1. / std::sqrt(x * x + y * y + z * z);
+  };
+  RT (*ref)(RT, RT, RT) = host_rnorm3d;
+  constexpr int ulp = std::is_same_v<float, TestType> ? 2 : 1;
+  TernaryFloatingPointTest(rnorm3d_kernel<TestType>, ref,
+                           ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kRnorm3D` kernels that pass invalid-type arguments to rnorm3df and rnorm3d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm3d_rnorm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kRnorm3D); }
+
+/********** Quaternary Functions **********/
+
+MATH_QUATERNARY_KERNEL_DEF(norm4d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `norm4df(x, y, z, w)` and `norm4d(x, y, z, w)` on a table of
+ * difficult values and then on a large number of randomly generated values. The host reference is
+ * sqrt(x^2 + y^2 + z^2 + w^2), or +inf if any input is infinite. Max ulp: 3 (float), 2 (double).
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_norm4d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  // The infinity check comes first so an infinite input wins even if another input is NaN.
+  auto host_norm4d = [](RT x, RT y, RT z, RT w) -> RT {
+    const bool any_inf = std::isinf(x) || std::isinf(y) || std::isinf(z) || std::isinf(w);
+    if (any_inf) return std::numeric_limits<RT>::infinity();
+    return std::sqrt(x * x + y * y + z * z + w * w);
+  };
+  RT (*ref)(RT, RT, RT, RT) = host_norm4d;
+  constexpr int ulp = std::is_same_v<float, TestType> ? 3 : 2;
+  QuaternaryFloatingPointTest(norm4d_kernel<TestType>, ref,
+                              ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kNorm4D` kernels that pass invalid-type arguments to norm4df and norm4d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_norm4d_norm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kNorm4D); }
+
+MATH_QUATERNARY_KERNEL_DEF(rnorm4d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Checks the accuracy of `rnorm4df(x, y, z, w)` and `rnorm4d(x, y, z, w)` on a table of
+ * difficult values and then on a large number of randomly generated values. The host reference is
+ * 1 / sqrt(x^2 + y^2 + z^2 + w^2), or 0 if any input is infinite. Max ulp: 2 (float), 1 (double).
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm4d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  // The infinity check comes first so an infinite input wins even if another input is NaN.
+  auto host_rnorm4d = [](RT x, RT y, RT z, RT w) -> RT {
+    const bool any_inf = std::isinf(x) || std::isinf(y) || std::isinf(z) || std::isinf(w);
+    if (any_inf) return 0;
+    return 1. / std::sqrt(x * x + y * y + z * z + w * w);
+  };
+  RT (*ref)(RT, RT, RT, RT) = host_rnorm4d;
+  constexpr int ulp = std::is_same_v<float, TestType> ? 2 : 1;
+  QuaternaryFloatingPointTest(rnorm4d_kernel<TestType>, ref,
+                              ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kRnorm4D` kernels that pass invalid-type arguments to rnorm4df and rnorm4d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm4d_rnorm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kRnorm4D); }
+
+/********** norm Function **********/
+// For func_name X, defines X_kernel<T>: *ys = Xf(dim, x1s) (float) or X(dim, x1s) (double).
+#define MATH_NORM_KERNEL_DEF(func_name)                                                            \
+  template <typename T> __global__ void func_name##_kernel(T* const ys, int dim, T* const x1s) {   \
+    if constexpr (std::is_same_v<float, T>) {                                                      \
+      *ys = func_name##f(dim, x1s);                                                                \
+    } else if constexpr (std::is_same_v<double, T>) {                                              \
+      *ys = func_name(dim, x1s);                                                                   \
+    }                                                                                              \
+  }
+// Runs `kernel` for each dim in [1, max_dim) on an all-ones input, validating against ref_func.
+template <typename T, typename F, typename RF, typename ValidatorBuilder>
+void NormSimpleTest(F kernel, RF ref_func, const ValidatorBuilder& validator_builder) {
+  constexpr int max_dim = 10000;
+  // Input arrays of max_dim elements and single-element outputs, one host and one device each.
+  LinearAllocGuard<T> x{LinearAllocs::hipHostMalloc, max_dim * sizeof(T)};
+  LinearAllocGuard<T> x_dev{LinearAllocs::hipMalloc, max_dim * sizeof(T)};
+  LinearAllocGuard<T> y{LinearAllocs::hipHostMalloc, sizeof(T)};
+  LinearAllocGuard<T> y_dev{LinearAllocs::hipMalloc, sizeof(T)};
+
+  std::fill_n(x.ptr(), max_dim, 1);
+  HIP_CHECK(hipMemcpy(x_dev.ptr(), x.ptr(), max_dim * sizeof(T), hipMemcpyHostToDevice));
+  // `dim` is an int to match the `int dim` parameter of the kernels and reference functions.
+  for (int dim = 1; dim < max_dim; ++dim) {
+    kernel<<<1, 1>>>(y_dev.ptr(), dim, x_dev.ptr());
+    HIP_CHECK(hipGetLastError());
+
+    HIP_CHECK(hipMemcpy(y.ptr(), y_dev.ptr(), sizeof(T), hipMemcpyDeviceToHost));
+    const auto actual_val = *y.ptr();
+    const auto ref_val = static_cast<T>(ref_func(dim, x.ptr()));
+    const auto validator = validator_builder(ref_val);
+
+    if (!validator->match(actual_val)) {
+      std::stringstream ss;
+      ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10 - 1);
+      ss << "Validation fails for dim: " << dim << " " << actual_val << " " << ref_val;
+      INFO(ss.str());
+      REQUIRE(false);
+    }
+  }
+}
+
+MATH_NORM_KERNEL_DEF(norm)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity-checks `normf(dim, arr)` and `norm(dim, arr)` on all-ones arrays, within 10 ulp.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_norm_Sanity_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto host_norm = [](int dim, TestType* args) -> RT {
+    RT sum_sq = 0;
+    for (int idx = 0; idx < dim; ++idx) {
+      const RT elem = static_cast<RT>(args[idx]);
+      if (std::isinf(elem)) return std::numeric_limits<RT>::infinity();
+      sum_sq += elem * elem;
+    }
+    return std::sqrt(sum_sq);
+  };
+  RT (*ref)(int, TestType*) = host_norm;
+  NormSimpleTest<TestType>(norm_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(10));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kNorm` kernels that pass invalid-type arguments to normf and norm.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_norm_normf_Negative_RTC") { NegativeTestRTCWrapper<18>(kNorm); }
+
+MATH_NORM_KERNEL_DEF(rnorm)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity-checks `rnormf(dim, arr)` and `rnorm(dim, arr)` on all-ones arrays, within 10 ulp.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm_Sanity_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto rnorm_ref = [](int dim, TestType* args) -> RT {
+    RT sum = 0;
+    for (int i = 0; i < dim; i++) {
+      // An infinite element makes the norm infinite, so the reciprocal norm is 0.
+      if (std::isinf(args[i])) return 0;
+      sum += static_cast<RT>(args[i]) * static_cast<RT>(args[i]);
+    }
+    return 1. / std::sqrt(sum);
+  };
+  RT (*ref)(int, TestType*) = rnorm_ref;
+  NormSimpleTest<TestType>(rnorm_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(10));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTC-compiles `kRnorm` kernels that pass invalid-type arguments to rnormf and rnorm.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm_rnormf_Negative_RTC") { NegativeTestRTCWrapper<18>(kRnorm); }