From 1ca4fdc6f778726b64cf2393134a964886165c2d Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Mon, 22 Jan 2024 22:21:39 +0530 Subject: [PATCH] EXSWHTEC-284 - Implement tests for square/cube root device math functions #228 Change-Id: Ic19a440337cf3724f476c464125977b9b30b023e [ROCm/hip-tests commit: 46ada257307db5e604948478b6325092121e29c1] --- .../hip-tests/catch/unit/math/CMakeLists.txt | 10 + .../hip-tests/catch/unit/math/math_common.hh | 2 - .../math_root_negative_kernels_1Dand2D.cc | 107 ++++ .../math_root_negative_kernels_3Dand4D.cc | 119 ++++ .../math/math_root_negative_kernels_rtc.hh | 428 +++++++++++++ .../hip-tests/catch/unit/math/root_funcs.cc | 604 ++++++++++++++++++ 6 files changed, 1268 insertions(+), 2 deletions(-) create mode 100644 projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc create mode 100644 projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc create mode 100644 projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh create mode 100644 projects/hip-tests/catch/unit/math/root_funcs.cc diff --git a/projects/hip-tests/catch/unit/math/CMakeLists.txt b/projects/hip-tests/catch/unit/math/CMakeLists.txt index e552b9a8a8..33c4311038 100644 --- a/projects/hip-tests/catch/unit/math/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/math/CMakeLists.txt @@ -25,6 +25,7 @@ set(TEST_SRC single_precision_intrinsics.cc double_precision_intrinsics.cc integer_intrinsics.cc + root_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -76,3 +77,12 @@ add_test(NAME Unit_Integer_Intrinsics_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} integer_intrinsics_negative_kernels.cc 20) +add_test(NAME Unit_Device_root_1Dand2D_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_root_negative_kernels_1Dand2D.cc 68) + +add_test(NAME 
Unit_Device_root_3Dand4D_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_root_negative_kernels_3Dand4D.cc 56) diff --git a/projects/hip-tests/catch/unit/math/math_common.hh b/projects/hip-tests/catch/unit/math/math_common.hh index 7ebc9b8f5d..010780474f 100644 --- a/projects/hip-tests/catch/unit/math/math_common.hh +++ b/projects/hip-tests/catch/unit/math/math_common.hh @@ -7,10 +7,8 @@ in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc new file mode 100644 index 0000000000..688eaa95be --- /dev/null +++ b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_1Dand2D.cc @@ -0,0 +1,107 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } \ + __global__ void func_name##f_kernel_v1(float* x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(float x, float* y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(Dummy x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy y) { float result = func_name##f(x, y); } + +#define NEGATIVE_KERNELS_SHELL_ARRAY_ARG(func_name) \ + __global__ void func_name##_kernel_v1(int* dim, const double* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v2(Dummy dim, const double* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v3(int dim, const int* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v4(int dim, const char* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v5(int dim, const short* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void 
func_name##_kernel_v6(int dim, const long* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v7(int dim, const long long* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v8(int dim, const float* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v9(int dim, const Dummy* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##f_kernel_v1(int* dim, const float* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v2(Dummy dim, const float* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v3(int dim, const int* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v4(int dim, const char* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v5(int dim, const short* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v6(int dim, const long* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v7(int dim, const long long* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v8(int dim, const double* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v9(int dim, const Dummy* a) { \ + double result = func_name##f(dim, a); \ + } + +NEGATIVE_KERNELS_SHELL_ONE_ARG(sqrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(rsqrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(cbrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(rcbrt) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(hypot) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(rhypot) +NEGATIVE_KERNELS_SHELL_ARRAY_ARG(norm) +NEGATIVE_KERNELS_SHELL_ARRAY_ARG(rnorm) diff --git a/projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc new file mode 100644 
index 0000000000..be8d206af6 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_3Dand4D.cc @@ -0,0 +1,119 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_THREE_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v2(double x, double* y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v3(double x, double y, double* z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v4(Dummy x, double y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v5(double x, Dummy y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v6(double x, double y, Dummy z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v1(float* x, float y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v2(float x, float* y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v3(float x, float y, float* z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v4(Dummy x, float y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v5(float x, Dummy y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v6(float x, float y, Dummy z) { \ + float result = func_name##f(x, y, z); \ + } + +#define NEGATIVE_KERNELS_SHELL_FOUR_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v2(double x, double* y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v3(double x, double y, double* z, double 
w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v4(double x, double y, double z, double* w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v5(Dummy x, double y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v6(double x, Dummy y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v7(double x, double y, Dummy z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v8(double x, double y, double z, Dummy w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v1(float* x, float y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v2(float x, float* y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v3(float x, float y, float* z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v4(float x, float y, float z, float* w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v5(Dummy x, float y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v6(float x, Dummy y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v7(float x, float y, Dummy z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v8(float x, float y, float z, Dummy w) { \ + float result = func_name##f(x, y, z, w); \ + } + +NEGATIVE_KERNELS_SHELL_THREE_ARGS(norm3d) +NEGATIVE_KERNELS_SHELL_THREE_ARGS(rnorm3d) +NEGATIVE_KERNELS_SHELL_FOUR_ARGS(norm4d) +NEGATIVE_KERNELS_SHELL_FOUR_ARGS(rnorm4d) diff --git 
a/projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh new file mode 100644 index 0000000000..53507ee23c --- /dev/null +++ b/projects/hip-tests/catch/unit/math/math_root_negative_kernels_rtc.hh @@ -0,0 +1,428 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math root negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kSqrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sqrt_kernel_v1(double* x) { double result = sqrt(x); } + __global__ void sqrt_kernel_v2(Dummy x) { double result = sqrt(x); } + __global__ void sqrtf_kernel_v1(float* x) { float result = sqrtf(x); } + __global__ void sqrtf_kernel_v2(Dummy x) { float result = sqrtf(x); } +)"}; + +static constexpr auto kRsqrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rsqrt_kernel_v1(double* x) { double result = rsqrt(x); } + __global__ void rsqrt_kernel_v2(Dummy x) { double result = rsqrt(x); } + __global__ void rsqrtf_kernel_v1(float* x) { float result = rsqrtf(x); } + __global__ void rsqrtf_kernel_v2(Dummy x) { float result = rsqrtf(x); } +)"}; + +static constexpr auto kCbrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cbrt_kernel_v1(double* x) { double result = cbrt(x); } + __global__ void cbrt_kernel_v2(Dummy x) { double result = cbrt(x); } + __global__ void cbrtf_kernel_v1(float* x) { float result = cbrtf(x); } + __global__ void cbrtf_kernel_v2(Dummy x) { float result = cbrtf(x); } +)"}; + +static constexpr auto kRcbrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rcbrt_kernel_v1(double* x) { double result = rcbrt(x); } + __global__ void rcbrt_kernel_v2(Dummy x) { double result = rcbrt(x); } + __global__ void rcbrtf_kernel_v1(float* x) { float result = rcbrtf(x); } + __global__ void rcbrtf_kernel_v2(Dummy x) { float result = rcbrtf(x); } +)"}; + +static constexpr auto kHypot{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void hypot_kernel_v1(double* x, double y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v2(double x, double* y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v3(Dummy x, 
double y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v4(double x, Dummy y) { double result = hypot(x, y); } + __global__ void hypotf_kernel_v1(float* x, float y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v2(float x, float* y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v3(Dummy x, float y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v4(float x, Dummy y) { float result = hypotf(x, y); } +)"}; + +static constexpr auto kRhypot{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rhypot_kernel_v1(double* x, double y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v2(double x, double* y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v3(Dummy x, double y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v4(double x, Dummy y) { double result = rhypot(x, y); } + __global__ void rhypotf_kernel_v1(float* x, float y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v2(float x, float* y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v3(Dummy x, float y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v4(float x, Dummy y) { float result = rhypotf(x, y); } +)"}; + +static constexpr auto kNorm3D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm3d_kernel_v1(double* x, double y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v2(double x, double* y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v3(double x, double y, double* z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v4(Dummy x, double y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v5(double x, Dummy y, double z) { + double result = norm3d(x, y, z); + } + __global__ void 
norm3d_kernel_v6(double x, double y, Dummy z) { + double result = norm3d(x, y, z); + } + __global__ void norm3df_kernel_v1(float* x, float y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v2(float x, float* y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v3(float x, float y, float* z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v4(Dummy x, float y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v5(float x, Dummy y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v6(float x, float y, Dummy z) { + float result = norm3df(x, y, z); + } +)"}; + +static constexpr auto kRnorm3D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm3d_kernel_v1(double* x, double y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v2(double x, double* y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v3(double x, double y, double* z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v4(Dummy x, double y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v5(double x, Dummy y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v6(double x, double y, Dummy z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3df_kernel_v1(float* x, float y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v2(float x, float* y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v3(float x, float y, float* z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v4(Dummy x, float y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v5(float x, Dummy y, float z) { + float result = 
rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v6(float x, float y, Dummy z) { + float result = rnorm3df(x, y, z); + } +)"}; + +static constexpr auto kNorm4D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm4d_kernel_v1(double* x, double y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v2(double x, double* y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v3(double x, double y, double* z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v4(double x, double y, double z, double* w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v5(Dummy x, double y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v6(double x, Dummy y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v7(double x, double y, Dummy z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v8(double x, double y, double z, Dummy w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4df_kernel_v1(float* x, float y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v2(float x, float* y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v3(float x, float y, float* z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v4(float x, float y, float z, float* w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v5(Dummy x, float y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v6(float x, Dummy y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v7(float x, float y, Dummy z, float w) { + float result = 
norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v8(float x, float y, float z, Dummy w) { + float result = norm4df(x, y, z, w); + } +)"}; + +static constexpr auto kRnorm4D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm4d_kernel_v1(double* x, double y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v2(double x, double* y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v3(double x, double y, double* z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v4(double x, double y, double z, double* w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v5(Dummy x, double y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v6(double x, Dummy y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v7(double x, double y, Dummy z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v8(double x, double y, double z, Dummy w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4df_kernel_v1(float* x, float y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v2(float x, float* y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v3(float x, float y, float* z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v4(float x, float y, float z, float* w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v5(Dummy x, float y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v6(float x, Dummy y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v7(float x, float y, 
Dummy z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v8(float x, float y, float z, Dummy w) { + float result = rnorm4df(x, y, z, w); + } +)"}; + +static constexpr auto kNorm{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm_kernel_v1(int* dim, const double* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v2(Dummy dim, const double* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v3(int dim, const int* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v4(int dim, const char* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v5(int dim, const short* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v6(int dim, const long* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v7(int dim, const long long* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v8(int dim, const float* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v9(int dim, const Dummy* a) { + double result = norm(dim, a); + } + __global__ void normf_kernel_v1(int* dim, const float* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v2(Dummy dim, const float* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v3(int dim, const int* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v4(int dim, const char* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v5(int dim, const short* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v6(int dim, const long* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v7(int dim, const long long* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v8(int dim, const double* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v9(int dim, const 
Dummy* a) { + double result = normf(dim, a); + } +)"}; + +static constexpr auto kRnorm{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm_kernel_v1(int* dim, const double* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v2(Dummy dim, const double* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v3(int dim, const int* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v4(int dim, const char* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v5(int dim, const short* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v6(int dim, const long* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v7(int dim, const long long* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v8(int dim, const float* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v9(int dim, const Dummy* a) { + double result = rnorm(dim, a); + } + __global__ void rnormf_kernel_v1(int* dim, const float* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v2(Dummy dim, const float* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v3(int dim, const int* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v4(int dim, const char* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v5(int dim, const short* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v6(int dim, const long* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v7(int dim, const long long* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v8(int dim, const double* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v9(int dim, const Dummy* a) { + double result = rnormf(dim, a); + } +)"}; diff --git 
a/projects/hip-tests/catch/unit/math/root_funcs.cc b/projects/hip-tests/catch/unit/math/root_funcs.cc new file mode 100644 index 0000000000..1638ca8b04 --- /dev/null +++ b/projects/hip-tests/catch/unit/math/root_funcs.cc @@ -0,0 +1,604 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" +#include "quaternary_common.hh" +#include "math_root_negative_kernels_rtc.hh" + +/** + * @addtogroup RootMathFuncs RootMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +MATH_UNARY_KERNEL_DEF(sqrt) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `sqrtf(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. The maximum ulp error is 1. 
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_sqrtf_Accuracy_Positive") {
+  float (*ref)(float) = std::sqrt;
+  UnarySinglePrecisionTest(sqrt_kernel<float>, ref, ULPValidatorBuilderFactory<float>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `sqrt(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The results are
+ * compared against reference function `double std::sqrt(double)`. The error bounds are
+ * IEEE-compliant.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_sqrt_Accuracy_Positive") {
+  double (*ref)(double) = std::sqrt;
+  UnaryDoublePrecisionTest(sqrt_kernel<double>, ref, ULPValidatorBuilderFactory<double>(0));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for sqrtf and sqrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_sqrt_sqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSqrt); }
+
+MATH_UNARY_KERNEL_DEF(rsqrt)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `rsqrtf(x)` for all possible inputs. The maximum ulp error
+ * is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rsqrtf_Accuracy_Positive") {
+  auto rsqrt_ref = [](double arg) -> double { return 1. / std::sqrt(arg); };
+  double (*ref)(double) = rsqrt_ref;
+  UnarySinglePrecisionTest(rsqrt_kernel<float>, ref, ULPValidatorBuilderFactory<float>(2));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `rsqrt(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rsqrt_Accuracy_Positive") {
+  auto rsqrt_ref = [](long double arg) -> long double { return 1.L / std::sqrt(arg); };
+  long double (*ref)(long double) = rsqrt_ref;
+  UnaryDoublePrecisionTest(rsqrt_kernel<double>, ref, ULPValidatorBuilderFactory<double>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for rsqrtf and rsqrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rsqrt_rsqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRsqrt); }
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `cbrtf(x)` for all possible inputs and `cbrt(x)` against a
+ * table of difficult values, followed by a large number of randomly generated values. The results
+ * are compared against reference function `T std::cbrt(T)`. The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_UNARY_WITHIN_ULP_TEST_DEF(cbrt, std::cbrt, 1, 1)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for cbrtf and cbrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_cbrt_cbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCbrt); }
+
+MATH_UNARY_KERNEL_DEF(rcbrt)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `rcbrtf(x)` for all possible inputs. The maximum ulp error
+ * is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rcbrtf_Accuracy_Positive") {
+  auto rcbrt_ref = [](double arg) -> double { return 1. / std::cbrt(arg); };
+  double (*ref)(double) = rcbrt_ref;
+  UnarySinglePrecisionTest(rcbrt_kernel<float>, ref, ULPValidatorBuilderFactory<float>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `rcbrt(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The maximum ulp error is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rcbrt_Accuracy_Positive") {
+  auto rcbrt_ref = [](long double arg) -> long double { return 1.L / std::cbrt(arg); };
+  long double (*ref)(long double) = rcbrt_ref;
+  UnaryDoublePrecisionTest(rcbrt_kernel<double>, ref, ULPValidatorBuilderFactory<double>(1));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass argument of invalid type for rcbrtf and rcbrt.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rcbrt_rcbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRcbrt); }
+
+/********** Binary Functions **********/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `hypotf(x, y)` and `hypot(x, y)` against a table of
+ * difficult values, followed by a large number of randomly generated values. The results are
+ * compared against reference function `T std::hypot(T, T)`. The maximum ulp error for single
+ * precision is 3 and for double precision is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+MATH_BINARY_WITHIN_ULP_TEST_DEF(hypot, std::hypot, 3, 2)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for hypotf and hypot.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_hypot_hypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kHypot); }
+
+MATH_BINARY_KERNEL_DEF(rhypot)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `rhypotf(x, y)` and `rhypot(x, y)` against a table of
+ * difficult values, followed by a large number of randomly generated values. The maximum ulp error
+ * for single precision is 2 and for double precision is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rhypot_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto rhypot_ref = [](RT arg1, RT arg2) -> RT { return 1. / std::hypot(arg1, arg2); };
+  RT (*ref)(RT, RT) = rhypot_ref;
+  const auto ulp = std::is_same_v<float, TestType> ? 2 : 1;
+  BinaryFloatingPointTest(rhypot_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for rhypotf and rhypot.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rhypot_rhypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kRhypot); }
+
+/********** Ternary Functions **********/
+
+MATH_TERNARY_KERNEL_DEF(norm3d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `norm3df(x, y, z)` and `norm3d(x, y, z)` against a table of
+ * difficult values, followed by a large number of randomly generated values. The maximum ulp error
+ * for single precision is 3 and for double precision is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_norm3d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto norm3d_ref = [](RT arg1, RT arg2, RT arg3) -> RT {
+    if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3)) {
+      return std::numeric_limits<RT>::infinity();
+    }
+    return std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3);
+  };
+  RT (*ref)(RT, RT, RT) = norm3d_ref;
+  const auto ulp = std::is_same_v<float, TestType> ? 3 : 2;
+  TernaryFloatingPointTest(norm3d_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for norm3df and norm3d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_norm3d_norm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kNorm3D); }
+
+MATH_TERNARY_KERNEL_DEF(rnorm3d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `rnorm3df(x, y, z)` and `rnorm3d(x, y, z)` against a table of
+ * difficult values, followed by a large number of randomly generated values. The maximum ulp error
+ * for single precision is 2 and for double precision is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm3d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto rnorm3d_ref = [](RT arg1, RT arg2, RT arg3) -> RT {
+    if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3)) {
+      return 0;
+    }
+    return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3);
+  };
+  RT (*ref)(RT, RT, RT) = rnorm3d_ref;
+  const auto ulp = std::is_same_v<float, TestType> ? 2 : 1;
+  TernaryFloatingPointTest(rnorm3d_kernel<TestType>, ref,
+                           ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for rnorm3df and rnorm3d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm3d_rnorm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kRnorm3D); }
+
+/********** Quaternary Functions **********/
+
+MATH_QUATERNARY_KERNEL_DEF(norm4d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `norm4df(x, y, z, t)` and `norm4d(x, y, z, t)` against a
+ * table of difficult values, followed by a large number of randomly generated values. The maximum
+ * ulp error for single precision is 3 and for double precision is 2.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_norm4d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto norm4d_ref = [](RT arg1, RT arg2, RT arg3, RT arg4) -> RT {
+    if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3) || std::isinf(arg4)) {
+      return std::numeric_limits<RT>::infinity();
+    }
+    return std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4);
+  };
+  RT (*ref)(RT, RT, RT, RT) = norm4d_ref;
+  const auto ulp = std::is_same_v<float, TestType> ? 3 : 2;
+  QuaternaryFloatingPointTest(norm4d_kernel<TestType>, ref,
+                              ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for norm4df and norm4d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_norm4d_norm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kNorm4D); }
+
+MATH_QUATERNARY_KERNEL_DEF(rnorm4d)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Tests the numerical accuracy of `rnorm4df(x, y, z, t)` and `rnorm4d(x, y, z, t)` against a
+ * table of difficult values, followed by a large number of randomly generated values. The maximum
+ * ulp error for single precision is 2 and for double precision is 1.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm4d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto rnorm4d_ref = [](RT arg1, RT arg2, RT arg3, RT arg4) -> RT {
+    if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3) || std::isinf(arg4)) {
+      return 0;
+    }
+    return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4);
+  };
+  RT (*ref)(RT, RT, RT, RT) = rnorm4d_ref;
+  const auto ulp = std::is_same_v<float, TestType> ? 2 : 1;
+  QuaternaryFloatingPointTest(rnorm4d_kernel<TestType>, ref,
+                              ULPValidatorBuilderFactory<TestType>(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for rnorm4df and rnorm4d.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm4d_rnorm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kRnorm4D); }
+
+/********** norm Function **********/
+
+#define MATH_NORM_KERNEL_DEF(func_name)                                                            \
+  template <typename T> __global__ void func_name##_kernel(T* const ys, int dim, T* const x1s) {  \
+    if constexpr (std::is_same_v<float, T>) {                                                      \
+      *ys = func_name##f(dim, x1s);                                                                \
+    } else if constexpr (std::is_same_v<double, T>) {                                              \
+      *ys = func_name(dim, x1s);                                                                   \
+    }                                                                                              \
+  }
+
+template <typename T, typename F, typename RF, typename ValidatorBuilder>
+void NormSimpleTest(F kernel, RF ref_func, const ValidatorBuilder& validator_builder) {
+  const auto max_dim = 10000;
+
+  LinearAllocGuard<T> x{LinearAllocs::hipHostMalloc, max_dim * sizeof(T)};
+  LinearAllocGuard<T> x_dev{LinearAllocs::hipMalloc, max_dim * sizeof(T)};
+  LinearAllocGuard<T> y{LinearAllocs::hipHostMalloc, sizeof(T)};
+  LinearAllocGuard<T> y_dev{LinearAllocs::hipMalloc, sizeof(T)};
+
+  std::fill_n(x.ptr(), max_dim, 1);
+  HIP_CHECK(hipMemcpy(x_dev.ptr(), x.ptr(), max_dim * sizeof(T), hipMemcpyHostToDevice));
+
+  for (int i = 1; i < max_dim; i++) {  // int matches the `dim` parameter of kernel and reference
+    kernel<<<1, 1>>>(y_dev.ptr(), i, x_dev.ptr());
+    HIP_CHECK(hipGetLastError());
+
+    HIP_CHECK(hipMemcpy(y.ptr(), y_dev.ptr(), sizeof(T), hipMemcpyDeviceToHost));
+    const auto actual_val = *y.ptr();
+    const auto ref_val = static_cast<T>(ref_func(i, x.ptr()));
+    const auto validator = validator_builder(ref_val);
+
+    if (!validator->match(actual_val)) {
+      std::stringstream ss;
+      ss << std::scientific << std::setprecision(std::numeric_limits<T>::max_digits10 - 1);
+      ss << "Validation fails for dim: " << i << " " << actual_val << " " << ref_val;
+      INFO(ss.str());
+      REQUIRE(false);
+    }
+  }
+}
+
+MATH_NORM_KERNEL_DEF(norm)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `normf(dim, arr)` and `norm(dim, arr)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_norm_Sanity_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto norm_ref = [](int dim, TestType* args) -> RT {
+    RT sum = 0;
+    for (int i = 0; i < dim; i++) {
+      if (std::isinf(args[i])) return std::numeric_limits<RT>::infinity();
+      sum += static_cast<RT>(args[i]) * static_cast<RT>(args[i]);
+    }
+    return std::sqrt(sum);
+  };
+  RT (*ref)(int, TestType*) = norm_ref;
+
+  NormSimpleTest<TestType>(norm_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(10));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for normf and norm.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_norm_normf_Negative_RTC") { NegativeTestRTCWrapper<18>(kNorm); }
+
+MATH_NORM_KERNEL_DEF(rnorm)
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Sanity test for `rnormf(dim, arr)` and `rnorm(dim, arr)`.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm_Sanity_Positive", "", float, double) {
+  using RT = RefType_t<TestType>;
+  auto rnorm_ref = [](int dim, TestType* args) -> RT {
+    RT sum = 0;
+    for (int i = 0; i < dim; i++) {
+      if (std::isinf(args[i])) return 0;  // reciprocal of an infinite norm, as in rnorm3d/rnorm4d
+      sum += static_cast<RT>(args[i]) * static_cast<RT>(args[i]);
+    }
+    return 1. / std::sqrt(sum);
+  };
+  RT (*ref)(int, TestType*) = rnorm_ref;
+
+  NormSimpleTest<TestType>(rnorm_kernel<TestType>, ref, ULPValidatorBuilderFactory<TestType>(10));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - RTCs kernels that pass combinations of arguments of invalid types for rnormf and rnorm.
+ *
+ * Test source
+ * ------------------------
+ *    - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm_rnormf_Negative_RTC") { NegativeTestRTCWrapper<18>(kRnorm); }