EXSWHTEC-329 - Implement tests for half-precision arithmetic functions #410
Change-Id: I754e1aa8a84d775340b9037edb90e75431849bb0
[ROCm/hip-tests commit: 6ae8cb9e7c]
Этот коммит содержится в:
коммит произвёл
Rakesh Roy
родитель
5112132e83
Коммит
9bd163fed0
@@ -34,6 +34,7 @@ set(TEST_SRC
|
||||
casting_int_funcs.cc
|
||||
casting_half2_funcs.cc
|
||||
half_precision_math.cc
|
||||
half_precision_arithmetic.cc
|
||||
)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "nvidia")
|
||||
@@ -127,3 +128,7 @@ add_test(NAME Unit_Half_Precision_Math_Negative
|
||||
COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
half_precision_math_negative_kernels.cc 60)
|
||||
add_test(NAME Unit_Half_Precision_Arithmetic_Negative
|
||||
COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
half_precision_arithmetic_negative_kernels.cc 88)
|
||||
|
||||
@@ -0,0 +1,441 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "half_precision_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup HalfPrecisionArithmetic HalfPrecisionArithmetic
|
||||
* @{
|
||||
* @ingroup MathTest
|
||||
*/
|
||||
|
||||
|
||||
MATH_UNARY_HP_KERNEL_DEF(__habs);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__habs(x)` for all possible inputs. The results are
|
||||
* compared against reference function `float std::abs(float)`.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_UNARY_HP_TEST_DEF_IMPL(__habs, static_cast<float (*)(float)>(std::abs),
|
||||
EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_UNARY_HP_KERNEL_DEF(__habs2);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__habs2(x)` for all possible inputs. The results are
|
||||
* compared against reference function `float std::abs(float)`.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_UNARY_HP_TEST_DEF_IMPL(__habs2, static_cast<float (*)(float)>(std::abs),
|
||||
EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for negation: yields the operand with its sign flipped.
 * Passed as the reference function to the `__hneg` and `__hneg2` test definitions.
 */
static float __hneg_ref(float x) {
  const float negated = -x;  // unary minus, so +0.0f maps to -0.0f
  return negated;
}
|
||||
|
||||
MATH_UNARY_HP_KERNEL_DEF(__hneg);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hneg(x)` for all possible inputs. The error bounds are
|
||||
* IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_UNARY_HP_TEST_DEF_IMPL(__hneg, __hneg_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_UNARY_HP_KERNEL_DEF(__hneg2);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hneg2(x)` for all possible inputs. The error bounds are
|
||||
* IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_UNARY_HP_TEST_DEF_IMPL(__hneg2, __hneg_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
// Wrapper to avoid ambiguity error with __hadd(int, int)
|
||||
/**
 * Device-side forwarding shim: calls `__hadd` with two `__half` operands and
 * returns its result, giving the test macros a name with a single signature.
 */
__device__ __half __hadd_wrapper(__half x1, __half x2) {
  const __half sum = __hadd(x1, x2);
  return sum;
}
|
||||
|
||||
/**
 * Host-side reference for addition, evaluated in single precision.
 * Passed as the reference function to the `__hadd` (via wrapper) and `__hadd2` test definitions.
 */
static float __hadd_ref(float x1, float x2) {
  const float sum = x1 + x2;
  return sum;
}
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hadd_wrapper);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hadd(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hadd_wrapper, __hadd_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hadd2);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hadd2(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hadd2, __hadd_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for saturating addition: the single-precision sum limited
 * to the closed interval [0.0, 1.0].
 *
 * A sum that compares neither below 0.0 nor above 1.0 (this includes NaN and
 * -0.0) is returned unchanged.
 */
static float __hadd_sat_ref(float x1, float x2) {
  const float sum = x1 + x2;
  if (sum < 0.0f) {
    return 0.0f;
  }
  if (1.0f < sum) {
    return 1.0f;
  }
  return sum;
}
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hadd_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hadd_sat(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hadd_sat, __hadd_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hadd2_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hadd2_sat(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hadd2_sat, __hadd_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for subtraction, evaluated in single precision.
 * Passed as the reference function to the `__hsub` and `__hsub2` test definitions.
 */
static float __hsub_ref(float x1, float x2) {
  const float difference = x1 - x2;
  return difference;
}
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hsub);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hsub(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hsub, __hsub_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hsub2);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hsub2(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hsub2, __hsub_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for saturating subtraction: the single-precision
 * difference limited to the closed interval [0.0, 1.0].
 *
 * A difference that compares neither below 0.0 nor above 1.0 (this includes
 * NaN and -0.0) is returned unchanged.
 */
static float __hsub_sat_ref(float x1, float x2) {
  const float difference = x1 - x2;
  if (difference < 0.0f) {
    return 0.0f;
  }
  if (1.0f < difference) {
    return 1.0f;
  }
  return difference;
}
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hsub_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hsub_sat(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hsub_sat, __hsub_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hsub2_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hsub2_sat(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hsub2_sat, __hsub_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for multiplication, evaluated in single precision.
 * Passed as the reference function to the `__hmul` and `__hmul2` test definitions.
 */
static float __hmul_ref(float x1, float x2) {
  const float product = x1 * x2;
  return product;
}
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hmul);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hmul(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hmul, __hmul_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hmul2);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hmul2(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hmul2, __hmul_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for saturating multiplication: the single-precision
 * product limited to the closed interval [0.0, 1.0].
 *
 * A product that compares neither below 0.0 nor above 1.0 (this includes NaN
 * and -0.0) is returned unchanged.
 */
static float __hmul_sat_ref(float x1, float x2) {
  const float product = x1 * x2;
  if (product < 0.0f) {
    return 0.0f;
  }
  if (1.0f < product) {
    return 1.0f;
  }
  return product;
}
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hmul_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hmul_sat(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hmul_sat, __hmul_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hmul2_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hmul2_sat(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hmul2_sat, __hmul_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for division, evaluated in single precision.
 * Passed as the reference function to the `__hdiv` and `__h2div` test definitions.
 */
static float __hdiv_ref(float x1, float x2) {
  const float quotient = x1 / x2;
  return quotient;
}
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__hdiv);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hdiv(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__hdiv, __hdiv_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_BINARY_HP_KERNEL_DEF(__h2div);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__h2div(x,y)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_BINARY_HP_TEST_DEF_IMPL(__h2div, __hdiv_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
MATH_TERNARY_HP_KERNEL_DEF(__hfma);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hfma(x,y,z)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma, static_cast<float (*)(float, float, float)>(std::fma),
|
||||
EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_TERNARY_HP_KERNEL_DEF(__hfma2);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hfma2(x,y,z)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma2, static_cast<float (*)(float, float, float)>(std::fma),
|
||||
EqValidatorBuilderFactory<float>());
|
||||
|
||||
|
||||
/**
 * Host-side reference for saturating fused multiply-add: `std::fma(x1, x2, x3)`
 * limited to the closed interval [0.0, 1.0].
 *
 * A result that compares neither below 0.0 nor above 1.0 (this includes NaN
 * and -0.0) is returned unchanged.
 */
static float __hfma_sat_ref(float x1, float x2, float x3) {
  const float fused = std::fma(x1, x2, x3);
  if (fused < 0.0f) {
    return 0.0f;
  }
  if (1.0f < fused) {
    return 1.0f;
  }
  return fused;
}
|
||||
|
||||
MATH_TERNARY_HP_KERNEL_DEF(__hfma_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hfma_sat(x,y,z)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma_sat, __hfma_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
|
||||
MATH_TERNARY_HP_KERNEL_DEF(__hfma2_sat);
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Tests the numerical accuracy of `__hfma2_sat(x,y,z)` against a table of difficult values,
|
||||
* followed by a large number of randomly generated values. The error bounds are IEEE-compliant.
|
||||
*
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/math/half_precision_arithmetic.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma2_sat, __hfma_sat_ref, EqValidatorBuilderFactory<float>());
|
||||
+124
@@ -0,0 +1,124 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_fp16.h>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
|
||||
// Defines two kernels for a one-argument `__half` function `func_name`:
//   <func_name>_kernel_v1 calls it with a `__half*` argument,
//   <func_name>_kernel_v2 calls it with a `Dummy` argument.
// NOTE(review): presumably each call is meant to be rejected by the compiler
// (negative test) - confirm against the harness that builds this file.
#define UNARY_HALF_NEGATIVE_KERNELS(func_name) \
|
||||
__global__ void func_name##_kernel_v1(__half* x) { __half result = func_name(x); } \
|
||||
__global__ void func_name##_kernel_v2(Dummy x) { __half result = func_name(x); }
|
||||
|
||||
// Defines four kernels for a two-argument `__half` function `func_name`. Each
// kernel replaces exactly one operand with a mismatched type:
//   v1: first operand is `__half*`    v2: second operand is `__half*`
//   v3: first operand is `Dummy`      v4: second operand is `Dummy`
// NOTE(review): presumably each call is meant to be rejected by the compiler
// (negative test) - confirm against the harness that builds this file.
#define BINARY_HALF_NEGATIVE_KERNELS(func_name) \
|
||||
__global__ void func_name##_kernel_v1(__half* x, __half y) { __half result = func_name(x, y); } \
|
||||
__global__ void func_name##_kernel_v2(__half x, __half* y) { __half result = func_name(x, y); } \
|
||||
__global__ void func_name##_kernel_v3(Dummy x, __half y) { __half result = func_name(x, y); } \
|
||||
__global__ void func_name##_kernel_v4(__half x, Dummy y) { __half result = func_name(x, y); }
|
||||
|
||||
// Defines six kernels for a three-argument `__half` function `func_name`. Each
// kernel replaces exactly one operand with a mismatched type:
//   v1/v2/v3: the first/second/third operand is `__half*`
//   v4/v5/v6: the first/second/third operand is `Dummy`
// NOTE(review): presumably each call is meant to be rejected by the compiler
// (negative test) - confirm against the harness that builds this file.
#define TERNARY_HALF_NEGATIVE_KERNELS(func_name) \
|
||||
__global__ void func_name##_kernel_v1(__half* x, __half y, __half z) { \
|
||||
__half result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v2(__half x, __half* y, __half z) { \
|
||||
__half result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v3(__half x, __half y, __half* z) { \
|
||||
__half result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v4(Dummy x, __half y, __half z) { \
|
||||
__half result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v5(__half x, Dummy y, __half z) { \
|
||||
__half result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v6(__half x, __half y, Dummy z) { \
|
||||
__half result = func_name(x, y, z); \
|
||||
}
|
||||
|
||||
// Scalar `__half` instantiations: 2 unary functions x 2 kernels, 7 binary
// functions x 4 kernels and 2 ternary functions x 6 kernels = 44 kernels.
// NOTE(review): together with the 44 `__half2` kernels instantiated later in
// this file this gives 88, which matches the trailing numeric argument given
// for this file in the CMake add_test entry - presumably the expected number
// of compiler errors; confirm before adding or removing an instantiation.
UNARY_HALF_NEGATIVE_KERNELS(__habs)
|
||||
UNARY_HALF_NEGATIVE_KERNELS(__hneg)
|
||||
|
||||
BINARY_HALF_NEGATIVE_KERNELS(__hadd)
|
||||
BINARY_HALF_NEGATIVE_KERNELS(__hadd_sat)
|
||||
BINARY_HALF_NEGATIVE_KERNELS(__hsub)
|
||||
BINARY_HALF_NEGATIVE_KERNELS(__hsub_sat)
|
||||
BINARY_HALF_NEGATIVE_KERNELS(__hmul)
|
||||
BINARY_HALF_NEGATIVE_KERNELS(__hmul_sat)
|
||||
BINARY_HALF_NEGATIVE_KERNELS(__hdiv)
|
||||
|
||||
TERNARY_HALF_NEGATIVE_KERNELS(__hfma)
|
||||
TERNARY_HALF_NEGATIVE_KERNELS(__hfma_sat)
|
||||
|
||||
|
||||
// Defines two kernels for a one-argument `__half2` function `func_name`:
//   <func_name>_kernel_v1 calls it with a `__half2*` argument,
//   <func_name>_kernel_v2 calls it with a `Dummy` argument.
// NOTE(review): presumably each call is meant to be rejected by the compiler
// (negative test) - confirm against the harness that builds this file.
#define UNARY_HALF2_NEGATIVE_KERNELS(func_name) \
|
||||
__global__ void func_name##_kernel_v1(__half2* x) { __half2 result = func_name(x); } \
|
||||
__global__ void func_name##_kernel_v2(Dummy x) { __half2 result = func_name(x); }
|
||||
|
||||
// Defines four kernels for a two-argument `__half2` function `func_name`. Each
// kernel replaces exactly one operand with a mismatched type:
//   v1: first operand is `__half2*`   v2: second operand is `__half2*`
//   v3: first operand is `Dummy`      v4: second operand is `Dummy`
// NOTE(review): presumably each call is meant to be rejected by the compiler
// (negative test) - confirm against the harness that builds this file.
#define BINARY_HALF2_NEGATIVE_KERNELS(func_name) \
|
||||
__global__ void func_name##_kernel_v1(__half2* x, __half2 y) { \
|
||||
__half2 result = func_name(x, y); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v2(__half2 x, __half2* y) { \
|
||||
__half2 result = func_name(x, y); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v3(Dummy x, __half2 y) { __half2 result = func_name(x, y); } \
|
||||
__global__ void func_name##_kernel_v4(__half2 x, Dummy y) { __half2 result = func_name(x, y); }
|
||||
|
||||
// Defines six kernels for a three-argument `__half2` function `func_name`. Each
// kernel replaces exactly one operand with a mismatched type:
//   v1/v2/v3: the first/second/third operand is `__half2*`
//   v4/v5/v6: the first/second/third operand is `Dummy`
// NOTE(review): presumably each call is meant to be rejected by the compiler
// (negative test) - confirm against the harness that builds this file.
#define TERNARY_HALF2_NEGATIVE_KERNELS(func_name) \
|
||||
__global__ void func_name##_kernel_v1(__half2* x, __half2 y, __half2 z) { \
|
||||
__half2 result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v2(__half2 x, __half2* y, __half2 z) { \
|
||||
__half2 result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v3(__half2 x, __half2 y, __half2* z) { \
|
||||
__half2 result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v4(Dummy x, __half2 y, __half2 z) { \
|
||||
__half2 result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v5(__half2 x, Dummy y, __half2 z) { \
|
||||
__half2 result = func_name(x, y, z); \
|
||||
} \
|
||||
__global__ void func_name##_kernel_v6(__half2 x, __half2 y, Dummy z) { \
|
||||
__half2 result = func_name(x, y, z); \
|
||||
}
|
||||
|
||||
// Packed `__half2` instantiations: 2 unary functions x 2 kernels, 7 binary
// functions x 4 kernels and 2 ternary functions x 6 kernels = 44 kernels.
// NOTE(review): together with the 44 scalar `__half` kernels instantiated
// earlier in this file this gives 88, which matches the trailing numeric
// argument given for this file in the CMake add_test entry - presumably the
// expected number of compiler errors; confirm before changing this list.
UNARY_HALF2_NEGATIVE_KERNELS(__habs2)
|
||||
UNARY_HALF2_NEGATIVE_KERNELS(__hneg2)
|
||||
|
||||
BINARY_HALF2_NEGATIVE_KERNELS(__hadd2)
|
||||
BINARY_HALF2_NEGATIVE_KERNELS(__hadd2_sat)
|
||||
BINARY_HALF2_NEGATIVE_KERNELS(__hsub2)
|
||||
BINARY_HALF2_NEGATIVE_KERNELS(__hsub2_sat)
|
||||
BINARY_HALF2_NEGATIVE_KERNELS(__hmul2)
|
||||
BINARY_HALF2_NEGATIVE_KERNELS(__hmul2_sat)
|
||||
BINARY_HALF2_NEGATIVE_KERNELS(__h2div)
|
||||
|
||||
TERNARY_HALF2_NEGATIVE_KERNELS(__hfma2)
|
||||
TERNARY_HALF2_NEGATIVE_KERNELS(__hfma2_sat)
|
||||
Ссылка в новой задаче
Block a user