diff --git a/hipamd/include/hip/hcc_detail/hip_fp16.h b/hipamd/include/hip/hcc_detail/hip_fp16.h index 5a6e650069..849b7278d8 100644 --- a/hipamd/include/hip/hcc_detail/hip_fp16.h +++ b/hipamd/include/hip/hcc_detail/hip_fp16.h @@ -1403,6 +1403,15 @@ THE SOFTWARE. } // Math functions + #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ + inline + __device__ + float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) { + return __ockl_fdot2(static_cast<__half2_raw>(a).data, + static_cast<__half2_raw>(b).data, + c, saturate); + } + #endif inline __device__ __half htrunc(__half x) diff --git a/hipamd/include/hip/hcc_detail/hip_fp16_math_fwd.h b/hipamd/include/hip/hcc_detail/hip_fp16_math_fwd.h index 83fd1349b1..eeb617c40b 100644 --- a/hipamd/include/hip/hcc_detail/hip_fp16_math_fwd.h +++ b/hipamd/include/hip/hcc_detail/hip_fp16_math_fwd.h @@ -53,6 +53,10 @@ extern "C" typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); typedef short __2i16 __attribute__((ext_vector_type(2))); + #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ + __device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s); + #endif + __device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); @@ -75,4 +79,4 @@ extern "C" __device__ __2f16 __ocml_sin_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); -} \ No newline at end of file +} diff --git a/hipamd/include/hip/hcc_detail/math_functions.h b/hipamd/include/hip/hcc_detail/math_functions.h index 3e9db0d1f6..6374375491 100644 --- a/hipamd/include/hip/hcc_detail/math_functions.h +++ b/hipamd/include/hip/hcc_detail/math_functions.h @@ -23,6 +23,7 @@ THE SOFTWARE. #pragma once #include "hip_fp16_math_fwd.h" +#include "hip_vector_types.h" #include "math_fwd.h" #include @@ -119,6 +120,40 @@ uint64_t __make_mantissa(const char* tagp) return __make_mantissa_base10(tagp); } +// DOT FUNCTIONS +#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ +__DEVICE__ +inline +int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) { + return __ockl_sdot2(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) { + return __ockl_udot2(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) { + return __ockl_sdot4(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) { + return __ockl_udot4(a.data, b.data, c, saturate); +} +__DEVICE__ +inline +int amd_mixed_dot(int a, int b, int c, bool saturate) { + return __ockl_sdot8(a, b, c, saturate); +} +__DEVICE__ +inline +uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) { + return __ockl_udot8(a, b, c, saturate); +} +#endif + // BEGIN FLOAT __DEVICE__ inline diff --git a/hipamd/include/hip/hcc_detail/math_fwd.h b/hipamd/include/hip/hcc_detail/math_fwd.h index cf8aeb9c6c..c25b5e90b4 100644 --- a/hipamd/include/hip/hcc_detail/math_fwd.h +++ b/hipamd/include/hip/hcc_detail/math_fwd.h @@ -28,6 +28,45 @@ THE SOFTWARE. extern "C" { #endif +// DOT FUNCTIONS +#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ +__device__ +__attribute__((const)) +int __ockl_sdot2( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot2( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + unsigned int, bool); + +__device__ +__attribute__((const)) +int __ockl_sdot4( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot4( + HIP_vector_base::Native_vec_, + HIP_vector_base::Native_vec_, + unsigned int, bool); + +__device__ +__attribute__((const)) +int __ockl_sdot8(int, int, int, bool); + +__device__ +__attribute__((const)) +unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool); +#endif + // BEGIN FLOAT __device__ __attribute__((const)) diff --git a/hipamd/tests/src/deviceLib/hipTestDotFunctions.cpp b/hipamd/tests/src/deviceLib/hipTestDotFunctions.cpp new file mode 100644 index 0000000000..51e005f7fb --- /dev/null +++ b/hipamd/tests/src/deviceLib/hipTestDotFunctions.cpp @@ -0,0 +1,69 @@ +/* +Copyright (c) 2015-2019 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 + * RUN: %t EXCLUDE_HIP_PLATFORM nvcc + * HIT_END + */ + +#include +#include +#include "test_common.h" + + +__global__ void DotFunctions(bool* result) { + #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ + // Dot Functions + short2 sa{1}, sb{1}; + result[0] = amd_mixed_dot(sa, sb, 1, result[0]) && result[0]; + + ushort2 usa{1}, usb{1}; + result[0] = amd_mixed_dot(usa, usb, (uint) 1, result[0]) && result[0]; + + char4 ca{1}, cb{1}; + result[0] = amd_mixed_dot(ca, cb, 1, result[0]) && result[0]; + + uchar4 uca{1}, ucb{1}; + result[0] = amd_mixed_dot(uca, ucb, (uint) 1, result[0]) && result[0]; + + int ia{1}, ib{1}; + result[0] = amd_mixed_dot(ia, ib, 1, result[0]) && result[0]; + + uint ua{1}, ub{1}; + result[0] = amd_mixed_dot(ua, ub, (uint) 1, result[0]) && result[0]; + #endif +} + +int main() { + bool* result{nullptr}; + hipHostMalloc(&result, 1); + result[0] = true; + + hipLaunchKernelGGL(DotFunctions, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result); + hipDeviceSynchronize(); + if (!result[0]) { failed("Failed dot tests."); } + + hipHostFree(result); + + passed(); +} diff --git a/hipamd/tests/src/deviceLib/hipTestNativeHalf.cpp b/hipamd/tests/src/deviceLib/hipTestNativeHalf.cpp index 6e618618f6..10a3f33a27 100644 --- a/hipamd/tests/src/deviceLib/hipTestNativeHalf.cpp +++ b/hipamd/tests/src/deviceLib/hipTestNativeHalf.cpp @@ -156,6 +156,11 @@ void __half2Test(bool* result, __half2 a) { result[0] = (a >= a) && result[0]; result[0] = !(a < a) && result[0]; result[0] = !(a > a) && result[0]; + + #if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__ + // Dot Functions + result[0] = amd_mixed_dot(a, a, 1, 1) && result[0]; + #endif } #endif