Merge pull request #863 from aaronenyeshi/add-dot-instructions
Add Dot functions as amd_mixed_dot function
Этот коммит содержится в:
@@ -1403,6 +1403,15 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
// Math functions
|
||||
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
|
||||
inline
|
||||
__device__
|
||||
float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) {
|
||||
return __ockl_fdot2(static_cast<__half2_raw>(a).data,
|
||||
static_cast<__half2_raw>(b).data,
|
||||
c, saturate);
|
||||
}
|
||||
#endif
|
||||
inline
|
||||
__device__
|
||||
__half htrunc(__half x)
|
||||
|
||||
@@ -53,6 +53,10 @@ extern "C"
|
||||
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
|
||||
typedef short __2i16 __attribute__((ext_vector_type(2)));
|
||||
|
||||
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
|
||||
__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s);
|
||||
#endif
|
||||
|
||||
__device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
|
||||
__device__ __2f16 __ocml_cos_2f16(__2f16);
|
||||
__device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
|
||||
@@ -75,4 +79,4 @@ extern "C"
|
||||
__device__ __2f16 __ocml_sin_2f16(__2f16);
|
||||
__device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
|
||||
__device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ THE SOFTWARE.
|
||||
#pragma once
|
||||
|
||||
#include "hip_fp16_math_fwd.h"
|
||||
#include "hip_vector_types.h"
|
||||
#include "math_fwd.h"
|
||||
|
||||
#include <hip/hcc_detail/host_defines.h>
|
||||
@@ -119,6 +120,40 @@ uint64_t __make_mantissa(const char* tagp)
|
||||
return __make_mantissa_base10(tagp);
|
||||
}
|
||||
|
||||
// DOT FUNCTIONS
|
||||
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
|
||||
__DEVICE__
|
||||
inline
|
||||
int amd_mixed_dot(short2 a, short2 b, int c, bool saturate) {
|
||||
return __ockl_sdot2(a.data, b.data, c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate) {
|
||||
return __ockl_udot2(a.data, b.data, c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
int amd_mixed_dot(char4 a, char4 b, int c, bool saturate) {
|
||||
return __ockl_sdot4(a.data, b.data, c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate) {
|
||||
return __ockl_udot4(a.data, b.data, c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
int amd_mixed_dot(int a, int b, int c, bool saturate) {
|
||||
return __ockl_sdot8(a, b, c, saturate);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
|
||||
return __ockl_udot8(a, b, c, saturate);
|
||||
}
|
||||
#endif
|
||||
|
||||
// BEGIN FLOAT
|
||||
__DEVICE__
|
||||
inline
|
||||
|
||||
@@ -28,6 +28,45 @@ THE SOFTWARE.
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// DOT FUNCTIONS
|
||||
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
|
||||
__device__
|
||||
__attribute__((const))
|
||||
int __ockl_sdot2(
|
||||
HIP_vector_base<short, 2>::Native_vec_,
|
||||
HIP_vector_base<short, 2>::Native_vec_,
|
||||
int, bool);
|
||||
|
||||
__device__
|
||||
__attribute__((const))
|
||||
unsigned int __ockl_udot2(
|
||||
HIP_vector_base<unsigned short, 2>::Native_vec_,
|
||||
HIP_vector_base<unsigned short, 2>::Native_vec_,
|
||||
unsigned int, bool);
|
||||
|
||||
__device__
|
||||
__attribute__((const))
|
||||
int __ockl_sdot4(
|
||||
HIP_vector_base<char, 4>::Native_vec_,
|
||||
HIP_vector_base<char, 4>::Native_vec_,
|
||||
int, bool);
|
||||
|
||||
__device__
|
||||
__attribute__((const))
|
||||
unsigned int __ockl_udot4(
|
||||
HIP_vector_base<unsigned char, 4>::Native_vec_,
|
||||
HIP_vector_base<unsigned char, 4>::Native_vec_,
|
||||
unsigned int, bool);
|
||||
|
||||
__device__
|
||||
__attribute__((const))
|
||||
int __ockl_sdot8(int, int, int, bool);
|
||||
|
||||
__device__
|
||||
__attribute__((const))
|
||||
unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool);
|
||||
#endif
|
||||
|
||||
// BEGIN FLOAT
|
||||
__device__
|
||||
__attribute__((const))
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
Copyright (c) 2015-2019 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11
|
||||
* RUN: %t EXCLUDE_HIP_PLATFORM nvcc
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/math_functions.h>
|
||||
#include "test_common.h"
|
||||
|
||||
|
||||
__global__ void DotFunctions(bool* result) {
|
||||
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
|
||||
// Dot Functions
|
||||
short2 sa{1}, sb{1};
|
||||
result[0] = amd_mixed_dot(sa, sb, 1, result[0]) && result[0];
|
||||
|
||||
ushort2 usa{1}, usb{1};
|
||||
result[0] = amd_mixed_dot(usa, usb, (uint) 1, result[0]) && result[0];
|
||||
|
||||
char4 ca{1}, cb{1};
|
||||
result[0] = amd_mixed_dot(ca, cb, 1, result[0]) && result[0];
|
||||
|
||||
uchar4 uca{1}, ucb{1};
|
||||
result[0] = amd_mixed_dot(uca, ucb, (uint) 1, result[0]) && result[0];
|
||||
|
||||
int ia{1}, ib{1};
|
||||
result[0] = amd_mixed_dot(ia, ib, 1, result[0]) && result[0];
|
||||
|
||||
uint ua{1}, ub{1};
|
||||
result[0] = amd_mixed_dot(ua, ub, (uint) 1, result[0]) && result[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
int main() {
|
||||
bool* result{nullptr};
|
||||
hipHostMalloc(&result, 1);
|
||||
result[0] = true;
|
||||
|
||||
hipLaunchKernelGGL(DotFunctions, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result);
|
||||
hipDeviceSynchronize();
|
||||
if (!result[0]) { failed("Failed dot tests."); }
|
||||
|
||||
hipHostFree(result);
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -156,6 +156,11 @@ void __half2Test(bool* result, __half2 a) {
|
||||
result[0] = (a >= a) && result[0];
|
||||
result[0] = !(a < a) && result[0];
|
||||
result[0] = !(a > a) && result[0];
|
||||
|
||||
#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__
|
||||
// Dot Functions
|
||||
result[0] = amd_mixed_dot(a, a, 1, 1) && result[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Ссылка в новой задаче
Block a user