Add dot functions as amd_mixed_dot overloads

Introduce the dot functions that are available in the device library. Forward-declare their prototypes and add a HIP API, amd_mixed_dot, that exposes them to device code.


[ROCm/clr commit: 84a028b9e1]
This commit is contained in:
Aaron Enye Shi
2019-01-11 23:22:07 +00:00
orang tua aa02bb3ecb
melakukan 896b320c5f
2 mengubah file dengan 62 tambahan dan 0 penghapusan
@@ -23,6 +23,7 @@ THE SOFTWARE.
#pragma once
#include "hip_fp16_math_fwd.h"
#include "hip_vector_types.h"
#include "math_fwd.h"
#include <hip/hcc_detail/host_defines.h>
@@ -119,6 +120,43 @@ uint64_t __make_mantissa(const char* tagp)
return __make_mantissa_base10(tagp);
}
// DOT FUNCTIONS
// amd_mixed_dot overload for __2f16 operands with a float accumulator.
// Passes a, b, c and saturate through unchanged to __ockl_fdot2 and returns
// whatever that routine produces.
// NOTE(review): presumably evaluates dot(a, b) + c, with `saturate` requesting
// a clamped result -- confirm against the device library documentation.
__DEVICE__ inline float amd_mixed_dot(__2f16 a, __2f16 b, float c, bool saturate)
{
    const float result = __ockl_fdot2(a, b, c, saturate);
    return result;
}
// amd_mixed_dot overload for short2 operands with an int accumulator.
// Passes a, b, c and saturate through unchanged to __ockl_sdot2 and returns
// whatever that routine produces.
// NOTE(review): presumably a signed 2-element dot product added to c, with
// `saturate` requesting a clamped result -- confirm against the device library.
__DEVICE__ inline int amd_mixed_dot(short2 a, short2 b, int c, bool saturate)
{
    const int result = __ockl_sdot2(a, b, c, saturate);
    return result;
}
// amd_mixed_dot overload for ushort2 operands with a uint accumulator.
// Passes a, b, c and saturate through unchanged to __ockl_udot2 and returns
// whatever that routine produces.
// NOTE(review): presumably an unsigned 2-element dot product added to c, with
// `saturate` requesting a clamped result -- confirm against the device library.
__DEVICE__ inline uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate)
{
    const uint result = __ockl_udot2(a, b, c, saturate);
    return result;
}
// amd_mixed_dot overload for char4 operands with an int accumulator.
// Passes a, b, c and saturate through unchanged to __ockl_sdot4 and returns
// whatever that routine produces.
// NOTE(review): presumably a signed 4-element dot product added to c, with
// `saturate` requesting a clamped result -- confirm against the device library.
__DEVICE__ inline int amd_mixed_dot(char4 a, char4 b, int c, bool saturate)
{
    const int result = __ockl_sdot4(a, b, c, saturate);
    return result;
}
// amd_mixed_dot overload for uchar4 operands with a uint accumulator.
// Passes a, b, c and saturate through unchanged to __ockl_udot4 and returns
// whatever that routine produces.
// NOTE(review): presumably an unsigned 4-element dot product added to c, with
// `saturate` requesting a clamped result -- confirm against the device library.
__DEVICE__ inline uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate)
{
    const uint result = __ockl_udot4(a, b, c, saturate);
    return result;
}
// amd_mixed_dot overload taking plain int operands and an int accumulator.
// Passes a, b, c and saturate through unchanged to __ockl_sdot8 and returns
// whatever that routine produces.
// NOTE(review): the "8" suggests each int is treated as eight packed signed
// 4-bit lanes rather than as a scalar -- confirm against the device library
// before relying on this overload for ordinary integers.
__DEVICE__ inline int amd_mixed_dot(int a, int b, int c, bool saturate)
{
    const int result = __ockl_sdot8(a, b, c, saturate);
    return result;
}
// amd_mixed_dot overload taking plain uint operands and a uint accumulator.
// Passes a, b, c and saturate through unchanged to __ockl_udot8 and returns
// whatever that routine produces.
// NOTE(review): the "8" suggests each uint is treated as eight packed unsigned
// 4-bit lanes rather than as a scalar -- confirm against the device library
// before relying on this overload for ordinary integers.
__DEVICE__ inline uint amd_mixed_dot(uint a, uint b, uint c, bool saturate)
{
    const uint result = __ockl_udot8(a, b, c, saturate);
    return result;
}
// BEGIN FLOAT
__DEVICE__
inline
@@ -28,6 +28,30 @@ THE SOFTWARE.
extern "C" {
#endif
// DOT FUNCTIONS
// Two-element _Float16 vector; the packed operand type taken by __ockl_fdot2.
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));

// Forward declarations of the __ockl_*dot* routines. Only the prototypes live
// here; the definitions are expected to be supplied at link time.
// Every routine is marked __attribute__((const)), telling the compiler that
// the result depends solely on the argument values.
// NOTE(review): the trailing digit (2/4/8) presumably gives the number of
// lanes per operand and `s` presumably enables saturation -- confirm against
// the device library documentation.

// _Float16 x2 operands, float accumulator.
__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s);

// short2 / ushort2 operands, int / unsigned int accumulator.
__device__ __attribute__((const)) int __ockl_sdot2(short2 a, short2 b, int c, bool s);
__device__ __attribute__((const)) unsigned int __ockl_udot2(ushort2 a, ushort2 b, unsigned int c, bool s);

// char4 / uchar4 operands, int / unsigned int accumulator.
__device__ __attribute__((const)) int __ockl_sdot4(char4 a, char4 b, int c, bool s);
__device__ __attribute__((const)) unsigned int __ockl_udot4(uchar4 a, uchar4 b, unsigned int c, bool s);

// int / unsigned int operands and accumulator.
__device__ __attribute__((const)) int __ockl_sdot8(int a, int b, int c, bool s);
__device__ __attribute__((const)) unsigned int __ockl_udot8(unsigned int a, unsigned int b, unsigned int c, bool s);
// BEGIN FLOAT
__device__
__attribute__((const))