SWDEV-283266 - __HIPCC_RTC__ macro added to HIP headers
For hipRTC on Windows, add macro __HIPCC_RTC__ to allow online compilation of with device functions excluding standard C/C++ headers, system headers, and host HIP APIs. Change-Id: I1d91f042baf1359856ec83ab7030dc58785e0334
This commit is contained in:
@@ -26,9 +26,10 @@ THE SOFTWARE.
|
||||
#include "host_defines.h"
|
||||
#include "math_fwd.h"
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <hip/amd_detail/device_library_decls.h>
|
||||
|
||||
@@ -27,9 +27,11 @@ THE SOFTWARE.
|
||||
// It's defined here for workarround of rocThrust building failure.
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#ifndef __cplusplus
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
typedef void* hipDeviceptr_t;
|
||||
typedef enum hipChannelFormatKind {
|
||||
@@ -92,6 +94,7 @@ typedef struct hipArray {
|
||||
unsigned int textureType;
|
||||
}hipArray;
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
typedef struct hip_Memcpy2D {
|
||||
size_t srcXInBytes;
|
||||
size_t srcY;
|
||||
@@ -110,7 +113,7 @@ typedef struct hip_Memcpy2D {
|
||||
size_t WidthInBytes;
|
||||
size_t Height;
|
||||
} hip_Memcpy2D;
|
||||
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
typedef struct hipArray* hipArray_t;
|
||||
typedef hipArray_t hiparray;
|
||||
@@ -359,6 +362,7 @@ typedef struct HIP_RESOURCE_VIEW_DESC_st
|
||||
* Memory copy types
|
||||
*
|
||||
*/
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
typedef enum hipMemcpyKind {
|
||||
hipMemcpyHostToHost = 0, ///< Host-to-Host Copy
|
||||
hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy
|
||||
@@ -470,5 +474,5 @@ typedef enum hipFunction_attribute {
|
||||
HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT,
|
||||
HIP_FUNC_ATTRIBUTE_MAX
|
||||
}hipFunction_attribute;
|
||||
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
#endif
|
||||
|
||||
@@ -111,7 +111,9 @@ float atomicAdd_system(float* address, float val) {
|
||||
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
DEPRECATED("use atomicAdd instead")
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
__device__
|
||||
inline
|
||||
void atomicAddNoRet(float* address, float val)
|
||||
@@ -476,7 +478,9 @@ float atomicAdd(float* address, float val)
|
||||
return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
DEPRECATED("use atomicAdd instead")
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
__device__
|
||||
inline
|
||||
void atomicAddNoRet(float* address, float val)
|
||||
|
||||
@@ -25,6 +25,10 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/amd_detail/hip_vector_types.h"
|
||||
|
||||
#if defined(__HIPCC_RTC__)
|
||||
#define __HOST_DEVICE__ __device__
|
||||
#else
|
||||
#define __HOST_DEVICE__ __host__ __device__
|
||||
// TODO: Clang has a bug which allows device functions to call std functions
|
||||
// when std functions are introduced into default namespace by using statement.
|
||||
// math.h may be included after this bug is fixed.
|
||||
@@ -33,10 +37,11 @@ THE SOFTWARE.
|
||||
#else
|
||||
#include "math.h"
|
||||
#endif
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#if __cplusplus
|
||||
#define COMPLEX_NEG_OP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type operator-(const type& op) { \
|
||||
__HOST_DEVICE__ static inline type operator-(const type& op) { \
|
||||
type ret; \
|
||||
ret.x = -op.x; \
|
||||
ret.y = -op.y; \
|
||||
@@ -44,17 +49,17 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
#define COMPLEX_EQ_OP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline bool operator==(const type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline bool operator==(const type& lhs, const type& rhs) { \
|
||||
return lhs.x == rhs.x && lhs.y == rhs.y; \
|
||||
}
|
||||
|
||||
#define COMPLEX_NE_OP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline bool operator!=(const type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline bool operator!=(const type& lhs, const type& rhs) { \
|
||||
return !(lhs == rhs); \
|
||||
}
|
||||
|
||||
#define COMPLEX_ADD_OP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type operator+(const type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type operator+(const type& lhs, const type& rhs) { \
|
||||
type ret; \
|
||||
ret.x = lhs.x + rhs.x; \
|
||||
ret.y = lhs.y + rhs.y; \
|
||||
@@ -62,7 +67,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
#define COMPLEX_SUB_OP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type operator-(const type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type operator-(const type& lhs, const type& rhs) { \
|
||||
type ret; \
|
||||
ret.x = lhs.x - rhs.x; \
|
||||
ret.y = lhs.y - rhs.y; \
|
||||
@@ -70,7 +75,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
#define COMPLEX_MUL_OP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type operator*(const type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type operator*(const type& lhs, const type& rhs) { \
|
||||
type ret; \
|
||||
ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \
|
||||
ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \
|
||||
@@ -78,7 +83,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
#define COMPLEX_DIV_OP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type operator/(const type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type operator/(const type& lhs, const type& rhs) { \
|
||||
type ret; \
|
||||
ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \
|
||||
ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \
|
||||
@@ -88,33 +93,33 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
#define COMPLEX_ADD_PREOP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type& operator+=(type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type& operator+=(type& lhs, const type& rhs) { \
|
||||
lhs.x += rhs.x; \
|
||||
lhs.y += rhs.y; \
|
||||
return lhs; \
|
||||
}
|
||||
|
||||
#define COMPLEX_SUB_PREOP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type& operator-=(type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type& operator-=(type& lhs, const type& rhs) { \
|
||||
lhs.x -= rhs.x; \
|
||||
lhs.y -= rhs.y; \
|
||||
return lhs; \
|
||||
}
|
||||
|
||||
#define COMPLEX_MUL_PREOP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type& operator*=(type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type& operator*=(type& lhs, const type& rhs) { \
|
||||
lhs = lhs * rhs; \
|
||||
return lhs; \
|
||||
}
|
||||
|
||||
#define COMPLEX_DIV_PREOP_OVERLOAD(type) \
|
||||
__device__ __host__ static inline type& operator/=(type& lhs, const type& rhs) { \
|
||||
__HOST_DEVICE__ static inline type& operator/=(type& lhs, const type& rhs) { \
|
||||
lhs = lhs / rhs; \
|
||||
return lhs; \
|
||||
}
|
||||
|
||||
#define COMPLEX_SCALAR_PRODUCT(type, type1) \
|
||||
__device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \
|
||||
__HOST_DEVICE__ static inline type operator*(const type& lhs, type1 rhs) { \
|
||||
type ret; \
|
||||
ret.x = lhs.x * rhs; \
|
||||
ret.y = lhs.y * rhs; \
|
||||
@@ -125,41 +130,41 @@ THE SOFTWARE.
|
||||
|
||||
typedef float2 hipFloatComplex;
|
||||
|
||||
__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; }
|
||||
__HOST_DEVICE__ static inline float hipCrealf(hipFloatComplex z) { return z.x; }
|
||||
|
||||
__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; }
|
||||
__HOST_DEVICE__ static inline float hipCimagf(hipFloatComplex z) { return z.y; }
|
||||
|
||||
__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
|
||||
__HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
|
||||
hipFloatComplex z;
|
||||
z.x = a;
|
||||
z.y = b;
|
||||
return z;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
|
||||
__HOST_DEVICE__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
|
||||
hipFloatComplex ret;
|
||||
ret.x = z.x;
|
||||
ret.y = -z.y;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) {
|
||||
__HOST_DEVICE__ static inline float hipCsqabsf(hipFloatComplex z) {
|
||||
return z.x * z.x + z.y * z.y;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
|
||||
__HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
|
||||
return make_hipFloatComplex(p.x + q.x, p.y + q.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
|
||||
__HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
|
||||
return make_hipFloatComplex(p.x - q.x, p.y - q.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
|
||||
__HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
|
||||
return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
|
||||
__HOST_DEVICE__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
|
||||
float sqabs = hipCsqabsf(q);
|
||||
hipFloatComplex ret;
|
||||
ret.x = (p.x * q.x + p.y * q.y) / sqabs;
|
||||
@@ -167,46 +172,46 @@ __device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hi
|
||||
return ret;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); }
|
||||
__HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); }
|
||||
|
||||
|
||||
typedef double2 hipDoubleComplex;
|
||||
|
||||
__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; }
|
||||
__HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) { return z.x; }
|
||||
|
||||
__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; }
|
||||
__HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) { return z.y; }
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
|
||||
hipDoubleComplex z;
|
||||
z.x = a;
|
||||
z.y = b;
|
||||
return z;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
|
||||
hipDoubleComplex ret;
|
||||
ret.x = z.x;
|
||||
ret.y = -z.y;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) {
|
||||
__HOST_DEVICE__ static inline double hipCsqabs(hipDoubleComplex z) {
|
||||
return z.x * z.x + z.y * z.y;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
return make_hipDoubleComplex(p.x + q.x, p.y + q.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
return make_hipDoubleComplex(p.x - q.x, p.y - q.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
|
||||
double sqabs = hipCsqabs(q);
|
||||
hipDoubleComplex ret;
|
||||
ret.x = (p.x * q.x + p.y * q.y) / sqabs;
|
||||
@@ -214,7 +219,7 @@ __device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, h
|
||||
return ret;
|
||||
}
|
||||
|
||||
__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); }
|
||||
__HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); }
|
||||
|
||||
|
||||
#if __cplusplus
|
||||
@@ -268,19 +273,19 @@ COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long)
|
||||
|
||||
typedef hipFloatComplex hipComplex;
|
||||
|
||||
__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) {
|
||||
__HOST_DEVICE__ static inline hipComplex make_hipComplex(float x, float y) {
|
||||
return make_hipFloatComplex(x, y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
|
||||
__HOST_DEVICE__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
|
||||
return make_hipFloatComplex((float)z.x, (float)z.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
|
||||
return make_hipDoubleComplex((double)z.x, (double)z.y);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
|
||||
__HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
|
||||
float real = (p.x * q.x) + r.x;
|
||||
float imag = (q.x * p.y) + r.y;
|
||||
|
||||
@@ -290,7 +295,7 @@ __device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q
|
||||
return make_hipComplex(real, imag);
|
||||
}
|
||||
|
||||
__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
|
||||
__HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
|
||||
hipDoubleComplex r) {
|
||||
double real = (p.x * q.x) + r.x;
|
||||
double imag = (q.x * p.y) + r.y;
|
||||
|
||||
+115
-111
@@ -25,14 +25,18 @@ THE SOFTWARE.
|
||||
#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
|
||||
|
||||
#include <hip/amd_detail/hip_common.h>
|
||||
|
||||
#include "hip/amd_detail/host_defines.h"
|
||||
#include <assert.h>
|
||||
#if defined(__cplusplus)
|
||||
#if defined(__HIPCC_RTC__)
|
||||
#define __HOST_DEVICE__ __device__
|
||||
#else
|
||||
#define __HOST_DEVICE__ __host__ __device__
|
||||
#include <assert.h>
|
||||
#if defined(__cplusplus)
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#endif
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#if __HIP_CLANG_ONLY__
|
||||
typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
|
||||
@@ -82,46 +86,46 @@ THE SOFTWARE.
|
||||
};
|
||||
public:
|
||||
// CREATORS
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half() = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half(const __half_raw& x) : data{x.data} {}
|
||||
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half(decltype(data) x) : data{x} {}
|
||||
template<
|
||||
typename T,
|
||||
Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half(T x) : data{static_cast<_Float16>(x)} {}
|
||||
#endif
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half(const __half&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half(__half&&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
~__half() = default;
|
||||
|
||||
// CREATORS - DEVICE ONLY
|
||||
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
|
||||
template<
|
||||
typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half(T x) : data{static_cast<_Float16>(x)} {}
|
||||
#endif
|
||||
|
||||
// MANIPULATORS
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half& operator=(const __half&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half& operator=(__half&&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half& operator=(const __half_raw& x)
|
||||
{
|
||||
data = x.data;
|
||||
return *this;
|
||||
}
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
volatile __half& operator=(const __half_raw& x) volatile
|
||||
{
|
||||
data = x.data;
|
||||
@@ -151,7 +155,7 @@ THE SOFTWARE.
|
||||
template<
|
||||
typename T,
|
||||
Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half& operator=(T x)
|
||||
{
|
||||
data = static_cast<_Float16>(x);
|
||||
@@ -221,12 +225,12 @@ THE SOFTWARE.
|
||||
template<
|
||||
typename T,
|
||||
Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
operator T() const { return data; }
|
||||
#endif
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
operator __half_raw() const { return __half_raw{data}; }
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
operator __half_raw() const volatile
|
||||
{
|
||||
return __half_raw{data};
|
||||
@@ -235,7 +239,7 @@ THE SOFTWARE.
|
||||
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
|
||||
template<
|
||||
typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
operator T() const { return data; }
|
||||
#endif
|
||||
|
||||
@@ -342,38 +346,38 @@ THE SOFTWARE.
|
||||
};
|
||||
|
||||
// CREATORS
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2() = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2(const __half2_raw& x) : data{x.data} {}
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2(decltype(data) x) : data{x} {}
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2(const __half& x, const __half& y)
|
||||
:
|
||||
data{
|
||||
static_cast<__half_raw>(x).data,
|
||||
static_cast<__half_raw>(y).data}
|
||||
{}
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2(const __half2&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2(__half2&&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
~__half2() = default;
|
||||
|
||||
// MANIPULATORS
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2& operator=(const __half2&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2& operator=(__half2&&) = default;
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2& operator=(const __half2_raw& x)
|
||||
{
|
||||
data = x.data;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
// MANIPULATORS - DEVICE ONLY
|
||||
#if !defined(__HIP_NO_HALF_OPERATORS__)
|
||||
__device__
|
||||
@@ -421,9 +425,9 @@ THE SOFTWARE.
|
||||
#endif
|
||||
|
||||
// ACCESSORS
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
operator decltype(data)() const { return data; }
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
operator __half2_raw() const { return __half2_raw{data}; }
|
||||
|
||||
// ACCESSORS - DEVICE ONLY
|
||||
@@ -520,42 +524,42 @@ THE SOFTWARE.
|
||||
namespace
|
||||
{
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 make_half2(__half x, __half y)
|
||||
{
|
||||
return __half2{x, y};
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half __low2half(__half2 x)
|
||||
{
|
||||
return __half{__half_raw{static_cast<__half2_raw>(x).data.x}};
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half __high2half(__half2 x)
|
||||
{
|
||||
return __half{__half_raw{static_cast<__half2_raw>(x).data.y}};
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __half2half2(__half x)
|
||||
{
|
||||
return __half2{x, x};
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __halves2half2(__half x, __half y)
|
||||
{
|
||||
return __half2{x, y};
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __low2half2(__half2 x)
|
||||
{
|
||||
return __half2{
|
||||
@@ -565,7 +569,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __high2half2(__half2 x)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -575,7 +579,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __lows2half2(__half2 x, __half2 y)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -585,7 +589,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __highs2half2(__half2 x, __half2 y)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -595,7 +599,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __lowhigh2highlow(__half2 x)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -638,37 +642,37 @@ THE SOFTWARE.
|
||||
// TODO: rounding behaviour is not correct.
|
||||
// float -> half | half2
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half __float2half(float x)
|
||||
{
|
||||
return __half_raw{static_cast<_Float16>(x)};
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half __float2half_rn(float x)
|
||||
{
|
||||
return __half_raw{static_cast<_Float16>(x)};
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half __float2half_rz(float x)
|
||||
{
|
||||
return __half_raw{static_cast<_Float16>(x)};
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half __float2half_rd(float x)
|
||||
{
|
||||
return __half_raw{static_cast<_Float16>(x)};
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half __float2half_ru(float x)
|
||||
{
|
||||
return __half_raw{static_cast<_Float16>(x)};
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half2 __float2half2_rn(float x)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -676,14 +680,14 @@ THE SOFTWARE.
|
||||
static_cast<_Float16>(x), static_cast<_Float16>(x)}};
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half2 __floats2half2_rn(float x, float y)
|
||||
{
|
||||
return __half2_raw{_Float16_2{
|
||||
static_cast<_Float16>(x), static_cast<_Float16>(y)}};
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
__half2 __float22half2_rn(float2 x)
|
||||
{
|
||||
return __floats2half2_rn(x.x, x.y);
|
||||
@@ -691,25 +695,25 @@ THE SOFTWARE.
|
||||
|
||||
// half | half2 -> float
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
float __half2float(__half x)
|
||||
{
|
||||
return static_cast<__half_raw>(x).data;
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
float __low2float(__half2 x)
|
||||
{
|
||||
return static_cast<__half2_raw>(x).data.x;
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
float __high2float(__half2 x)
|
||||
{
|
||||
return static_cast<__half2_raw>(x).data.y;
|
||||
}
|
||||
inline
|
||||
__device__ __host__
|
||||
__HOST_DEVICE__
|
||||
float2 __half22float2(__half2 x)
|
||||
{
|
||||
return make_float2(
|
||||
@@ -1044,16 +1048,16 @@ THE SOFTWARE.
|
||||
__half __ldcs(const __half* ptr) { return *ptr; }
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __ldg(const __half2* ptr) { return *ptr; }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __ldcg(const __half2* ptr) { return *ptr; }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __ldca(const __half2* ptr) { return *ptr; }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __ldcs(const __half2* ptr) { return *ptr; }
|
||||
|
||||
// Relations
|
||||
@@ -1119,7 +1123,7 @@ THE SOFTWARE.
|
||||
bool __hgtu(__half x, __half y) { return __hgt(x, y); }
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __heq2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(x).data ==
|
||||
@@ -1127,7 +1131,7 @@ THE SOFTWARE.
|
||||
return __builtin_convertvector(-r, _Float16_2);
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hne2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(x).data !=
|
||||
@@ -1135,7 +1139,7 @@ THE SOFTWARE.
|
||||
return __builtin_convertvector(-r, _Float16_2);
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hle2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(x).data <=
|
||||
@@ -1143,7 +1147,7 @@ THE SOFTWARE.
|
||||
return __builtin_convertvector(-r, _Float16_2);
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hge2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(x).data >=
|
||||
@@ -1151,7 +1155,7 @@ THE SOFTWARE.
|
||||
return __builtin_convertvector(-r, _Float16_2);
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hlt2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(x).data <
|
||||
@@ -1159,7 +1163,7 @@ THE SOFTWARE.
|
||||
return __builtin_convertvector(-r, _Float16_2);
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hgt2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(x).data >
|
||||
@@ -1167,83 +1171,83 @@ THE SOFTWARE.
|
||||
return __builtin_convertvector(-r, _Float16_2);
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); }
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbeq2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__heq2(x, y));
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbne2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hne2(x, y));
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hble2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hle2(x, y));
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbge2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hge2(x, y));
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hblt2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hlt2(x, y));
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbgt2(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hgt2(x, y));
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); }
|
||||
|
||||
// Arithmetic
|
||||
@@ -1332,7 +1336,7 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hadd2(__half2 x, __half2 y)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -1340,14 +1344,14 @@ THE SOFTWARE.
|
||||
static_cast<__half2_raw>(y).data};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __habs2(__half2 x)
|
||||
{
|
||||
return __half2_raw{
|
||||
__ocml_fabs_2f16(static_cast<__half2_raw>(x).data)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hsub2(__half2 x, __half2 y)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -1355,7 +1359,7 @@ THE SOFTWARE.
|
||||
static_cast<__half2_raw>(y).data};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hmul2(__half2 x, __half2 y)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -1363,7 +1367,7 @@ THE SOFTWARE.
|
||||
static_cast<__half2_raw>(y).data};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hadd2_sat(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hadd2(x, y));
|
||||
@@ -1372,7 +1376,7 @@ THE SOFTWARE.
|
||||
__clamp_01(__half_raw{r.data.y})};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hsub2_sat(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hsub2(x, y));
|
||||
@@ -1381,7 +1385,7 @@ THE SOFTWARE.
|
||||
__clamp_01(__half_raw{r.data.y})};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hmul2_sat(__half2 x, __half2 y)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hmul2(x, y));
|
||||
@@ -1390,13 +1394,13 @@ THE SOFTWARE.
|
||||
__clamp_01(__half_raw{r.data.y})};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hfma2(__half2 x, __half2 y, __half2 z)
|
||||
{
|
||||
return __half2_raw{__ocml_fma_2f16(x, y, z)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
|
||||
{
|
||||
auto r = static_cast<__half2_raw>(__hfma2(x, y, z));
|
||||
@@ -1405,7 +1409,7 @@ THE SOFTWARE.
|
||||
__clamp_01(__half_raw{r.data.y})};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __h2div(__half2 x, __half2 y)
|
||||
{
|
||||
return __half2_raw{
|
||||
@@ -1548,82 +1552,82 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2trunc(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_trunc_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2ceil(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_ceil_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2floor(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_floor_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2rint(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_rint_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2sin(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_sin_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2cos(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_cos_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2exp(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_exp_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2exp2(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_exp2_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2exp10(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_exp10_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2log2(__half2 x)
|
||||
{
|
||||
return __half2_raw{__ocml_log2_2f16(x)};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2log(__half2 x) { return __ocml_log_2f16(x); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); }
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hisinf2(__half2 x)
|
||||
{
|
||||
auto r = __ocml_isinf_2f16(x);
|
||||
@@ -1631,7 +1635,7 @@ THE SOFTWARE.
|
||||
static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hisnan2(__half2 x)
|
||||
{
|
||||
auto r = __ocml_isnan_2f16(x);
|
||||
@@ -1639,7 +1643,7 @@ THE SOFTWARE.
|
||||
static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
|
||||
}
|
||||
inline
|
||||
__host__ __device__
|
||||
__HOST_DEVICE__
|
||||
__half2 __hneg2(__half2 x)
|
||||
{
|
||||
return __half2_raw{-static_cast<__half2_raw>(x).data};
|
||||
|
||||
@@ -34,6 +34,7 @@ THE SOFTWARE.
|
||||
//---
|
||||
// Top part of file can be compiled with any compiler
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
//#include <cstring>
|
||||
#if __cplusplus
|
||||
#include <cmath>
|
||||
@@ -42,7 +43,8 @@ THE SOFTWARE.
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
#endif //__cplusplus
|
||||
#endif // __cplusplus
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
// __hip_malloc is not working. Disable it by default.
|
||||
#ifndef __HIP_ENABLE_DEVICE_MALLOC__
|
||||
@@ -57,9 +59,10 @@ THE SOFTWARE.
|
||||
|
||||
#define CUDA_SUCCESS hipSuccess
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
extern int HIP_TRACE_API;
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <hip/amd_detail/hip_ldg.h>
|
||||
@@ -121,7 +124,9 @@ extern int HIP_TRACE_API;
|
||||
#define __launch_bounds__(...) \
|
||||
select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#if __HIP_ARCH_GFX701__ == 0
|
||||
|
||||
@@ -162,6 +167,7 @@ static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullpt
|
||||
//
|
||||
// hip-clang functions
|
||||
//
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
||||
#define HIP_SYMBOL(X) X
|
||||
|
||||
@@ -218,6 +224,8 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
#endif
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
|
||||
extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
|
||||
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
|
||||
@@ -245,6 +253,17 @@ struct __HIP_ThreadIdx {
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__HIPCC_RTC__)
|
||||
typedef struct dim3 {
|
||||
uint32_t x; ///< x
|
||||
uint32_t y; ///< y
|
||||
uint32_t z; ///< z
|
||||
#ifdef __cplusplus
|
||||
constexpr __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
|
||||
#endif
|
||||
} dim3;
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
template <typename F>
|
||||
struct __HIP_Coordinates {
|
||||
using R = decltype(F{}(0));
|
||||
@@ -371,6 +390,7 @@ hc_get_workitem_absolute_id(int dim)
|
||||
#endif
|
||||
|
||||
#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
// Support std::complex.
|
||||
#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
|
||||
#pragma push_macro("__CUDA__")
|
||||
@@ -388,6 +408,7 @@ hc_get_workitem_absolute_id(int dim)
|
||||
#undef __CUDA__
|
||||
#pragma pop_macro("__CUDA__")
|
||||
#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
|
||||
#endif // __HIP_CLANG_ONLY__
|
||||
|
||||
|
||||
@@ -33,9 +33,10 @@ THE SOFTWARE.
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <limits.h>
|
||||
//#include <hip/amd_detail/driver_types.h>
|
||||
#include <hip/amd_detail/channel_descriptor.h>
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
#include <hip/amd_detail/texture_types.h>
|
||||
|
||||
#if __cplusplus
|
||||
|
||||
File diff soppresso perché troppo grande
Carica Diff
@@ -28,18 +28,18 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip/amd_detail/host_defines.h>
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <algorithm>
|
||||
|
||||
// assert.h is only for the host version of assert.
|
||||
// The device version of assert is implemented in hip/amd_detail/hip_runtime.h.
|
||||
// Users should include hip_runtime.h for the device version of assert.
|
||||
#if !__HIP_DEVICE_COMPILE__
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#if _LIBCPP_VERSION && __HIP__
|
||||
namespace std {
|
||||
@@ -1460,6 +1460,7 @@ double min(double x, double y) {
|
||||
__HIP_OVERLOAD2(double, max)
|
||||
__HIP_OVERLOAD2(double, min)
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
__host__ inline static int min(int arg1, int arg2) {
|
||||
return std::min(arg1, arg2);
|
||||
}
|
||||
@@ -1467,6 +1468,7 @@ __host__ inline static int min(int arg1, int arg2) {
|
||||
__host__ inline static int max(int arg1, int arg2) {
|
||||
return std::max(arg1, arg2);
|
||||
}
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
__DEVICE__
|
||||
inline float pow(float base, int iexp) {
|
||||
|
||||
@@ -25,10 +25,12 @@ THE SOFTWARE.
|
||||
#if defined(__cplusplus)
|
||||
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <hip/texture_types.h>
|
||||
#include <hip/hip_texture_types.h>
|
||||
#include <hip/amd_detail/ockl_image.h>
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <type_traits>
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#define TEXTURE_PARAMETERS_INIT \
|
||||
unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \
|
||||
|
||||
@@ -28,7 +28,9 @@ THE SOFTWARE.
|
||||
#include <hip/hip_texture_types.h>
|
||||
#include <hip/amd_detail/ockl_image.h>
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <type_traits>
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#define TEXTURE_OBJECT_PARAMETERS_INIT \
|
||||
unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \
|
||||
|
||||
@@ -41,6 +41,7 @@ THE SOFTWARE.
|
||||
#error HIP is not supported on GFX10 with wavefront size 64
|
||||
#endif
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
|
||||
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
|
||||
// on NVCC path:
|
||||
@@ -52,6 +53,7 @@ THE SOFTWARE.
|
||||
#if __cplusplus > 199711L
|
||||
#include <thread>
|
||||
#endif
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#include <hip/hip_version.h>
|
||||
#include <hip/hip_common.h>
|
||||
@@ -107,8 +109,10 @@ THE SOFTWARE.
|
||||
#endif // defined(__clang__)
|
||||
#endif
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_vector_types.h>
|
||||
#include <hip/library_types.h>
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
#include <hip/hip_vector_types.h>
|
||||
|
||||
#endif
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user