SWDEV-283266 - __HIPCC_RTC__ macro added to HIP headers

For hipRTC on Windows, add macro __HIPCC_RTC__ to allow
online compilation of with device functions excluding standard
C/C++ headers, system headers, and host HIP APIs.

Change-Id: I1d91f042baf1359856ec83ab7030dc58785e0334
This commit is contained in:
Aaron En Ye Shi
2021-05-05 21:26:21 +00:00
parent 73964037c1
commit 811bc363c8
12 ha cambiato i file con 368 aggiunte e 310 eliminazioni
+2 -1
Vedi File
@@ -26,9 +26,10 @@ THE SOFTWARE.
#include "host_defines.h"
#include "math_fwd.h"
#if !defined(__HIPCC_RTC__)
#include <hip/hip_runtime_api.h>
#include <stddef.h>
#endif // !defined(__HIPCC_RTC__)
#include <hip/hip_vector_types.h>
#include <hip/amd_detail/device_library_decls.h>
+6 -2
Vedi File
@@ -27,9 +27,11 @@ THE SOFTWARE.
// It's defined here for workarround of rocThrust building failure.
#define HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H
#if !defined(__HIPCC_RTC__)
#ifndef __cplusplus
#include <stdbool.h>
#endif
#endif // !defined(__HIPCC_RTC__)
typedef void* hipDeviceptr_t;
typedef enum hipChannelFormatKind {
@@ -92,6 +94,7 @@ typedef struct hipArray {
unsigned int textureType;
}hipArray;
#if !defined(__HIPCC_RTC__)
typedef struct hip_Memcpy2D {
size_t srcXInBytes;
size_t srcY;
@@ -110,7 +113,7 @@ typedef struct hip_Memcpy2D {
size_t WidthInBytes;
size_t Height;
} hip_Memcpy2D;
#endif // !defined(__HIPCC_RTC__)
typedef struct hipArray* hipArray_t;
typedef hipArray_t hiparray;
@@ -359,6 +362,7 @@ typedef struct HIP_RESOURCE_VIEW_DESC_st
* Memory copy types
*
*/
#if !defined(__HIPCC_RTC__)
typedef enum hipMemcpyKind {
hipMemcpyHostToHost = 0, ///< Host-to-Host Copy
hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy
@@ -470,5 +474,5 @@ typedef enum hipFunction_attribute {
HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT,
HIP_FUNC_ATTRIBUTE_MAX
}hipFunction_attribute;
#endif // !defined(__HIPCC_RTC__)
#endif
+4
Vedi File
@@ -111,7 +111,9 @@ float atomicAdd_system(float* address, float val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
@@ -476,7 +478,9 @@ float atomicAdd(float* address, float val)
return __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
}
#if !defined(__HIPCC_RTC__)
DEPRECATED("use atomicAdd instead")
#endif // !defined(__HIPCC_RTC__)
__device__
inline
void atomicAddNoRet(float* address, float val)
+42 -37
Vedi File
@@ -25,6 +25,10 @@ THE SOFTWARE.
#include "hip/amd_detail/hip_vector_types.h"
#if defined(__HIPCC_RTC__)
#define __HOST_DEVICE__ __device__
#else
#define __HOST_DEVICE__ __host__ __device__
// TODO: Clang has a bug which allows device functions to call std functions
// when std functions are introduced into default namespace by using statement.
// math.h may be included after this bug is fixed.
@@ -33,10 +37,11 @@ THE SOFTWARE.
#else
#include "math.h"
#endif
#endif // !defined(__HIPCC_RTC__)
#if __cplusplus
#define COMPLEX_NEG_OP_OVERLOAD(type) \
__device__ __host__ static inline type operator-(const type& op) { \
__HOST_DEVICE__ static inline type operator-(const type& op) { \
type ret; \
ret.x = -op.x; \
ret.y = -op.y; \
@@ -44,17 +49,17 @@ THE SOFTWARE.
}
#define COMPLEX_EQ_OP_OVERLOAD(type) \
__device__ __host__ static inline bool operator==(const type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline bool operator==(const type& lhs, const type& rhs) { \
return lhs.x == rhs.x && lhs.y == rhs.y; \
}
#define COMPLEX_NE_OP_OVERLOAD(type) \
__device__ __host__ static inline bool operator!=(const type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline bool operator!=(const type& lhs, const type& rhs) { \
return !(lhs == rhs); \
}
#define COMPLEX_ADD_OP_OVERLOAD(type) \
__device__ __host__ static inline type operator+(const type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type operator+(const type& lhs, const type& rhs) { \
type ret; \
ret.x = lhs.x + rhs.x; \
ret.y = lhs.y + rhs.y; \
@@ -62,7 +67,7 @@ THE SOFTWARE.
}
#define COMPLEX_SUB_OP_OVERLOAD(type) \
__device__ __host__ static inline type operator-(const type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type operator-(const type& lhs, const type& rhs) { \
type ret; \
ret.x = lhs.x - rhs.x; \
ret.y = lhs.y - rhs.y; \
@@ -70,7 +75,7 @@ THE SOFTWARE.
}
#define COMPLEX_MUL_OP_OVERLOAD(type) \
__device__ __host__ static inline type operator*(const type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type operator*(const type& lhs, const type& rhs) { \
type ret; \
ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \
ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \
@@ -78,7 +83,7 @@ THE SOFTWARE.
}
#define COMPLEX_DIV_OP_OVERLOAD(type) \
__device__ __host__ static inline type operator/(const type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type operator/(const type& lhs, const type& rhs) { \
type ret; \
ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \
ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \
@@ -88,33 +93,33 @@ THE SOFTWARE.
}
#define COMPLEX_ADD_PREOP_OVERLOAD(type) \
__device__ __host__ static inline type& operator+=(type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type& operator+=(type& lhs, const type& rhs) { \
lhs.x += rhs.x; \
lhs.y += rhs.y; \
return lhs; \
}
#define COMPLEX_SUB_PREOP_OVERLOAD(type) \
__device__ __host__ static inline type& operator-=(type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type& operator-=(type& lhs, const type& rhs) { \
lhs.x -= rhs.x; \
lhs.y -= rhs.y; \
return lhs; \
}
#define COMPLEX_MUL_PREOP_OVERLOAD(type) \
__device__ __host__ static inline type& operator*=(type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type& operator*=(type& lhs, const type& rhs) { \
lhs = lhs * rhs; \
return lhs; \
}
#define COMPLEX_DIV_PREOP_OVERLOAD(type) \
__device__ __host__ static inline type& operator/=(type& lhs, const type& rhs) { \
__HOST_DEVICE__ static inline type& operator/=(type& lhs, const type& rhs) { \
lhs = lhs / rhs; \
return lhs; \
}
#define COMPLEX_SCALAR_PRODUCT(type, type1) \
__device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \
__HOST_DEVICE__ static inline type operator*(const type& lhs, type1 rhs) { \
type ret; \
ret.x = lhs.x * rhs; \
ret.y = lhs.y * rhs; \
@@ -125,41 +130,41 @@ THE SOFTWARE.
typedef float2 hipFloatComplex;
__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; }
__HOST_DEVICE__ static inline float hipCrealf(hipFloatComplex z) { return z.x; }
__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; }
__HOST_DEVICE__ static inline float hipCimagf(hipFloatComplex z) { return z.y; }
__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
__HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
hipFloatComplex z;
z.x = a;
z.y = b;
return z;
}
__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
__HOST_DEVICE__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
hipFloatComplex ret;
ret.x = z.x;
ret.y = -z.y;
return ret;
}
__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) {
__HOST_DEVICE__ static inline float hipCsqabsf(hipFloatComplex z) {
return z.x * z.x + z.y * z.y;
}
__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
__HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
return make_hipFloatComplex(p.x + q.x, p.y + q.y);
}
__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
__HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
return make_hipFloatComplex(p.x - q.x, p.y - q.y);
}
__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
__HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
}
__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
__HOST_DEVICE__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
float sqabs = hipCsqabsf(q);
hipFloatComplex ret;
ret.x = (p.x * q.x + p.y * q.y) / sqabs;
@@ -167,46 +172,46 @@ __device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hi
return ret;
}
__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); }
__HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); }
typedef double2 hipDoubleComplex;
__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; }
__HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) { return z.x; }
__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; }
__HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) { return z.y; }
__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
__HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
hipDoubleComplex z;
z.x = a;
z.y = b;
return z;
}
__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
__HOST_DEVICE__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
hipDoubleComplex ret;
ret.x = z.x;
ret.y = -z.y;
return ret;
}
__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) {
__HOST_DEVICE__ static inline double hipCsqabs(hipDoubleComplex z) {
return z.x * z.x + z.y * z.y;
}
__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
__HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
return make_hipDoubleComplex(p.x + q.x, p.y + q.y);
}
__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
__HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
return make_hipDoubleComplex(p.x - q.x, p.y - q.y);
}
__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
__HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y);
}
__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
__HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
double sqabs = hipCsqabs(q);
hipDoubleComplex ret;
ret.x = (p.x * q.x + p.y * q.y) / sqabs;
@@ -214,7 +219,7 @@ __device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, h
return ret;
}
__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); }
__HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); }
#if __cplusplus
@@ -268,19 +273,19 @@ COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long)
typedef hipFloatComplex hipComplex;
__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) {
__HOST_DEVICE__ static inline hipComplex make_hipComplex(float x, float y) {
return make_hipFloatComplex(x, y);
}
__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
__HOST_DEVICE__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
return make_hipFloatComplex((float)z.x, (float)z.y);
}
__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
__HOST_DEVICE__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
return make_hipDoubleComplex((double)z.x, (double)z.y);
}
__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
__HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
float real = (p.x * q.x) + r.x;
float imag = (q.x * p.y) + r.y;
@@ -290,7 +295,7 @@ __device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q
return make_hipComplex(real, imag);
}
__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
__HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
hipDoubleComplex r) {
double real = (p.x * q.x) + r.x;
double imag = (q.x * p.y) + r.y;
+115 -111
Vedi File
@@ -25,14 +25,18 @@ THE SOFTWARE.
#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
#include <hip/amd_detail/hip_common.h>
#include "hip/amd_detail/host_defines.h"
#include <assert.h>
#if defined(__cplusplus)
#if defined(__HIPCC_RTC__)
#define __HOST_DEVICE__ __device__
#else
#define __HOST_DEVICE__ __host__ __device__
#include <assert.h>
#if defined(__cplusplus)
#include <algorithm>
#include <type_traits>
#include <utility>
#endif
#endif // !defined(__HIPCC_RTC__)
#if __HIP_CLANG_ONLY__
typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
@@ -82,46 +86,46 @@ THE SOFTWARE.
};
public:
// CREATORS
__host__ __device__
__HOST_DEVICE__
__half() = default;
__host__ __device__
__HOST_DEVICE__
__half(const __half_raw& x) : data{x.data} {}
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
__host__ __device__
__HOST_DEVICE__
__half(decltype(data) x) : data{x} {}
template<
typename T,
Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
__host__ __device__
__HOST_DEVICE__
__half(T x) : data{static_cast<_Float16>(x)} {}
#endif
__host__ __device__
__HOST_DEVICE__
__half(const __half&) = default;
__host__ __device__
__HOST_DEVICE__
__half(__half&&) = default;
__host__ __device__
__HOST_DEVICE__
~__half() = default;
// CREATORS - DEVICE ONLY
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
template<
typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
__host__ __device__
__HOST_DEVICE__
__half(T x) : data{static_cast<_Float16>(x)} {}
#endif
// MANIPULATORS
__host__ __device__
__HOST_DEVICE__
__half& operator=(const __half&) = default;
__host__ __device__
__HOST_DEVICE__
__half& operator=(__half&&) = default;
__host__ __device__
__HOST_DEVICE__
__half& operator=(const __half_raw& x)
{
data = x.data;
return *this;
}
__host__ __device__
__HOST_DEVICE__
volatile __half& operator=(const __half_raw& x) volatile
{
data = x.data;
@@ -151,7 +155,7 @@ THE SOFTWARE.
template<
typename T,
Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
__host__ __device__
__HOST_DEVICE__
__half& operator=(T x)
{
data = static_cast<_Float16>(x);
@@ -221,12 +225,12 @@ THE SOFTWARE.
template<
typename T,
Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
__host__ __device__
__HOST_DEVICE__
operator T() const { return data; }
#endif
__host__ __device__
__HOST_DEVICE__
operator __half_raw() const { return __half_raw{data}; }
__host__ __device__
__HOST_DEVICE__
operator __half_raw() const volatile
{
return __half_raw{data};
@@ -235,7 +239,7 @@ THE SOFTWARE.
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
template<
typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
__host__ __device__
__HOST_DEVICE__
operator T() const { return data; }
#endif
@@ -342,38 +346,38 @@ THE SOFTWARE.
};
// CREATORS
__host__ __device__
__HOST_DEVICE__
__half2() = default;
__host__ __device__
__HOST_DEVICE__
__half2(const __half2_raw& x) : data{x.data} {}
__host__ __device__
__HOST_DEVICE__
__half2(decltype(data) x) : data{x} {}
__host__ __device__
__HOST_DEVICE__
__half2(const __half& x, const __half& y)
:
data{
static_cast<__half_raw>(x).data,
static_cast<__half_raw>(y).data}
{}
__host__ __device__
__HOST_DEVICE__
__half2(const __half2&) = default;
__host__ __device__
__HOST_DEVICE__
__half2(__half2&&) = default;
__host__ __device__
__HOST_DEVICE__
~__half2() = default;
// MANIPULATORS
__host__ __device__
__HOST_DEVICE__
__half2& operator=(const __half2&) = default;
__host__ __device__
__HOST_DEVICE__
__half2& operator=(__half2&&) = default;
__host__ __device__
__HOST_DEVICE__
__half2& operator=(const __half2_raw& x)
{
data = x.data;
return *this;
}
// MANIPULATORS - DEVICE ONLY
#if !defined(__HIP_NO_HALF_OPERATORS__)
__device__
@@ -421,9 +425,9 @@ THE SOFTWARE.
#endif
// ACCESSORS
__host__ __device__
__HOST_DEVICE__
operator decltype(data)() const { return data; }
__host__ __device__
__HOST_DEVICE__
operator __half2_raw() const { return __half2_raw{data}; }
// ACCESSORS - DEVICE ONLY
@@ -520,42 +524,42 @@ THE SOFTWARE.
namespace
{
inline
__host__ __device__
__HOST_DEVICE__
__half2 make_half2(__half x, __half y)
{
return __half2{x, y};
}
inline
__host__ __device__
__HOST_DEVICE__
__half __low2half(__half2 x)
{
return __half{__half_raw{static_cast<__half2_raw>(x).data.x}};
}
inline
__host__ __device__
__HOST_DEVICE__
__half __high2half(__half2 x)
{
return __half{__half_raw{static_cast<__half2_raw>(x).data.y}};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __half2half2(__half x)
{
return __half2{x, x};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __halves2half2(__half x, __half y)
{
return __half2{x, y};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __low2half2(__half2 x)
{
return __half2{
@@ -565,7 +569,7 @@ THE SOFTWARE.
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __high2half2(__half2 x)
{
return __half2_raw{
@@ -575,7 +579,7 @@ THE SOFTWARE.
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __lows2half2(__half2 x, __half2 y)
{
return __half2_raw{
@@ -585,7 +589,7 @@ THE SOFTWARE.
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __highs2half2(__half2 x, __half2 y)
{
return __half2_raw{
@@ -595,7 +599,7 @@ THE SOFTWARE.
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __lowhigh2highlow(__half2 x)
{
return __half2_raw{
@@ -638,37 +642,37 @@ THE SOFTWARE.
// TODO: rounding behaviour is not correct.
// float -> half | half2
inline
__device__ __host__
__HOST_DEVICE__
__half __float2half(float x)
{
return __half_raw{static_cast<_Float16>(x)};
}
inline
__device__ __host__
__HOST_DEVICE__
__half __float2half_rn(float x)
{
return __half_raw{static_cast<_Float16>(x)};
}
inline
__device__ __host__
__HOST_DEVICE__
__half __float2half_rz(float x)
{
return __half_raw{static_cast<_Float16>(x)};
}
inline
__device__ __host__
__HOST_DEVICE__
__half __float2half_rd(float x)
{
return __half_raw{static_cast<_Float16>(x)};
}
inline
__device__ __host__
__HOST_DEVICE__
__half __float2half_ru(float x)
{
return __half_raw{static_cast<_Float16>(x)};
}
inline
__device__ __host__
__HOST_DEVICE__
__half2 __float2half2_rn(float x)
{
return __half2_raw{
@@ -676,14 +680,14 @@ THE SOFTWARE.
static_cast<_Float16>(x), static_cast<_Float16>(x)}};
}
inline
__device__ __host__
__HOST_DEVICE__
__half2 __floats2half2_rn(float x, float y)
{
return __half2_raw{_Float16_2{
static_cast<_Float16>(x), static_cast<_Float16>(y)}};
}
inline
__device__ __host__
__HOST_DEVICE__
__half2 __float22half2_rn(float2 x)
{
return __floats2half2_rn(x.x, x.y);
@@ -691,25 +695,25 @@ THE SOFTWARE.
// half | half2 -> float
inline
__device__ __host__
__HOST_DEVICE__
float __half2float(__half x)
{
return static_cast<__half_raw>(x).data;
}
inline
__device__ __host__
__HOST_DEVICE__
float __low2float(__half2 x)
{
return static_cast<__half2_raw>(x).data.x;
}
inline
__device__ __host__
__HOST_DEVICE__
float __high2float(__half2 x)
{
return static_cast<__half2_raw>(x).data.y;
}
inline
__device__ __host__
__HOST_DEVICE__
float2 __half22float2(__half2 x)
{
return make_float2(
@@ -1044,16 +1048,16 @@ THE SOFTWARE.
__half __ldcs(const __half* ptr) { return *ptr; }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __ldg(const __half2* ptr) { return *ptr; }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __ldcg(const __half2* ptr) { return *ptr; }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __ldca(const __half2* ptr) { return *ptr; }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __ldcs(const __half2* ptr) { return *ptr; }
// Relations
@@ -1119,7 +1123,7 @@ THE SOFTWARE.
bool __hgtu(__half x, __half y) { return __hgt(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __heq2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(x).data ==
@@ -1127,7 +1131,7 @@ THE SOFTWARE.
return __builtin_convertvector(-r, _Float16_2);
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hne2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(x).data !=
@@ -1135,7 +1139,7 @@ THE SOFTWARE.
return __builtin_convertvector(-r, _Float16_2);
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hle2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(x).data <=
@@ -1143,7 +1147,7 @@ THE SOFTWARE.
return __builtin_convertvector(-r, _Float16_2);
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hge2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(x).data >=
@@ -1151,7 +1155,7 @@ THE SOFTWARE.
return __builtin_convertvector(-r, _Float16_2);
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hlt2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(x).data <
@@ -1159,7 +1163,7 @@ THE SOFTWARE.
return __builtin_convertvector(-r, _Float16_2);
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hgt2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(x).data >
@@ -1167,83 +1171,83 @@ THE SOFTWARE.
return __builtin_convertvector(-r, _Float16_2);
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
bool __hbeq2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__heq2(x, y));
return r.data.x != 0 && r.data.y != 0;
}
inline
__host__ __device__
__HOST_DEVICE__
bool __hbne2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hne2(x, y));
return r.data.x != 0 && r.data.y != 0;
}
inline
__host__ __device__
__HOST_DEVICE__
bool __hble2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hle2(x, y));
return r.data.x != 0 && r.data.y != 0;
}
inline
__host__ __device__
__HOST_DEVICE__
bool __hbge2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hge2(x, y));
return r.data.x != 0 && r.data.y != 0;
}
inline
__host__ __device__
__HOST_DEVICE__
bool __hblt2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hlt2(x, y));
return r.data.x != 0 && r.data.y != 0;
}
inline
__host__ __device__
__HOST_DEVICE__
bool __hbgt2(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hgt2(x, y));
return r.data.x != 0 && r.data.y != 0;
}
inline
__host__ __device__
__HOST_DEVICE__
bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); }
inline
__host__ __device__
__HOST_DEVICE__
bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); }
// Arithmetic
@@ -1332,7 +1336,7 @@ THE SOFTWARE.
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hadd2(__half2 x, __half2 y)
{
return __half2_raw{
@@ -1340,14 +1344,14 @@ THE SOFTWARE.
static_cast<__half2_raw>(y).data};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __habs2(__half2 x)
{
return __half2_raw{
__ocml_fabs_2f16(static_cast<__half2_raw>(x).data)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hsub2(__half2 x, __half2 y)
{
return __half2_raw{
@@ -1355,7 +1359,7 @@ THE SOFTWARE.
static_cast<__half2_raw>(y).data};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hmul2(__half2 x, __half2 y)
{
return __half2_raw{
@@ -1363,7 +1367,7 @@ THE SOFTWARE.
static_cast<__half2_raw>(y).data};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hadd2_sat(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hadd2(x, y));
@@ -1372,7 +1376,7 @@ THE SOFTWARE.
__clamp_01(__half_raw{r.data.y})};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hsub2_sat(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hsub2(x, y));
@@ -1381,7 +1385,7 @@ THE SOFTWARE.
__clamp_01(__half_raw{r.data.y})};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hmul2_sat(__half2 x, __half2 y)
{
auto r = static_cast<__half2_raw>(__hmul2(x, y));
@@ -1390,13 +1394,13 @@ THE SOFTWARE.
__clamp_01(__half_raw{r.data.y})};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hfma2(__half2 x, __half2 y, __half2 z)
{
return __half2_raw{__ocml_fma_2f16(x, y, z)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
{
auto r = static_cast<__half2_raw>(__hfma2(x, y, z));
@@ -1405,7 +1409,7 @@ THE SOFTWARE.
__clamp_01(__half_raw{r.data.y})};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __h2div(__half2 x, __half2 y)
{
return __half2_raw{
@@ -1548,82 +1552,82 @@ THE SOFTWARE.
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2trunc(__half2 x)
{
return __half2_raw{__ocml_trunc_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2ceil(__half2 x)
{
return __half2_raw{__ocml_ceil_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2floor(__half2 x)
{
return __half2_raw{__ocml_floor_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2rint(__half2 x)
{
return __half2_raw{__ocml_rint_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2sin(__half2 x)
{
return __half2_raw{__ocml_sin_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2cos(__half2 x)
{
return __half2_raw{__ocml_cos_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2exp(__half2 x)
{
return __half2_raw{__ocml_exp_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2exp2(__half2 x)
{
return __half2_raw{__ocml_exp2_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2exp10(__half2 x)
{
return __half2_raw{__ocml_exp10_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2log2(__half2 x)
{
return __half2_raw{__ocml_log2_2f16(x)};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2log(__half2 x) { return __ocml_log_2f16(x); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); }
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hisinf2(__half2 x)
{
auto r = __ocml_isinf_2f16(x);
@@ -1631,7 +1635,7 @@ THE SOFTWARE.
static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hisnan2(__half2 x)
{
auto r = __ocml_isnan_2f16(x);
@@ -1639,7 +1643,7 @@ THE SOFTWARE.
static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}};
}
inline
__host__ __device__
__HOST_DEVICE__
__half2 __hneg2(__half2 x)
{
return __half2_raw{-static_cast<__half2_raw>(x).data};
+23 -2
Vedi File
@@ -34,6 +34,7 @@ THE SOFTWARE.
//---
// Top part of file can be compiled with any compiler
#if !defined(__HIPCC_RTC__)
//#include <cstring>
#if __cplusplus
#include <cmath>
@@ -42,7 +43,8 @@ THE SOFTWARE.
#include <math.h>
#include <string.h>
#include <stddef.h>
#endif //__cplusplus
#endif // __cplusplus
#endif // !defined(__HIPCC_RTC__)
// __hip_malloc is not working. Disable it by default.
#ifndef __HIP_ENABLE_DEVICE_MALLOC__
@@ -57,9 +59,10 @@ THE SOFTWARE.
#define CUDA_SUCCESS hipSuccess
#if !defined(__HIPCC_RTC__)
#include <hip/hip_runtime_api.h>
extern int HIP_TRACE_API;
#endif // !defined(__HIPCC_RTC__)
#ifdef __cplusplus
#include <hip/amd_detail/hip_ldg.h>
@@ -121,7 +124,9 @@ extern int HIP_TRACE_API;
#define __launch_bounds__(...) \
select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)
#if !defined(__HIPCC_RTC__)
__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
#endif // !defined(__HIPCC_RTC__)
#if __HIP_ARCH_GFX701__ == 0
@@ -162,6 +167,7 @@ static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullpt
//
// hip-clang functions
//
#if !defined(__HIPCC_RTC__)
#define HIP_KERNEL_NAME(...) __VA_ARGS__
#define HIP_SYMBOL(X) X
@@ -218,6 +224,8 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
#endif
#include <hip/hip_runtime_api.h>
#endif // !defined(__HIPCC_RTC__)
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
@@ -245,6 +253,17 @@ struct __HIP_ThreadIdx {
}
};
#if defined(__HIPCC_RTC__)
typedef struct dim3 {
uint32_t x; ///< x
uint32_t y; ///< y
uint32_t z; ///< z
#ifdef __cplusplus
constexpr __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
#endif
} dim3;
#endif // !defined(__HIPCC_RTC__)
template <typename F>
struct __HIP_Coordinates {
using R = decltype(F{}(0));
@@ -371,6 +390,7 @@ hc_get_workitem_absolute_id(int dim)
#endif
#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
#if !defined(__HIPCC_RTC__)
// Support std::complex.
#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
#pragma push_macro("__CUDA__")
@@ -388,6 +408,7 @@ hc_get_workitem_absolute_id(int dim)
#undef __CUDA__
#pragma pop_macro("__CUDA__")
#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
#endif // !defined(__HIPCC_RTC__)
#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
#endif // __HIP_CLANG_ONLY__
@@ -33,9 +33,10 @@ THE SOFTWARE.
* *
* *
*******************************************************************************/
#if !defined(__HIPCC_RTC__)
#include <limits.h>
//#include <hip/amd_detail/driver_types.h>
#include <hip/amd_detail/channel_descriptor.h>
#endif // !defined(__HIPCC_RTC__)
#include <hip/amd_detail/texture_types.h>
#if __cplusplus
File diff soppresso perché troppo grande Carica Diff
+4 -2
Vedi File
@@ -28,18 +28,18 @@ THE SOFTWARE.
#include <hip/amd_detail/host_defines.h>
#if !defined(__HIPCC_RTC__)
#include <algorithm>
// assert.h is only for the host version of assert.
// The device version of assert is implemented in hip/amd_detail/hip_runtime.h.
// Users should include hip_runtime.h for the device version of assert.
#if !__HIP_DEVICE_COMPILE__
#include <assert.h>
#endif
#include <limits.h>
#include <limits>
#include <stdint.h>
#endif // !defined(__HIPCC_RTC__)
#if _LIBCPP_VERSION && __HIP__
namespace std {
@@ -1460,6 +1460,7 @@ double min(double x, double y) {
__HIP_OVERLOAD2(double, max)
__HIP_OVERLOAD2(double, min)
#if !defined(__HIPCC_RTC__)
__host__ inline static int min(int arg1, int arg2) {
return std::min(arg1, arg2);
}
@@ -1467,6 +1468,7 @@ __host__ inline static int min(int arg1, int arg2) {
__host__ inline static int max(int arg1, int arg2) {
return std::max(arg1, arg2);
}
#endif // !defined(__HIPCC_RTC__)
__DEVICE__
inline float pow(float base, int iexp) {
@@ -25,10 +25,12 @@ THE SOFTWARE.
#if defined(__cplusplus)
#include <hip/hip_vector_types.h>
#include <hip/texture_types.h>
#include <hip/hip_texture_types.h>
#include <hip/amd_detail/ockl_image.h>
#if !defined(__HIPCC_RTC__)
#include <type_traits>
#endif // !defined(__HIPCC_RTC__)
#define TEXTURE_PARAMETERS_INIT \
unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \
@@ -28,7 +28,9 @@ THE SOFTWARE.
#include <hip/hip_texture_types.h>
#include <hip/amd_detail/ockl_image.h>
#if !defined(__HIPCC_RTC__)
#include <type_traits>
#endif // !defined(__HIPCC_RTC__)
#define TEXTURE_OBJECT_PARAMETERS_INIT \
unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \
+5 -1
Vedi File
@@ -41,6 +41,7 @@ THE SOFTWARE.
#error HIP is not supported on GFX10 with wavefront size 64
#endif
#if !defined(__HIPCC_RTC__)
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
// on NVCC path:
@@ -52,6 +53,7 @@ THE SOFTWARE.
#if __cplusplus > 199711L
#include <thread>
#endif
#endif // !defined(__HIPCC_RTC__)
#include <hip/hip_version.h>
#include <hip/hip_common.h>
@@ -107,8 +109,10 @@ THE SOFTWARE.
#endif // defined(__clang__)
#endif
#if !defined(__HIPCC_RTC__)
#include <hip/hip_runtime_api.h>
#include <hip/hip_vector_types.h>
#include <hip/library_types.h>
#endif // !defined(__HIPCC_RTC__)
#include <hip/hip_vector_types.h>
#endif