From 811bc363c817cfd8bb07beebfd185bf4b9d27c18 Mon Sep 17 00:00:00 2001 From: Aaron En Ye Shi Date: Wed, 5 May 2021 21:26:21 +0000 Subject: [PATCH] SWDEV-283266 - __HIPCC_RTC__ macro added to HIP headers For hipRTC on Windows, add macro __HIPCC_RTC__ to allow online compilation of with device functions excluding standard C/C++ headers, system headers, and host HIP APIs. Change-Id: I1d91f042baf1359856ec83ab7030dc58785e0334 --- include/hip/amd_detail/device_functions.h | 3 +- include/hip/amd_detail/driver_types.h | 8 +- include/hip/amd_detail/hip_atomic.h | 4 + include/hip/amd_detail/hip_complex.h | 79 ++--- include/hip/amd_detail/hip_fp16.h | 226 ++++++------- include/hip/amd_detail/hip_runtime.h | 25 +- include/hip/amd_detail/hip_texture_types.h | 3 +- include/hip/amd_detail/hip_vector_types.h | 312 +++++++++--------- include/hip/amd_detail/math_functions.h | 6 +- .../hip/amd_detail/texture_fetch_functions.h | 4 +- .../amd_detail/texture_indirect_functions.h | 2 + include/hip/hip_runtime.h | 6 +- 12 files changed, 368 insertions(+), 310 deletions(-) diff --git a/include/hip/amd_detail/device_functions.h b/include/hip/amd_detail/device_functions.h index 703e72b5f1..0bc9cd33e8 100644 --- a/include/hip/amd_detail/device_functions.h +++ b/include/hip/amd_detail/device_functions.h @@ -26,9 +26,10 @@ THE SOFTWARE. #include "host_defines.h" #include "math_fwd.h" +#if !defined(__HIPCC_RTC__) #include #include - +#endif // !defined(__HIPCC_RTC__) #include #include diff --git a/include/hip/amd_detail/driver_types.h b/include/hip/amd_detail/driver_types.h index 4de4e3ee7e..fe29d1f144 100644 --- a/include/hip/amd_detail/driver_types.h +++ b/include/hip/amd_detail/driver_types.h @@ -27,9 +27,11 @@ THE SOFTWARE. // It's defined here for workarround of rocThrust building failure. #define HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H +#if !defined(__HIPCC_RTC__) #ifndef __cplusplus #include #endif +#endif // !defined(__HIPCC_RTC__) typedef void* hipDeviceptr_t; typedef enum hipChannelFormatKind { @@ -92,6 +94,7 @@ typedef struct hipArray { unsigned int textureType; }hipArray; +#if !defined(__HIPCC_RTC__) typedef struct hip_Memcpy2D { size_t srcXInBytes; size_t srcY; @@ -110,7 +113,7 @@ typedef struct hip_Memcpy2D { size_t WidthInBytes; size_t Height; } hip_Memcpy2D; - +#endif // !defined(__HIPCC_RTC__) typedef struct hipArray* hipArray_t; typedef hipArray_t hiparray; @@ -359,6 +362,7 @@ typedef struct HIP_RESOURCE_VIEW_DESC_st * Memory copy types * */ +#if !defined(__HIPCC_RTC__) typedef enum hipMemcpyKind { hipMemcpyHostToHost = 0, ///< Host-to-Host Copy hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy @@ -470,5 +474,5 @@ typedef enum hipFunction_attribute { HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, HIP_FUNC_ATTRIBUTE_MAX }hipFunction_attribute; - +#endif // !defined(__HIPCC_RTC__) #endif diff --git a/include/hip/amd_detail/hip_atomic.h b/include/hip/amd_detail/hip_atomic.h index fdbf5be463..0c4bc80cf6 100644 --- a/include/hip/amd_detail/hip_atomic.h +++ b/include/hip/amd_detail/hip_atomic.h @@ -111,7 +111,9 @@ float atomicAdd_system(float* address, float val) { return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); } +#if !defined(__HIPCC_RTC__) DEPRECATED("use atomicAdd instead") +#endif // !defined(__HIPCC_RTC__) __device__ inline void atomicAddNoRet(float* address, float val) @@ -476,7 +478,9 @@ float atomicAdd(float* address, float val) return __atomic_fetch_add(address, val, __ATOMIC_RELAXED); } +#if !defined(__HIPCC_RTC__) DEPRECATED("use atomicAdd instead") +#endif // !defined(__HIPCC_RTC__) __device__ inline void atomicAddNoRet(float* address, float val) diff --git a/include/hip/amd_detail/hip_complex.h b/include/hip/amd_detail/hip_complex.h index 4a59b52a54..db312780c1 100644 --- a/include/hip/amd_detail/hip_complex.h +++ b/include/hip/amd_detail/hip_complex.h @@ -25,6 +25,10 @@ THE SOFTWARE. #include "hip/amd_detail/hip_vector_types.h" +#if defined(__HIPCC_RTC__) +#define __HOST_DEVICE__ __device__ +#else +#define __HOST_DEVICE__ __host__ __device__ // TODO: Clang has a bug which allows device functions to call std functions // when std functions are introduced into default namespace by using statement. // math.h may be included after this bug is fixed. @@ -33,10 +37,11 @@ THE SOFTWARE. #else #include "math.h" #endif +#endif // !defined(__HIPCC_RTC__) #if __cplusplus #define COMPLEX_NEG_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator-(const type& op) { \ + __HOST_DEVICE__ static inline type operator-(const type& op) { \ type ret; \ ret.x = -op.x; \ ret.y = -op.y; \ @@ -44,17 +49,17 @@ THE SOFTWARE. } #define COMPLEX_EQ_OP_OVERLOAD(type) \ - __device__ __host__ static inline bool operator==(const type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline bool operator==(const type& lhs, const type& rhs) { \ return lhs.x == rhs.x && lhs.y == rhs.y; \ } #define COMPLEX_NE_OP_OVERLOAD(type) \ - __device__ __host__ static inline bool operator!=(const type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline bool operator!=(const type& lhs, const type& rhs) { \ return !(lhs == rhs); \ } #define COMPLEX_ADD_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator+(const type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type operator+(const type& lhs, const type& rhs) { \ type ret; \ ret.x = lhs.x + rhs.x; \ ret.y = lhs.y + rhs.y; \ @@ -62,7 +67,7 @@ THE SOFTWARE. } #define COMPLEX_SUB_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator-(const type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type operator-(const type& lhs, const type& rhs) { \ type ret; \ ret.x = lhs.x - rhs.x; \ ret.y = lhs.y - rhs.y; \ @@ -70,7 +75,7 @@ THE SOFTWARE. } #define COMPLEX_MUL_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator*(const type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type operator*(const type& lhs, const type& rhs) { \ type ret; \ ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \ ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \ @@ -78,7 +83,7 @@ THE SOFTWARE. } #define COMPLEX_DIV_OP_OVERLOAD(type) \ - __device__ __host__ static inline type operator/(const type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type operator/(const type& lhs, const type& rhs) { \ type ret; \ ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \ ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \ @@ -88,33 +93,33 @@ THE SOFTWARE. } #define COMPLEX_ADD_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator+=(type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type& operator+=(type& lhs, const type& rhs) { \ lhs.x += rhs.x; \ lhs.y += rhs.y; \ return lhs; \ } #define COMPLEX_SUB_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator-=(type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type& operator-=(type& lhs, const type& rhs) { \ lhs.x -= rhs.x; \ lhs.y -= rhs.y; \ return lhs; \ } #define COMPLEX_MUL_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator*=(type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type& operator*=(type& lhs, const type& rhs) { \ lhs = lhs * rhs; \ return lhs; \ } #define COMPLEX_DIV_PREOP_OVERLOAD(type) \ - __device__ __host__ static inline type& operator/=(type& lhs, const type& rhs) { \ + __HOST_DEVICE__ static inline type& operator/=(type& lhs, const type& rhs) { \ lhs = lhs / rhs; \ return lhs; \ } #define COMPLEX_SCALAR_PRODUCT(type, type1) \ - __device__ __host__ static inline type operator*(const type& lhs, type1 rhs) { \ + __HOST_DEVICE__ static inline type operator*(const type& lhs, type1 rhs) { \ type ret; \ ret.x = lhs.x * rhs; \ ret.y = lhs.y * rhs; \ @@ -125,41 +130,41 @@ THE SOFTWARE. typedef float2 hipFloatComplex; -__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } +__HOST_DEVICE__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } -__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } +__HOST_DEVICE__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } -__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { +__HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { hipFloatComplex z; z.x = a; z.y = b; return z; } -__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { +__HOST_DEVICE__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { hipFloatComplex ret; ret.x = z.x; ret.y = -z.y; return ret; } -__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { +__HOST_DEVICE__ static inline float hipCsqabsf(hipFloatComplex z) { return z.x * z.x + z.y * z.y; } -__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { +__HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { return make_hipFloatComplex(p.x + q.x, p.y + q.y); } -__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { +__HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { return make_hipFloatComplex(p.x - q.x, p.y - q.y); } -__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { +__HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); } -__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { +__HOST_DEVICE__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { float sqabs = hipCsqabsf(q); hipFloatComplex ret; ret.x = (p.x * q.x + p.y * q.y) / sqabs; @@ -167,46 +172,46 @@ __device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hi return ret; } -__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } +__HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } typedef double2 hipDoubleComplex; -__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } +__HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } -__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } +__HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } -__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { +__HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { hipDoubleComplex z; z.x = a; z.y = b; return z; } -__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { +__HOST_DEVICE__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { hipDoubleComplex ret; ret.x = z.x; ret.y = -z.y; return ret; } -__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { +__HOST_DEVICE__ static inline double hipCsqabs(hipDoubleComplex z) { return z.x * z.x + z.y * z.y; } -__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { +__HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { return make_hipDoubleComplex(p.x + q.x, p.y + q.y); } -__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { +__HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { return make_hipDoubleComplex(p.x - q.x, p.y - q.y); } -__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { +__HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); } -__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { +__HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { double sqabs = hipCsqabs(q); hipDoubleComplex ret; ret.x = (p.x * q.x + p.y * q.y) / sqabs; @@ -214,7 +219,7 @@ __device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, h return ret; } -__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); } +__HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); } #if __cplusplus @@ -268,19 +273,19 @@ COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long) typedef hipFloatComplex hipComplex; -__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) { +__HOST_DEVICE__ static inline hipComplex make_hipComplex(float x, float y) { return make_hipFloatComplex(x, y); } -__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { +__HOST_DEVICE__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { return make_hipFloatComplex((float)z.x, (float)z.y); } -__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { +__HOST_DEVICE__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { return make_hipDoubleComplex((double)z.x, (double)z.y); } -__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { +__HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { float real = (p.x * q.x) + r.x; float imag = (q.x * p.y) + r.y; @@ -290,7 +295,7 @@ __device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q return make_hipComplex(real, imag); } -__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, +__HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, hipDoubleComplex r) { double real = (p.x * q.x) + r.x; double imag = (q.x * p.y) + r.y; diff --git a/include/hip/amd_detail/hip_fp16.h b/include/hip/amd_detail/hip_fp16.h index 859c3efd9e..fb344aa7d5 100644 --- a/include/hip/amd_detail/hip_fp16.h +++ b/include/hip/amd_detail/hip_fp16.h @@ -25,14 +25,18 @@ THE SOFTWARE. #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H #include - #include "hip/amd_detail/host_defines.h" -#include -#if defined(__cplusplus) +#if defined(__HIPCC_RTC__) + #define __HOST_DEVICE__ __device__ +#else + #define __HOST_DEVICE__ __host__ __device__ + #include + #if defined(__cplusplus) #include #include #include #endif +#endif // !defined(__HIPCC_RTC__) #if __HIP_CLANG_ONLY__ typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2))); @@ -82,46 +86,46 @@ THE SOFTWARE. }; public: // CREATORS - __host__ __device__ + __HOST_DEVICE__ __half() = default; - __host__ __device__ + __HOST_DEVICE__ __half(const __half_raw& x) : data{x.data} {} #if !defined(__HIP_NO_HALF_CONVERSIONS__) - __host__ __device__ + __HOST_DEVICE__ __half(decltype(data) x) : data{x} {} template< typename T, Enable_if_t{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ __half(T x) : data{static_cast<_Float16>(x)} {} #endif - __host__ __device__ + __HOST_DEVICE__ __half(const __half&) = default; - __host__ __device__ + __HOST_DEVICE__ __half(__half&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~__half() = default; // CREATORS - DEVICE ONLY #if !defined(__HIP_NO_HALF_CONVERSIONS__) template< typename T, Enable_if_t{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ __half(T x) : data{static_cast<_Float16>(x)} {} #endif // MANIPULATORS - __host__ __device__ + __HOST_DEVICE__ __half& operator=(const __half&) = default; - __host__ __device__ + __HOST_DEVICE__ __half& operator=(__half&&) = default; - __host__ __device__ + __HOST_DEVICE__ __half& operator=(const __half_raw& x) { data = x.data; return *this; } - __host__ __device__ + __HOST_DEVICE__ volatile __half& operator=(const __half_raw& x) volatile { data = x.data; @@ -151,7 +155,7 @@ THE SOFTWARE. template< typename T, Enable_if_t{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ __half& operator=(T x) { data = static_cast<_Float16>(x); @@ -221,12 +225,12 @@ THE SOFTWARE. template< typename T, Enable_if_t{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ operator T() const { return data; } #endif - __host__ __device__ + __HOST_DEVICE__ operator __half_raw() const { return __half_raw{data}; } - __host__ __device__ + __HOST_DEVICE__ operator __half_raw() const volatile { return __half_raw{data}; @@ -235,7 +239,7 @@ THE SOFTWARE. #if !defined(__HIP_NO_HALF_CONVERSIONS__) template< typename T, Enable_if_t{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ operator T() const { return data; } #endif @@ -342,38 +346,38 @@ THE SOFTWARE. }; // CREATORS - __host__ __device__ + __HOST_DEVICE__ __half2() = default; - __host__ __device__ + __HOST_DEVICE__ __half2(const __half2_raw& x) : data{x.data} {} - __host__ __device__ + __HOST_DEVICE__ __half2(decltype(data) x) : data{x} {} - __host__ __device__ + __HOST_DEVICE__ __half2(const __half& x, const __half& y) : data{ static_cast<__half_raw>(x).data, static_cast<__half_raw>(y).data} {} - __host__ __device__ + __HOST_DEVICE__ __half2(const __half2&) = default; - __host__ __device__ + __HOST_DEVICE__ __half2(__half2&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~__half2() = default; // MANIPULATORS - __host__ __device__ + __HOST_DEVICE__ __half2& operator=(const __half2&) = default; - __host__ __device__ + __HOST_DEVICE__ __half2& operator=(__half2&&) = default; - __host__ __device__ + __HOST_DEVICE__ __half2& operator=(const __half2_raw& x) { data = x.data; return *this; } - + // MANIPULATORS - DEVICE ONLY #if !defined(__HIP_NO_HALF_OPERATORS__) __device__ @@ -421,9 +425,9 @@ THE SOFTWARE. #endif // ACCESSORS - __host__ __device__ + __HOST_DEVICE__ operator decltype(data)() const { return data; } - __host__ __device__ + __HOST_DEVICE__ operator __half2_raw() const { return __half2_raw{data}; } // ACCESSORS - DEVICE ONLY @@ -520,42 +524,42 @@ THE SOFTWARE. namespace { inline - __host__ __device__ + __HOST_DEVICE__ __half2 make_half2(__half x, __half y) { return __half2{x, y}; } inline - __host__ __device__ + __HOST_DEVICE__ __half __low2half(__half2 x) { return __half{__half_raw{static_cast<__half2_raw>(x).data.x}}; } inline - __host__ __device__ + __HOST_DEVICE__ __half __high2half(__half2 x) { return __half{__half_raw{static_cast<__half2_raw>(x).data.y}}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __half2half2(__half x) { return __half2{x, x}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __halves2half2(__half x, __half y) { return __half2{x, y}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __low2half2(__half2 x) { return __half2{ @@ -565,7 +569,7 @@ THE SOFTWARE. } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __high2half2(__half2 x) { return __half2_raw{ @@ -575,7 +579,7 @@ THE SOFTWARE. } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __lows2half2(__half2 x, __half2 y) { return __half2_raw{ @@ -585,7 +589,7 @@ THE SOFTWARE. } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __highs2half2(__half2 x, __half2 y) { return __half2_raw{ @@ -595,7 +599,7 @@ THE SOFTWARE. } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __lowhigh2highlow(__half2 x) { return __half2_raw{ @@ -638,37 +642,37 @@ THE SOFTWARE. // TODO: rounding behaviour is not correct. // float -> half | half2 inline - __device__ __host__ + __HOST_DEVICE__ __half __float2half(float x) { return __half_raw{static_cast<_Float16>(x)}; } inline - __device__ __host__ + __HOST_DEVICE__ __half __float2half_rn(float x) { return __half_raw{static_cast<_Float16>(x)}; } inline - __device__ __host__ + __HOST_DEVICE__ __half __float2half_rz(float x) { return __half_raw{static_cast<_Float16>(x)}; } inline - __device__ __host__ + __HOST_DEVICE__ __half __float2half_rd(float x) { return __half_raw{static_cast<_Float16>(x)}; } inline - __device__ __host__ + __HOST_DEVICE__ __half __float2half_ru(float x) { return __half_raw{static_cast<_Float16>(x)}; } inline - __device__ __host__ + __HOST_DEVICE__ __half2 __float2half2_rn(float x) { return __half2_raw{ @@ -676,14 +680,14 @@ THE SOFTWARE. static_cast<_Float16>(x), static_cast<_Float16>(x)}}; } inline - __device__ __host__ + __HOST_DEVICE__ __half2 __floats2half2_rn(float x, float y) { return __half2_raw{_Float16_2{ static_cast<_Float16>(x), static_cast<_Float16>(y)}}; } inline - __device__ __host__ + __HOST_DEVICE__ __half2 __float22half2_rn(float2 x) { return __floats2half2_rn(x.x, x.y); @@ -691,25 +695,25 @@ THE SOFTWARE. // half | half2 -> float inline - __device__ __host__ + __HOST_DEVICE__ float __half2float(__half x) { return static_cast<__half_raw>(x).data; } inline - __device__ __host__ + __HOST_DEVICE__ float __low2float(__half2 x) { return static_cast<__half2_raw>(x).data.x; } inline - __device__ __host__ + __HOST_DEVICE__ float __high2float(__half2 x) { return static_cast<__half2_raw>(x).data.y; } inline - __device__ __host__ + __HOST_DEVICE__ float2 __half22float2(__half2 x) { return make_float2( @@ -1044,16 +1048,16 @@ THE SOFTWARE. __half __ldcs(const __half* ptr) { return *ptr; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __ldg(const __half2* ptr) { return *ptr; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __ldcg(const __half2* ptr) { return *ptr; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __ldca(const __half2* ptr) { return *ptr; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __ldcs(const __half2* ptr) { return *ptr; } // Relations @@ -1119,7 +1123,7 @@ THE SOFTWARE. bool __hgtu(__half x, __half y) { return __hgt(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __heq2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(x).data == @@ -1127,7 +1131,7 @@ THE SOFTWARE. return __builtin_convertvector(-r, _Float16_2); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hne2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(x).data != @@ -1135,7 +1139,7 @@ THE SOFTWARE. return __builtin_convertvector(-r, _Float16_2); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hle2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(x).data <= @@ -1143,7 +1147,7 @@ THE SOFTWARE. return __builtin_convertvector(-r, _Float16_2); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hge2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(x).data >= @@ -1151,7 +1155,7 @@ THE SOFTWARE. return __builtin_convertvector(-r, _Float16_2); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hlt2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(x).data < @@ -1159,7 +1163,7 @@ THE SOFTWARE. return __builtin_convertvector(-r, _Float16_2); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hgt2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(x).data > @@ -1167,83 +1171,83 @@ THE SOFTWARE. return __builtin_convertvector(-r, _Float16_2); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hequ2(__half2 x, __half2 y) { return __heq2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hneu2(__half2 x, __half2 y) { return __hne2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hleu2(__half2 x, __half2 y) { return __hle2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hgeu2(__half2 x, __half2 y) { return __hge2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hltu2(__half2 x, __half2 y) { return __hlt2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hgtu2(__half2 x, __half2 y) { return __hgt2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbeq2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__heq2(x, y)); return r.data.x != 0 && r.data.y != 0; } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbne2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hne2(x, y)); return r.data.x != 0 && r.data.y != 0; } inline - __host__ __device__ + __HOST_DEVICE__ bool __hble2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hle2(x, y)); return r.data.x != 0 && r.data.y != 0; } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbge2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hge2(x, y)); return r.data.x != 0 && r.data.y != 0; } inline - __host__ __device__ + __HOST_DEVICE__ bool __hblt2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hlt2(x, y)); return r.data.x != 0 && r.data.y != 0; } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbgt2(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hgt2(x, y)); return r.data.x != 0 && r.data.y != 0; } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbequ2(__half2 x, __half2 y) { return __hbeq2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbneu2(__half2 x, __half2 y) { return __hbne2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbleu2(__half2 x, __half2 y) { return __hble2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbgeu2(__half2 x, __half2 y) { return __hbge2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbltu2(__half2 x, __half2 y) { return __hblt2(x, y); } inline - __host__ __device__ + __HOST_DEVICE__ bool __hbgtu2(__half2 x, __half2 y) { return __hbgt2(x, y); } // Arithmetic @@ -1332,7 +1336,7 @@ THE SOFTWARE. } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hadd2(__half2 x, __half2 y) { return __half2_raw{ @@ -1340,14 +1344,14 @@ THE SOFTWARE. static_cast<__half2_raw>(y).data}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __habs2(__half2 x) { return __half2_raw{ __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hsub2(__half2 x, __half2 y) { return __half2_raw{ @@ -1355,7 +1359,7 @@ THE SOFTWARE. static_cast<__half2_raw>(y).data}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hmul2(__half2 x, __half2 y) { return __half2_raw{ @@ -1363,7 +1367,7 @@ THE SOFTWARE. static_cast<__half2_raw>(y).data}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hadd2_sat(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hadd2(x, y)); @@ -1372,7 +1376,7 @@ THE SOFTWARE. __clamp_01(__half_raw{r.data.y})}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hsub2_sat(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hsub2(x, y)); @@ -1381,7 +1385,7 @@ THE SOFTWARE. __clamp_01(__half_raw{r.data.y})}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hmul2_sat(__half2 x, __half2 y) { auto r = static_cast<__half2_raw>(__hmul2(x, y)); @@ -1390,13 +1394,13 @@ THE SOFTWARE. __clamp_01(__half_raw{r.data.y})}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hfma2(__half2 x, __half2 y, __half2 z) { return __half2_raw{__ocml_fma_2f16(x, y, z)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z) { auto r = static_cast<__half2_raw>(__hfma2(x, y, z)); @@ -1405,7 +1409,7 @@ THE SOFTWARE. __clamp_01(__half_raw{r.data.y})}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __h2div(__half2 x, __half2 y) { return __half2_raw{ @@ -1548,82 +1552,82 @@ THE SOFTWARE. } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2trunc(__half2 x) { return __half2_raw{__ocml_trunc_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2ceil(__half2 x) { return __half2_raw{__ocml_ceil_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2floor(__half2 x) { return __half2_raw{__ocml_floor_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2rint(__half2 x) { return __half2_raw{__ocml_rint_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2sin(__half2 x) { return __half2_raw{__ocml_sin_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2cos(__half2 x) { return __half2_raw{__ocml_cos_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2exp(__half2 x) { return __half2_raw{__ocml_exp_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2exp2(__half2 x) { return __half2_raw{__ocml_exp2_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2exp10(__half2 x) { return __half2_raw{__ocml_exp10_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2log2(__half2 x) { return __half2_raw{__ocml_log2_2f16(x)}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2log(__half2 x) { return __ocml_log_2f16(x); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2log10(__half2 x) { return __ocml_log10_2f16(x); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2rcp(__half2 x) { return __llvm_amdgcn_rcp_2f16(x); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2rsqrt(__half2 x) { return __ocml_rsqrt_2f16(x); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 h2sqrt(__half2 x) { return __ocml_sqrt_2f16(x); } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hisinf2(__half2 x) { auto r = __ocml_isinf_2f16(x); @@ -1631,7 +1635,7 @@ THE SOFTWARE. static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hisnan2(__half2 x) { auto r = __ocml_isnan_2f16(x); @@ -1639,7 +1643,7 @@ THE SOFTWARE. static_cast<_Float16>(r.x), static_cast<_Float16>(r.y)}}; } inline - __host__ __device__ + __HOST_DEVICE__ __half2 __hneg2(__half2 x) { return __half2_raw{-static_cast<__half2_raw>(x).data}; diff --git a/include/hip/amd_detail/hip_runtime.h b/include/hip/amd_detail/hip_runtime.h index ca88fda2f0..a3db57ffe3 100644 --- a/include/hip/amd_detail/hip_runtime.h +++ b/include/hip/amd_detail/hip_runtime.h @@ -34,6 +34,7 @@ THE SOFTWARE. //--- // Top part of file can be compiled with any compiler +#if !defined(__HIPCC_RTC__) //#include #if __cplusplus #include @@ -42,7 +43,8 @@ THE SOFTWARE. #include #include #include -#endif //__cplusplus +#endif // __cplusplus +#endif // !defined(__HIPCC_RTC__) // __hip_malloc is not working. Disable it by default. #ifndef __HIP_ENABLE_DEVICE_MALLOC__ @@ -57,9 +59,10 @@ THE SOFTWARE. #define CUDA_SUCCESS hipSuccess +#if !defined(__HIPCC_RTC__) #include - extern int HIP_TRACE_API; +#endif // !defined(__HIPCC_RTC__) #ifdef __cplusplus #include @@ -121,7 +124,9 @@ extern int HIP_TRACE_API; #define __launch_bounds__(...) \ select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) +#if !defined(__HIPCC_RTC__) __host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; } +#endif // !defined(__HIPCC_RTC__) #if __HIP_ARCH_GFX701__ == 0 @@ -162,6 +167,7 @@ static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullpt // // hip-clang functions // +#if !defined(__HIPCC_RTC__) #define HIP_KERNEL_NAME(...) __VA_ARGS__ #define HIP_SYMBOL(X) X @@ -218,6 +224,8 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, #endif #include +#endif // !defined(__HIPCC_RTC__) + extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint); extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint); extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint); @@ -245,6 +253,17 @@ struct __HIP_ThreadIdx { } }; +#if defined(__HIPCC_RTC__) +typedef struct dim3 { + uint32_t x; ///< x + uint32_t y; ///< y + uint32_t z; ///< z +#ifdef __cplusplus + constexpr __device__ dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){}; +#endif +} dim3; +#endif // !defined(__HIPCC_RTC__) + template struct __HIP_Coordinates { using R = decltype(F{}(0)); @@ -371,6 +390,7 @@ hc_get_workitem_absolute_id(int dim) #endif #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ +#if !defined(__HIPCC_RTC__) // Support std::complex. #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ #pragma push_macro("__CUDA__") @@ -388,6 +408,7 @@ hc_get_workitem_absolute_id(int dim) #undef __CUDA__ #pragma pop_macro("__CUDA__") #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ +#endif // !defined(__HIPCC_RTC__) #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ #endif // __HIP_CLANG_ONLY__ diff --git a/include/hip/amd_detail/hip_texture_types.h b/include/hip/amd_detail/hip_texture_types.h index 22016e25ad..0dc40ec0ed 100644 --- a/include/hip/amd_detail/hip_texture_types.h +++ b/include/hip/amd_detail/hip_texture_types.h @@ -33,9 +33,10 @@ THE SOFTWARE. * * * * *******************************************************************************/ +#if !defined(__HIPCC_RTC__) #include -//#include #include +#endif // !defined(__HIPCC_RTC__) #include #if __cplusplus diff --git a/include/hip/amd_detail/hip_vector_types.h b/include/hip/amd_detail/hip_vector_types.h index 88635befc8..72a828aaf2 100644 --- a/include/hip/amd_detail/hip_vector_types.h +++ b/include/hip/amd_detail/hip_vector_types.h @@ -30,6 +30,12 @@ THE SOFTWARE. #include "hip/amd_detail/host_defines.h" +#if defined(__HIPCC_RTC__) + #define __HOST_DEVICE__ __device__ +#else + #define __HOST_DEVICE__ __host__ __device__ +#endif + #if defined(__has_attribute) #if __has_attribute(ext_vector_type) #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n))) @@ -38,9 +44,11 @@ THE SOFTWARE. #endif #if defined(__cplusplus) +#if !defined(__HIPCC_RTC__) #include #include #include +#endif // !defined(__HIPCC_RTC__) namespace hip_impl { template struct Scalar_accessor; @@ -61,20 +69,20 @@ THE SOFTWARE. struct Address { const Scalar_accessor* p; - __host__ __device__ + __HOST_DEVICE__ operator const T*() const noexcept { return &reinterpret_cast(p)[idx]; } - __host__ __device__ + __HOST_DEVICE__ operator const T*() const volatile noexcept { return &reinterpret_cast(p)[idx]; } - __host__ __device__ + __HOST_DEVICE__ operator T*() noexcept { return &reinterpret_cast( const_cast(p))[idx]; } - __host__ __device__ + __HOST_DEVICE__ operator T*() volatile noexcept { return &reinterpret_cast( const_cast(p))[idx]; @@ -101,9 +109,9 @@ THE SOFTWARE. // Idea from https://t0rakka.silvrback.com/simd-scalar-accessor Vector data; - __host__ __device__ + __HOST_DEVICE__ operator T() const noexcept { return data[idx]; } - __host__ __device__ + __HOST_DEVICE__ operator T() const volatile noexcept { return data[idx]; } #ifdef __HIP_ENABLE_VECTOR_SCALAR_ACCESSORY_ENUM_CONVERSION__ @@ -116,7 +124,7 @@ THE SOFTWARE. std::is_enum{} && std::is_convertible< T, typename std::enable_if::value, std::underlying_type>::type::type>{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ operator U() const noexcept { return static_cast(data[idx]); } template< typename U, @@ -125,60 +133,60 @@ THE SOFTWARE. std::is_enum{} && std::is_convertible< T, typename std::enable_if::value, std::underlying_type>::type::type>{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ operator U() const volatile noexcept { return static_cast(data[idx]); } #endif - __host__ __device__ + __HOST_DEVICE__ operator T&() noexcept { return reinterpret_cast< T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx]; } - __host__ __device__ + __HOST_DEVICE__ operator volatile T&() volatile noexcept { return reinterpret_cast< volatile T (&)[sizeof(Vector) / sizeof(T)]>(data)[idx]; } - __host__ __device__ + __HOST_DEVICE__ Address operator&() const noexcept { return Address{this}; } - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator=(const Scalar_accessor& x) noexcept { data[idx] = x.data[idx]; return *this; } - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator=(T x) noexcept { data[idx] = x; return *this; } - __host__ __device__ + __HOST_DEVICE__ volatile Scalar_accessor& operator=(T x) volatile noexcept { data[idx] = x; return *this; } - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator++() noexcept { ++data[idx]; return *this; } - __host__ __device__ + __HOST_DEVICE__ T operator++(int) noexcept { auto r{data[idx]}; ++data[idx]; return *this; } - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator--() noexcept { --data[idx]; return *this; } - __host__ __device__ + __HOST_DEVICE__ T operator--(int) noexcept { auto r{data[idx]}; --data[idx]; @@ -191,7 +199,7 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator+=(U x) noexcept { data[idx] += x; return *this; @@ -200,7 +208,7 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator-=(U x) noexcept { data[idx] -= x; return *this; @@ -210,7 +218,7 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator*=(U x) noexcept { data[idx] *= x; return *this; @@ -219,7 +227,7 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator/=(U x) noexcept { data[idx] /= x; return *this; @@ -228,7 +236,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{} && std::is_integral{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator%=(U x) noexcept { data[idx] %= x; return *this; @@ -238,7 +246,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{} && std::is_integral{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator>>=(U x) noexcept { data[idx] >>= x; return *this; @@ -247,7 +255,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{} && std::is_integral{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator<<=(U x) noexcept { data[idx] <<= x; return *this; @@ -256,7 +264,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{} && std::is_integral{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator&=(U x) noexcept { data[idx] &= x; return *this; @@ -265,7 +273,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{} && std::is_integral{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator|=(U x) noexcept { data[idx] |= x; return *this; @@ -274,7 +282,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{} && std::is_integral{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Scalar_accessor& operator^=(U x) noexcept { data[idx] ^= x; return *this; @@ -308,22 +316,22 @@ THE SOFTWARE. using value_type = T; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ explicit constexpr HIP_vector_base(T x_) noexcept : data{x_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(const HIP_vector_base&) = default; - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(HIP_vector_base&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base& operator=(const HIP_vector_base& x_) noexcept { #if __has_attribute(ext_vector_type) data = x_.data; @@ -358,25 +366,25 @@ THE SOFTWARE. using value_type = T; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ explicit constexpr HIP_vector_base(T x_) noexcept : data{x_, x_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(const HIP_vector_base&) = default; - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(HIP_vector_base&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base& operator=(const HIP_vector_base& x_) noexcept { #if __has_attribute(ext_vector_type) data = x_.data; @@ -394,55 +402,55 @@ THE SOFTWARE. struct Native_vec_ { T d[3]; - __host__ __device__ + __HOST_DEVICE__ Native_vec_() = default; - __host__ __device__ + __HOST_DEVICE__ explicit constexpr Native_vec_(T x_) noexcept : d{x_, x_, x_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr Native_vec_(const Native_vec_&) = default; - __host__ __device__ + __HOST_DEVICE__ constexpr Native_vec_(Native_vec_&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~Native_vec_() = default; - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator=(const Native_vec_&) = default; - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator=(Native_vec_&&) = default; - __host__ __device__ + __HOST_DEVICE__ T& operator[](unsigned int idx) noexcept { return d[idx]; } - __host__ __device__ + __HOST_DEVICE__ T operator[](unsigned int idx) const noexcept { return d[idx]; } - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator+=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] += x_.d[i]; return *this; } - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator-=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] -= x_.d[i]; return *this; } - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator*=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] *= x_.d[i]; return *this; } - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator/=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] /= x_.d[i]; @@ -452,7 +460,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_ operator-() const noexcept { auto r{*this}; @@ -463,7 +471,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_ operator~() const noexcept { auto r{*this}; @@ -473,7 +481,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator%=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] %= x_.d[i]; @@ -482,7 +490,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator^=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] ^= x_.d[i]; @@ -491,7 +499,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator|=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] |= x_.d[i]; @@ -500,7 +508,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator&=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] &= x_.d[i]; @@ -509,7 +517,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator>>=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] >>= x_.d[i]; @@ -518,7 +526,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ Native_vec_& operator<<=(const Native_vec_& x_) noexcept { for (auto i = 0u; i != 3u; ++i) d[i] <<= x_.d[i]; @@ -526,7 +534,7 @@ THE SOFTWARE. } using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int)))); - __host__ __device__ + __HOST_DEVICE__ Vec3_cmp operator==(const Native_vec_& x_) const noexcept { return Vec3_cmp{d[0] == x_.d[0], d[1] == x_.d[1], d[2] == x_.d[2]}; @@ -544,27 +552,27 @@ THE SOFTWARE. using value_type = T; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ explicit constexpr HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(const HIP_vector_base&) = default; - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(HIP_vector_base&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base& operator=(const HIP_vector_base&) = default; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base& operator=(HIP_vector_base&&) = default; }; @@ -595,25 +603,25 @@ THE SOFTWARE. using value_type = T; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ explicit constexpr HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(T x_, T y_, T z_, T w_) noexcept : data{x_, y_, z_, w_} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(const HIP_vector_base&) = default; - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_base(HIP_vector_base&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~HIP_vector_base() = default; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_base& operator=(const HIP_vector_base& x_) noexcept { #if __has_attribute(ext_vector_type) data = x_.data; @@ -633,13 +641,13 @@ THE SOFTWARE. using HIP_vector_base::data; using typename HIP_vector_base::Native_vec_; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type() = default; template< typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ explicit constexpr HIP_vector_type(U x_) noexcept @@ -649,32 +657,32 @@ THE SOFTWARE. typename... Us, typename std::enable_if< (rank > 1) && sizeof...(Us) == rank>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_type(Us... xs) noexcept : HIP_vector_base{static_cast(xs)...} {} - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_type(const HIP_vector_type&) = default; - __host__ __device__ + __HOST_DEVICE__ constexpr HIP_vector_type(HIP_vector_type&&) = default; - __host__ __device__ + __HOST_DEVICE__ ~HIP_vector_type() = default; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator=(const HIP_vector_type&) = default; - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator=(HIP_vector_type&&) = default; // Operators - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator++() noexcept { return *this += HIP_vector_type{1}; } - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type operator++(int) noexcept { auto tmp(*this); @@ -682,12 +690,12 @@ THE SOFTWARE. return tmp; } - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator--() noexcept { return *this -= HIP_vector_type{1}; } - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type operator--(int) noexcept { auto tmp(*this); @@ -695,7 +703,7 @@ THE SOFTWARE. return tmp; } - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator+=(const HIP_vector_type& x) noexcept { data += x.data; @@ -705,13 +713,13 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator+=(U x) noexcept { return *this += HIP_vector_type{x}; } - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator-=(const HIP_vector_type& x) noexcept { data -= x.data; @@ -721,13 +729,13 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator-=(U x) noexcept { return *this -= HIP_vector_type{x}; } - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator*=(const HIP_vector_type& x) noexcept { data *= x.data; @@ -737,13 +745,13 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator*=(U x) noexcept { return *this *= HIP_vector_type{x}; } - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator/=(const HIP_vector_type& x) noexcept { data /= x.data; @@ -753,7 +761,7 @@ THE SOFTWARE. typename U, typename std::enable_if< std::is_convertible{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator/=(U x) noexcept { return *this /= HIP_vector_type{x}; @@ -762,7 +770,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type operator-() const noexcept { auto tmp(*this); @@ -773,7 +781,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type operator~() const noexcept { HIP_vector_type r{*this}; @@ -784,7 +792,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator%=(const HIP_vector_type& x) noexcept { data %= x.data; @@ -794,7 +802,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator^=(const HIP_vector_type& x) noexcept { data ^= x.data; @@ -804,7 +812,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator|=(const HIP_vector_type& x) noexcept { data |= x.data; @@ -814,7 +822,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator&=(const HIP_vector_type& x) noexcept { data &= x.data; @@ -824,7 +832,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator>>=(const HIP_vector_type& x) noexcept { data >>= x.data; @@ -834,7 +842,7 @@ THE SOFTWARE. template< typename U = T, typename std::enable_if{}>::type* = nullptr> - __host__ __device__ + __HOST_DEVICE__ HIP_vector_type& operator<<=(const HIP_vector_type& x) noexcept { data <<= x.data; @@ -843,7 +851,7 @@ THE SOFTWARE. }; template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator+( @@ -852,7 +860,7 @@ THE SOFTWARE. return HIP_vector_type{x} += y; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator+( @@ -861,7 +869,7 @@ THE SOFTWARE. return HIP_vector_type{x} += HIP_vector_type{y}; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator+( @@ -871,7 +879,7 @@ THE SOFTWARE. } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator-( @@ -880,7 +888,7 @@ THE SOFTWARE. return HIP_vector_type{x} -= y; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator-( @@ -889,7 +897,7 @@ THE SOFTWARE. return HIP_vector_type{x} -= HIP_vector_type{y}; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator-( @@ -899,7 +907,7 @@ THE SOFTWARE. } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator*( @@ -908,7 +916,7 @@ THE SOFTWARE. return HIP_vector_type{x} *= y; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator*( @@ -917,7 +925,7 @@ THE SOFTWARE. return HIP_vector_type{x} *= HIP_vector_type{y}; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator*( @@ -927,7 +935,7 @@ THE SOFTWARE. } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator/( @@ -936,7 +944,7 @@ THE SOFTWARE. return HIP_vector_type{x} /= y; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator/( @@ -945,7 +953,7 @@ THE SOFTWARE. return HIP_vector_type{x} /= HIP_vector_type{y}; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator/( @@ -955,7 +963,7 @@ THE SOFTWARE. } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr bool _hip_any_zero(const V& x, int n) noexcept @@ -965,7 +973,7 @@ THE SOFTWARE. } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr bool operator==( @@ -974,7 +982,7 @@ THE SOFTWARE. return _hip_any_zero(x.data == y.data, n - 1); } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr bool operator==(const HIP_vector_type& x, U y) noexcept @@ -982,7 +990,7 @@ THE SOFTWARE. return x == HIP_vector_type{y}; } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr bool operator==(U x, const HIP_vector_type& y) noexcept @@ -991,7 +999,7 @@ THE SOFTWARE. } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr bool operator!=( @@ -1000,7 +1008,7 @@ THE SOFTWARE. return !(x == y); } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr bool operator!=(const HIP_vector_type& x, U y) noexcept @@ -1008,7 +1016,7 @@ THE SOFTWARE. return !(x == y); } template - __host__ __device__ + __HOST_DEVICE__ inline constexpr bool operator!=(U x, const HIP_vector_type& y) noexcept @@ -1020,7 +1028,7 @@ THE SOFTWARE. typename T, unsigned int n, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator%( @@ -1033,7 +1041,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator%( @@ -1046,7 +1054,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator%( @@ -1059,7 +1067,7 @@ THE SOFTWARE. typename T, unsigned int n, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator^( @@ -1072,7 +1080,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator^( @@ -1085,7 +1093,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator^( @@ -1098,7 +1106,7 @@ THE SOFTWARE. typename T, unsigned int n, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator|( @@ -1111,7 +1119,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator|( @@ -1124,7 +1132,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator|( @@ -1137,7 +1145,7 @@ THE SOFTWARE. typename T, unsigned int n, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator&( @@ -1150,7 +1158,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator&( @@ -1163,7 +1171,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator&( @@ -1176,7 +1184,7 @@ THE SOFTWARE. typename T, unsigned int n, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator>>( @@ -1189,7 +1197,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator>>( @@ -1202,7 +1210,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator>>( @@ -1215,7 +1223,7 @@ THE SOFTWARE. typename T, unsigned int n, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator<<( @@ -1228,7 +1236,7 @@ THE SOFTWARE. unsigned int n, typename U, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator<<( @@ -1242,7 +1250,7 @@ THE SOFTWARE. typename U, typename std::enable_if::value>::type, typename std::enable_if{}>* = nullptr> - __host__ __device__ + __HOST_DEVICE__ inline constexpr HIP_vector_type operator<<( @@ -1293,38 +1301,38 @@ __MAKE_VECTOR_TYPE__(double, double); #ifdef __cplusplus #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x) { type r{x}; return r; } #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x, comp y) { type r{x, y}; return r; } #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x, comp y, comp z) { type r{x, y, z}; return r; } #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x, comp y, comp z, comp w) { \ type r{x, y, z, w}; \ return r; \ } #else #define DECLOP_MAKE_ONE_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x) { type r; r.x =x; return r; } #define DECLOP_MAKE_TWO_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x, comp y) { type r; r.x=x; r.y=y; return r; } #define DECLOP_MAKE_THREE_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x, comp y, comp z) { type r; r.x=x; r.y=y; r.z=z; return r; } #define DECLOP_MAKE_FOUR_COMPONENT(comp, type) \ - static inline __device__ __host__ \ + static inline __HOST_DEVICE__ \ type make_##type(comp x, comp y, comp z, comp w) { \ type r; r.x=x; r.y=y; r.z=z; r.w=w; \ return r; \ diff --git a/include/hip/amd_detail/math_functions.h b/include/hip/amd_detail/math_functions.h index c2dfffb77a..2cbee4829a 100644 --- a/include/hip/amd_detail/math_functions.h +++ b/include/hip/amd_detail/math_functions.h @@ -28,18 +28,18 @@ THE SOFTWARE. #include +#if !defined(__HIPCC_RTC__) #include - // assert.h is only for the host version of assert. // The device version of assert is implemented in hip/amd_detail/hip_runtime.h. // Users should include hip_runtime.h for the device version of assert. #if !__HIP_DEVICE_COMPILE__ #include #endif - #include #include #include +#endif // !defined(__HIPCC_RTC__) #if _LIBCPP_VERSION && __HIP__ namespace std { @@ -1460,6 +1460,7 @@ double min(double x, double y) { __HIP_OVERLOAD2(double, max) __HIP_OVERLOAD2(double, min) +#if !defined(__HIPCC_RTC__) __host__ inline static int min(int arg1, int arg2) { return std::min(arg1, arg2); } @@ -1467,6 +1468,7 @@ __host__ inline static int min(int arg1, int arg2) { __host__ inline static int max(int arg1, int arg2) { return std::max(arg1, arg2); } +#endif // !defined(__HIPCC_RTC__) __DEVICE__ inline float pow(float base, int iexp) { diff --git a/include/hip/amd_detail/texture_fetch_functions.h b/include/hip/amd_detail/texture_fetch_functions.h index 004b021b80..399e4fecf7 100644 --- a/include/hip/amd_detail/texture_fetch_functions.h +++ b/include/hip/amd_detail/texture_fetch_functions.h @@ -25,10 +25,12 @@ THE SOFTWARE. #if defined(__cplusplus) #include -#include +#include #include +#if !defined(__HIPCC_RTC__) #include +#endif // !defined(__HIPCC_RTC__) #define TEXTURE_PARAMETERS_INIT \ unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; \ diff --git a/include/hip/amd_detail/texture_indirect_functions.h b/include/hip/amd_detail/texture_indirect_functions.h index d55c9a50a4..87279da8c0 100644 --- a/include/hip/amd_detail/texture_indirect_functions.h +++ b/include/hip/amd_detail/texture_indirect_functions.h @@ -28,7 +28,9 @@ THE SOFTWARE. #include #include +#if !defined(__HIPCC_RTC__) #include +#endif // !defined(__HIPCC_RTC__) #define TEXTURE_OBJECT_PARAMETERS_INIT \ unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \ diff --git a/include/hip/hip_runtime.h b/include/hip/hip_runtime.h index e989eaa28e..73dd87226f 100644 --- a/include/hip/hip_runtime.h +++ b/include/hip/hip_runtime.h @@ -41,6 +41,7 @@ THE SOFTWARE. #error HIP is not supported on GFX10 with wavefront size 64 #endif +#if !defined(__HIPCC_RTC__) // Some standard header files, these are included by hc.hpp and so want to make them avail on both // paths to provide a consistent include env and avoid "missing symbol" errors that only appears // on NVCC path: @@ -52,6 +53,7 @@ THE SOFTWARE. #if __cplusplus > 199711L #include #endif +#endif // !defined(__HIPCC_RTC__) #include #include @@ -107,8 +109,10 @@ THE SOFTWARE. #endif // defined(__clang__) #endif +#if !defined(__HIPCC_RTC__) #include -#include #include +#endif // !defined(__HIPCC_RTC__) +#include #endif