From 78a3dc739d83f4bf06d7533bb06415035f66b981 Mon Sep 17 00:00:00 2001 From: Satyanvesh Dittakavi Date: Wed, 26 Apr 2023 18:13:36 +0000 Subject: [PATCH] SWDEV-395996 - Add HIPRTC support for missing hip headers hip headers such as hip_math_constants.h, library_types.h, hip_common.h, channel_descriptor.h, device_functions.h, hip_complex.h, hip_texture_types.h, math_functions.h, surface_types.h are added in HIPRTC Change-Id: I4a4c198449ceb609c3ff55e00b43056c1f085431 [ROCm/clr commit: d7d0f1131882ea1f42b7c42235b66d88cd9305a1] --- projects/clr/hipamd/CMakeLists.txt | 18 +- .../hip/amd_detail/amd_channel_descriptor.h | 13 +- .../hip/amd_detail/amd_device_functions.h | 31 +- .../include/hip/amd_detail/amd_hip_atomic.h | 14 + .../include/hip/amd_detail/amd_hip_complex.h | 20 +- .../include/hip/amd_detail/amd_hip_runtime.h | 41 +- .../hip/amd_detail/amd_math_functions.h | 1415 +---------------- .../hip/amd_detail/amd_surface_functions.h | 28 +- .../hip/amd_detail/device_library_decls.h | 15 +- .../hipamd/include/hip/amd_detail/math_fwd.h | 17 +- .../include/hip/amd_detail/ockl_image.h | 15 +- .../hip/amd_detail/texture_fetch_functions.h | 21 +- .../amd_detail/texture_indirect_functions.h | 22 +- projects/clr/hipamd/packaging/CMakeLists.txt | 6 +- projects/clr/hipamd/src/CMakeLists.txt | 4 +- projects/clr/hipamd/src/hiprtc/CMakeLists.txt | 37 +- .../clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake | 35 +- 17 files changed, 271 insertions(+), 1481 deletions(-) diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt index 98432a13cd..194e50a47a 100755 --- a/projects/clr/hipamd/CMakeLists.txt +++ b/projects/clr/hipamd/CMakeLists.txt @@ -292,15 +292,15 @@ set(_versionInfoHeader ") file(WRITE "${PROJECT_BINARY_DIR}/include/hip/hip_version.h" ${_versionInfoHeader}) -if(HIP_RUNTIME STREQUAL "rocclr") - add_subdirectory(src) -endif() - # Generate .hipInfo -file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) +file(WRITE "${PROJECT_BINARY_DIR}/share/hip/.hipInfo" ${_buildInfo}) # Generate version -file(WRITE "${PROJECT_BINARY_DIR}/version" ${_versionInfo}) +file(WRITE "${PROJECT_BINARY_DIR}/share/hip/version" ${_versionInfo}) + +if(HIP_RUNTIME STREQUAL "rocclr") + add_subdirectory(src) +endif() # Build doxygen documentation find_program(DOXYGEN_EXE doxygen) @@ -321,14 +321,14 @@ endif() ############################# # Install .hipInfo -install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(FILES ${PROJECT_BINARY_DIR}/share/hip/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR}) # Install version -install(FILES ${PROJECT_BINARY_DIR}/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip) +install(FILES ${PROJECT_BINARY_DIR}/share/hip/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip) # .hipVersion is added to satisfy Windows compute build. #TODO to be removed if(WIN32) - install(FILES ${PROJECT_BINARY_DIR}/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion) + install(FILES ${PROJECT_BINARY_DIR}/share/hip/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion) endif() # Install src, bin, include & cmake if necessary diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h b/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h index bf2c337901..f5ba75ebe6 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,9 +23,16 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H #define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#endif + +#if !defined(__HIPCC_RTC__) #include #include #include +#endif #ifdef __cplusplus @@ -353,4 +360,8 @@ struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, #endif /* __cplusplus */ +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + #endif /* !HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H */ diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h index 2dc3d620e8..6f230bfc21 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -23,18 +23,32 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H -#include "host_defines.h" -#include "math_fwd.h" +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#pragma clang diagnostic ignored "-Wimplicit-int-conversion" +#pragma clang diagnostic ignored "-Wimplicit-float-conversion" +#pragma clang diagnostic ignored "-Wmissing-noreturn" +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" +#pragma clang diagnostic ignored "-Wunneeded-internal-declaration" +#pragma clang diagnostic ignored "-Wshift-count-overflow" +#endif #if !defined(__HIPCC_RTC__) +#include "host_defines.h" +#include "math_fwd.h" #include #include -#endif // !defined(__HIPCC_RTC__) - #include #include +#endif // !defined(__HIPCC_RTC__) -#if __HIP_CLANG_ONLY__ +#if defined(__clang__) && defined(__HIP__) extern "C" __device__ int printf(const char *fmt, ...); #else template @@ -639,7 +653,7 @@ __device__ static inline float __ull2float_rz(unsigned long long int x) { return __ocml_cvtrtz_f32_u64(x); } -#if __HIP_CLANG_ONLY__ +#if defined(__clang__) && defined(__HIP__) // Clock functions __device__ long long int __clock64(); @@ -1105,4 +1119,9 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) { return __hip_hc_memset(ptr, val8, size); } #endif // !__OPENMP_AMDGCN__ + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + #endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h index 4b4276935d..3f55831f77 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h @@ -22,7 +22,17 @@ THE SOFTWARE. #pragma once +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#pragma clang diagnostic ignored "-Wsign-conversion" +#endif + +#if !defined(__HIPCC_RTC__) #include "amd_device_functions.h" +#endif #if __has_builtin(__hip_atomic_compare_exchange_strong) @@ -1625,4 +1635,8 @@ unsigned long long atomicXor( return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + #endif // __hip_atomic_compare_exchange_strong diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h index 9d9dfd5e9d..933fd4e165 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,21 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" +#pragma clang diagnostic ignored "-Wimplicit-float-conversion" +#pragma clang diagnostic ignored "-Wdouble-promotion" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif + +#if !defined(__HIPCC_RTC__) #include "hip/amd_detail/amd_hip_vector_types.h" +#endif #if defined(__HIPCC_RTC__) #define __HOST_DEVICE__ __device__ @@ -311,4 +325,8 @@ __HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDo return make_hipDoubleComplex(real, imag); } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + #endif //HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h index 2606ca0cf0..bd273b6b05 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -110,18 +110,19 @@ using ::int64_t; #if !defined(__HIPCC_RTC__) #include +#include +#include +#include +#include +#include extern int HIP_TRACE_API; #endif // !defined(__HIPCC_RTC__) #ifdef __cplusplus #include #endif -#include + #include -#include -#include -#include -#include // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__) @@ -244,10 +245,10 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, #include #endif // !defined(__HIPCC_RTC__) -extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint); -extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint); -extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint); -extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(unsigned int); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(unsigned int); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(unsigned int); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(unsigned int); struct __HIP_BlockIdx { __device__ std::uint32_t operator()(std::uint32_t x) const noexcept { return __ockl_get_group_id(x); } @@ -282,7 +283,7 @@ typedef struct dim3 { } dim3; #endif // !defined(__HIPCC_RTC__) -extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(uint); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(unsigned int); #ifdef __cplusplus template struct __HIP_Coordinates { @@ -367,33 +368,35 @@ static constexpr __HIP_Coordinates<__HIP_GridDim> gridDim{}; static constexpr __HIP_Coordinates<__HIP_ThreadIdx> threadIdx{}; #endif // __cplusplus -extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(unsigned int); #define hipThreadIdx_x (__ockl_get_local_id(0)) #define hipThreadIdx_y (__ockl_get_local_id(1)) #define hipThreadIdx_z (__ockl_get_local_id(2)) -extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(unsigned int); #define hipBlockIdx_x (__ockl_get_group_id(0)) #define hipBlockIdx_y (__ockl_get_group_id(1)) #define hipBlockIdx_z (__ockl_get_group_id(2)) -extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(unsigned int); #define hipBlockDim_x (__ockl_get_local_size(0)) #define hipBlockDim_y (__ockl_get_local_size(1)) #define hipBlockDim_z (__ockl_get_local_size(2)) -extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint); +extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(unsigned int); #define hipGridDim_x (__ockl_get_num_groups(0)) #define hipGridDim_y (__ockl_get_num_groups(1)) #define hipGridDim_z (__ockl_get_num_groups(2)) +#if !defined(__HIPCC_RTC__) #include +#endif #if __HIP_HCC_COMPAT_MODE__ // Define HCC work item functions in terms of HIP builtin variables. #pragma push_macro("__DEFINE_HCC_FUNC") #define __DEFINE_HCC_FUNC(hc_fun,hip_var) \ -inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \ +inline __device__ __attribute__((always_inline)) unsigned int hc_get_##hc_fun(unsigned int i) { \ if (i==0) \ return hip_var.x; \ else if(i==1) \ @@ -408,11 +411,11 @@ __DEFINE_HCC_FUNC(group_size, blockDim) __DEFINE_HCC_FUNC(num_groups, gridDim) #pragma pop_macro("__DEFINE_HCC_FUNC") -extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint); -inline __device__ __attribute__((always_inline)) uint +extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(unsigned int); +inline __device__ __attribute__((always_inline)) unsigned int hc_get_workitem_absolute_id(int dim) { - return (uint)__ockl_get_global_id(dim); + return (unsigned int)__ockl_get_global_id(dim); } #endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h index 3c17d298ea..37172f402f 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,13 +22,18 @@ THE SOFTWARE. #pragma once +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wundef" +#endif + +#if !defined(__HIPCC_RTC__) #include "hip_fp16_math_fwd.h" #include "amd_hip_vector_types.h" #include "math_fwd.h" #include -#if !defined(__HIPCC_RTC__) #include // assert.h is only for the host version of assert. // The device version of assert is implemented in hip/amd_detail/hip_runtime.h. @@ -60,77 +65,6 @@ struct __numeric_type<_Float16> #define __DEVICE__ static __device__ #define __RETURN_TYPE bool -#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ -__DEVICE__ -inline -uint64_t __make_mantissa_base8(const char* tagp) -{ - uint64_t r = 0; - while (tagp) { - char tmp = *tagp; - - if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0'; - else return 0; - - ++tagp; - } - - return r; -} - -__DEVICE__ -inline -uint64_t __make_mantissa_base10(const char* tagp) -{ - uint64_t r = 0; - while (tagp) { - char tmp = *tagp; - - if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0'; - else return 0; - - ++tagp; - } - - return r; -} - -__DEVICE__ -inline -uint64_t __make_mantissa_base16(const char* tagp) -{ - uint64_t r = 0; - while (tagp) { - char tmp = *tagp; - - if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0'; - else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10; - else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10; - else return 0; - - ++tagp; - } - - return r; -} - -__DEVICE__ -inline -uint64_t __make_mantissa(const char* tagp) -{ - if (!tagp) return 0u; - - if (*tagp == '0') { - ++tagp; - - if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp); - else return __make_mantissa_base8(tagp); - } - - return __make_mantissa_base10(tagp); -} -#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ - // DOT FUNCTIONS #if __HIP_CLANG_ONLY__ __DEVICE__ @@ -165,1338 +99,15 @@ uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) { } #endif -#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ -// BEGIN FLOAT -__DEVICE__ -inline -float abs(float x) { return __ocml_fabs_f32(x); } -__DEVICE__ -inline -float acosf(float x) { return __ocml_acos_f32(x); } -__DEVICE__ -inline -float acoshf(float x) { return __ocml_acosh_f32(x); } -__DEVICE__ -inline -float asinf(float x) { return __ocml_asin_f32(x); } -__DEVICE__ -inline -float asinhf(float x) { return __ocml_asinh_f32(x); } -__DEVICE__ -inline -float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); } -__DEVICE__ -inline -float atanf(float x) { return __ocml_atan_f32(x); } -__DEVICE__ -inline -float atanhf(float x) { return __ocml_atanh_f32(x); } -__DEVICE__ -inline -float cbrtf(float x) { return __ocml_cbrt_f32(x); } -__DEVICE__ -inline -float ceilf(float x) { return __ocml_ceil_f32(x); } -__DEVICE__ -inline -float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); } -__DEVICE__ -inline -float cosf(float x) { return __ocml_cos_f32(x); } -__DEVICE__ -inline -float coshf(float x) { return __ocml_cosh_f32(x); } -__DEVICE__ -inline -float cospif(float x) { return __ocml_cospi_f32(x); } -__DEVICE__ -inline -float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); } -__DEVICE__ -inline -float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); } -__DEVICE__ -inline -float erfcf(float x) { return __ocml_erfc_f32(x); } -__DEVICE__ -inline -float erfcinvf(float x) { return __ocml_erfcinv_f32(x); } -__DEVICE__ -inline -float erfcxf(float x) { return __ocml_erfcx_f32(x); } -__DEVICE__ -inline -float erff(float x) { return __ocml_erf_f32(x); } -__DEVICE__ -inline -float erfinvf(float x) { return __ocml_erfinv_f32(x); } -__DEVICE__ -inline -float exp10f(float x) { return __ocml_exp10_f32(x); } -__DEVICE__ -inline -float exp2f(float x) { return __ocml_exp2_f32(x); } -__DEVICE__ -inline -float expf(float x) { return __ocml_exp_f32(x); } -__DEVICE__ -inline -float expm1f(float x) { return __ocml_expm1_f32(x); } -__DEVICE__ -inline -float fabsf(float x) { return __ocml_fabs_f32(x); } -__DEVICE__ -inline -float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); } -__DEVICE__ -inline -float fdividef(float x, float y) { return x / y; } -__DEVICE__ -inline -float floorf(float x) { return __ocml_floor_f32(x); } -__DEVICE__ -inline -float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); } -__DEVICE__ -inline -float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); } -__DEVICE__ -inline -float fminf(float x, float y) { return __ocml_fmin_f32(x, y); } -__DEVICE__ -inline -float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); } -__DEVICE__ -inline -float frexpf(float x, int* nptr) -{ - int tmp; - float r = - __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp); - *nptr = tmp; - - return r; -} -__DEVICE__ -inline -float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); } -__DEVICE__ -inline -int ilogbf(float x) { return __ocml_ilogb_f32(x); } -__DEVICE__ -inline -__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); } -__DEVICE__ -inline -__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); } -__DEVICE__ -inline -__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); } -__DEVICE__ -inline -float j0f(float x) { return __ocml_j0_f32(x); } -__DEVICE__ -inline -float j1f(float x) { return __ocml_j1_f32(x); } -__DEVICE__ -inline -float jnf(int n, float x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. - if (n == 0) return j0f(x); - if (n == 1) return j1f(x); - - float x0 = j0f(x); - float x1 = j1f(x); - for (int i = 1; i < n; ++i) { - float x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} -__DEVICE__ -inline -float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); } -__DEVICE__ -inline -float lgammaf(float x) { return __ocml_lgamma_f32(x); } -__DEVICE__ -inline -long long int llrintf(float x) { return __ocml_rint_f32(x); } -__DEVICE__ -inline -long long int llroundf(float x) { return __ocml_round_f32(x); } -__DEVICE__ -inline -float log10f(float x) { return __ocml_log10_f32(x); } -__DEVICE__ -inline -float log1pf(float x) { return __ocml_log1p_f32(x); } -__DEVICE__ -inline -float log2f(float x) { return __ocml_log2_f32(x); } -__DEVICE__ -inline -float logbf(float x) { return __ocml_logb_f32(x); } -__DEVICE__ -inline -float logf(float x) { return __ocml_log_f32(x); } -__DEVICE__ -inline -long int lrintf(float x) { return __ocml_rint_f32(x); } -__DEVICE__ -inline -long int lroundf(float x) { return __ocml_round_f32(x); } -__DEVICE__ -inline -float modff(float x, float* iptr) -{ - float tmp; - float r = - __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp); - *iptr = tmp; - - return r; -} -__DEVICE__ -inline -float nanf(const char* tagp) -{ - union { - float val; - struct ieee_float { - uint32_t mantissa : 22; - uint32_t quiet : 1; - uint32_t exponent : 8; - uint32_t sign : 1; - } bits; - - static_assert(sizeof(float) == sizeof(ieee_float), ""); - } tmp; - - tmp.bits.sign = 0u; - tmp.bits.exponent = ~0u; - tmp.bits.quiet = 1u; - tmp.bits.mantissa = __make_mantissa(tagp); - - return tmp.val; -} -__DEVICE__ -inline -float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } -__DEVICE__ -inline -float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); } -__DEVICE__ -inline -float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); } -__DEVICE__ -inline -float norm4df(float x, float y, float z, float w) -{ - return __ocml_len4_f32(x, y, z, w); -} -__DEVICE__ -inline -float normcdff(float x) { return __ocml_ncdf_f32(x); } -__DEVICE__ -inline -float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); } -__DEVICE__ -inline -float normf(int dim, const float* a) -{ // TODO: placeholder until OCML adds support. - float r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_sqrt_f32(r); -} -__DEVICE__ -inline -float powf(float x, float y) { return __ocml_pow_f32(x, y); } -__DEVICE__ -inline -float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); } -__DEVICE__ -inline -float rcbrtf(float x) { return __ocml_rcbrt_f32(x); } -__DEVICE__ -inline -float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); } -__DEVICE__ -inline -float remquof(float x, float y, int* quo) -{ - int tmp; - float r = - __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp); - *quo = tmp; - - return r; -} -__DEVICE__ -inline -float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); } -__DEVICE__ -inline -float rintf(float x) { return __ocml_rint_f32(x); } -__DEVICE__ -inline -float rnorm3df(float x, float y, float z) -{ - return __ocml_rlen3_f32(x, y, z); -} - -__DEVICE__ -inline -float rnorm4df(float x, float y, float z, float w) -{ - return __ocml_rlen4_f32(x, y, z, w); -} -__DEVICE__ -inline -float rnormf(int dim, const float* a) -{ // TODO: placeholder until OCML adds support. - float r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_rsqrt_f32(r); -} -__DEVICE__ -inline -float roundf(float x) { return __ocml_round_f32(x); } -__DEVICE__ -inline -float rsqrtf(float x) { return __ocml_rsqrt_f32(x); } -__DEVICE__ -inline -float scalblnf(float x, long int n) -{ - return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n); -} -__DEVICE__ -inline -float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); } -__DEVICE__ -inline -__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); } -__DEVICE__ -inline -void sincosf(float x, float* sptr, float* cptr) -{ - float tmp; - - *sptr = - __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -void sincospif(float x, float* sptr, float* cptr) -{ - float tmp; - - *sptr = - __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -float sinf(float x) { return __ocml_sin_f32(x); } -__DEVICE__ -inline -float sinhf(float x) { return __ocml_sinh_f32(x); } -__DEVICE__ -inline -float sinpif(float x) { return __ocml_sinpi_f32(x); } -__DEVICE__ -inline -float sqrtf(float x) { return __ocml_sqrt_f32(x); } -__DEVICE__ -inline -float tanf(float x) { return __ocml_tan_f32(x); } -__DEVICE__ -inline -float tanhf(float x) { return __ocml_tanh_f32(x); } -__DEVICE__ -inline -float tgammaf(float x) { return __ocml_tgamma_f32(x); } -__DEVICE__ -inline -float truncf(float x) { return __ocml_trunc_f32(x); } -__DEVICE__ -inline -float y0f(float x) { return __ocml_y0_f32(x); } -__DEVICE__ -inline -float y1f(float x) { return __ocml_y1_f32(x); } -__DEVICE__ -inline -float ynf(int n, float x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. Placeholder until OCML adds - // support. - if (n == 0) return y0f(x); - if (n == 1) return y1f(x); - - float x0 = y0f(x); - float x1 = y1f(x); - for (int i = 1; i < n; ++i) { - float x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} - -// BEGIN INTRINSICS -__DEVICE__ -inline -float __cosf(float x) { return __ocml_native_cos_f32(x); } -__DEVICE__ -inline -float __exp10f(float x) { return __ocml_native_exp10_f32(x); } -__DEVICE__ -inline -float __expf(float x) { return __ocml_native_exp_f32(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fadd_rn(float x, float y) { return x + y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } -__DEVICE__ -inline -float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } -__DEVICE__ -inline -float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fdiv_rn(float x, float y) { return x / y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } -__DEVICE__ -inline -float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } -#endif -__DEVICE__ -inline -float __fdividef(float x, float y) { return x / y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fmaf_rd(float x, float y, float z) -{ - return __ocml_fma_rtn_f32(x, y, z); -} -#endif -__DEVICE__ -inline -float __fmaf_rn(float x, float y, float z) -{ - return __ocml_fma_f32(x, y, z); -} -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fmaf_ru(float x, float y, float z) -{ - return __ocml_fma_rtp_f32(x, y, z); -} -__DEVICE__ -inline -float __fmaf_rz(float x, float y, float z) -{ - return __ocml_fma_rtz_f32(x, y, z); -} -__DEVICE__ -inline -float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fmul_rn(float x, float y) { return x * y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } -__DEVICE__ -inline -float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } -__DEVICE__ -inline -float __frcp_rd(float x) { return __builtin_amdgcn_rcpf(x); } -#endif -__DEVICE__ -inline -float __frcp_rn(float x) { return __builtin_amdgcn_rcpf(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __frcp_ru(float x) { return __builtin_amdgcn_rcpf(x); } -__DEVICE__ -inline -float __frcp_rz(float x) { return __builtin_amdgcn_rcpf(x); } -#endif -__DEVICE__ -inline -float __frsqrt_rn(float x) { return __builtin_amdgcn_rsqf(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } -#endif -__DEVICE__ -inline -float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } -__DEVICE__ -inline -float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } -__DEVICE__ -inline -float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } -#endif -__DEVICE__ -inline -float __fsub_rn(float x, float y) { return x - y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } -__DEVICE__ -inline -float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } -#endif -__DEVICE__ -inline -float __log10f(float x) { return __ocml_native_log10_f32(x); } -__DEVICE__ -inline -float __log2f(float x) { return __ocml_native_log2_f32(x); } -__DEVICE__ -inline -float __logf(float x) { return __ocml_native_log_f32(x); } -__DEVICE__ -inline -float __powf(float x, float y) { return __ocml_pow_f32(x, y); } -__DEVICE__ -inline -float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); } -__DEVICE__ -inline -void __sincosf(float x, float* sptr, float* cptr) -{ - *sptr = __ocml_native_sin_f32(x); - *cptr = __ocml_native_cos_f32(x); -} -__DEVICE__ -inline -float __sinf(float x) { return __ocml_native_sin_f32(x); } -__DEVICE__ -inline -float __tanf(float x) { return __ocml_tan_f32(x); } -// END INTRINSICS -// END FLOAT - -// BEGIN DOUBLE -__DEVICE__ -inline -double abs(double x) { return __ocml_fabs_f64(x); } -__DEVICE__ -inline -double acos(double x) { return __ocml_acos_f64(x); } -__DEVICE__ -inline -double acosh(double x) { return __ocml_acosh_f64(x); } -__DEVICE__ -inline -double asin(double x) { return __ocml_asin_f64(x); } -__DEVICE__ -inline -double asinh(double x) { return __ocml_asinh_f64(x); } -__DEVICE__ -inline -double atan(double x) { return __ocml_atan_f64(x); } -__DEVICE__ -inline -double atan2(double x, double y) { return __ocml_atan2_f64(x, y); } -__DEVICE__ -inline -double atanh(double x) { return __ocml_atanh_f64(x); } -__DEVICE__ -inline -double cbrt(double x) { return __ocml_cbrt_f64(x); } -__DEVICE__ -inline -double ceil(double x) { return __ocml_ceil_f64(x); } -__DEVICE__ -inline -double copysign(double x, double y) { return __ocml_copysign_f64(x, y); } -__DEVICE__ -inline -double cos(double x) { return __ocml_cos_f64(x); } -__DEVICE__ -inline -double cosh(double x) { return __ocml_cosh_f64(x); } -__DEVICE__ -inline -double cospi(double x) { return __ocml_cospi_f64(x); } -__DEVICE__ -inline -double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); } -__DEVICE__ -inline -double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); } -__DEVICE__ -inline -double erf(double x) { return __ocml_erf_f64(x); } -__DEVICE__ -inline -double erfc(double x) { return __ocml_erfc_f64(x); } -__DEVICE__ -inline -double erfcinv(double x) { return __ocml_erfcinv_f64(x); } -__DEVICE__ -inline -double erfcx(double x) { return __ocml_erfcx_f64(x); } -__DEVICE__ -inline -double erfinv(double x) { return __ocml_erfinv_f64(x); } -__DEVICE__ -inline -double exp(double x) { return __ocml_exp_f64(x); } -__DEVICE__ -inline -double exp10(double x) { return __ocml_exp10_f64(x); } -__DEVICE__ -inline -double exp2(double x) { return __ocml_exp2_f64(x); } -__DEVICE__ -inline -double expm1(double x) { return __ocml_expm1_f64(x); } -__DEVICE__ -inline -double fabs(double x) { return __ocml_fabs_f64(x); } -__DEVICE__ -inline -double fdim(double x, double y) { return __ocml_fdim_f64(x, y); } -__DEVICE__ -inline -double floor(double x) { return __ocml_floor_f64(x); } -__DEVICE__ -inline -double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); } -__DEVICE__ -inline -double fmax(double x, double y) { return __ocml_fmax_f64(x, y); } -__DEVICE__ -inline -double fmin(double x, double y) { return __ocml_fmin_f64(x, y); } -__DEVICE__ -inline -double fmod(double x, double y) { return __ocml_fmod_f64(x, y); } -__DEVICE__ -inline -double frexp(double x, int* nptr) -{ - int tmp; - double r = - __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp); - *nptr = tmp; - - return r; -} -__DEVICE__ -inline -double hypot(double x, double y) { return __ocml_hypot_f64(x, y); } -__DEVICE__ -inline -int ilogb(double x) { return __ocml_ilogb_f64(x); } -__DEVICE__ -inline -__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); } -__DEVICE__ -inline -__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); } -__DEVICE__ -inline -__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); } -__DEVICE__ -inline -double j0(double x) { return __ocml_j0_f64(x); } -__DEVICE__ -inline -double j1(double x) { return __ocml_j1_f64(x); } -__DEVICE__ -inline -double jn(int n, double x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. Placeholder until OCML adds - // support. - if (n == 0) return j0f(x); - if (n == 1) return j1f(x); - - double x0 = j0f(x); - double x1 = j1f(x); - for (int i = 1; i < n; ++i) { - double x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} -__DEVICE__ -inline -double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); } -__DEVICE__ -inline -double lgamma(double x) { return __ocml_lgamma_f64(x); } -__DEVICE__ -inline -long long int llrint(double x) { return __ocml_rint_f64(x); } -__DEVICE__ -inline -long long int llround(double x) { return __ocml_round_f64(x); } -__DEVICE__ -inline -double log(double x) { return __ocml_log_f64(x); } -__DEVICE__ -inline -double log10(double x) { return __ocml_log10_f64(x); } -__DEVICE__ -inline -double log1p(double x) { return __ocml_log1p_f64(x); } -__DEVICE__ -inline -double log2(double x) { return __ocml_log2_f64(x); } -__DEVICE__ -inline -double logb(double x) { return __ocml_logb_f64(x); } -__DEVICE__ -inline -long int lrint(double x) { return __ocml_rint_f64(x); } -__DEVICE__ -inline -long int lround(double x) { return __ocml_round_f64(x); } -__DEVICE__ -inline -double modf(double x, double* iptr) -{ - double tmp; - double r = - __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp); - *iptr = tmp; - - return r; -} -__DEVICE__ -inline -double nan(const char* tagp) -{ -#if !_WIN32 - union { - double val; - struct ieee_double { - uint64_t mantissa : 51; - uint32_t quiet : 1; - uint32_t exponent : 11; - uint32_t sign : 1; - } bits; - static_assert(sizeof(double) == sizeof(ieee_double), ""); - } tmp; - - tmp.bits.sign = 0u; - tmp.bits.exponent = ~0u; - tmp.bits.quiet = 1u; - tmp.bits.mantissa = __make_mantissa(tagp); - - return tmp.val; -#else - static_assert(sizeof(uint64_t)==sizeof(double)); - uint64_t val = __make_mantissa(tagp); - val |= 0xFFF << 51; - return *reinterpret_cast(&val); -#endif -} -__DEVICE__ -inline -double nearbyint(double x) { return __ocml_nearbyint_f64(x); } -__DEVICE__ -inline -double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); } -__DEVICE__ -inline -double norm(int dim, const double* a) -{ // TODO: placeholder until OCML adds support. - double r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_sqrt_f64(r); -} -__DEVICE__ -inline -double norm3d(double x, double y, double z) -{ - return __ocml_len3_f64(x, y, z); -} -__DEVICE__ -inline -double norm4d(double x, double y, double z, double w) -{ - return __ocml_len4_f64(x, y, z, w); -} -__DEVICE__ -inline -double normcdf(double x) { return __ocml_ncdf_f64(x); } -__DEVICE__ -inline -double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); } -__DEVICE__ -inline -double pow(double x, double y) { return __ocml_pow_f64(x, y); } -__DEVICE__ -inline -double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); } -__DEVICE__ -inline -double rcbrt(double x) { return __ocml_rcbrt_f64(x); } -__DEVICE__ -inline -double remainder(double x, double y) { return __ocml_remainder_f64(x, y); } -__DEVICE__ -inline -double remquo(double x, double y, int* quo) -{ - int tmp; - double r = - __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp); - *quo = tmp; - - return r; -} -__DEVICE__ -inline -double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); } -__DEVICE__ -inline -double rint(double x) { return __ocml_rint_f64(x); } -__DEVICE__ -inline -double rnorm(int dim, const double* a) -{ // TODO: placeholder until OCML adds support. - double r = 0; - while (dim--) { r += a[0] * a[0]; ++a; } - - return __ocml_rsqrt_f64(r); -} -__DEVICE__ -inline -double rnorm3d(double x, double y, double z) -{ - return __ocml_rlen3_f64(x, y, z); -} -__DEVICE__ -inline -double rnorm4d(double x, double y, double z, double w) -{ - return __ocml_rlen4_f64(x, y, z, w); -} -__DEVICE__ -inline -double round(double x) { return __ocml_round_f64(x); } -__DEVICE__ -inline -double rsqrt(double x) { return __ocml_rsqrt_f64(x); } -__DEVICE__ -inline -double scalbln(double x, long int n) -{ - return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n); -} -__DEVICE__ -inline -double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); } -__DEVICE__ -inline -__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); } -__DEVICE__ -inline -double sin(double x) { return __ocml_sin_f64(x); } -__DEVICE__ -inline -void sincos(double x, double* sptr, double* cptr) -{ - double tmp; - *sptr = - __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -void sincospi(double x, double* sptr, double* cptr) -{ - double tmp; - *sptr = __ocml_sincospi_f64( - x, (__attribute__((address_space(5))) double*) &tmp); - *cptr = tmp; -} -__DEVICE__ -inline -double sinh(double x) { return __ocml_sinh_f64(x); } -__DEVICE__ -inline -double sinpi(double x) { return __ocml_sinpi_f64(x); } -__DEVICE__ -inline -double sqrt(double x) { return __ocml_sqrt_f64(x); } -__DEVICE__ -inline -double tan(double x) { return __ocml_tan_f64(x); } -__DEVICE__ -inline -double tanh(double x) { return __ocml_tanh_f64(x); } -__DEVICE__ -inline -double tgamma(double x) { return __ocml_tgamma_f64(x); } -__DEVICE__ -inline -double trunc(double x) { return __ocml_trunc_f64(x); } -__DEVICE__ -inline -double y0(double x) { return __ocml_y0_f64(x); } -__DEVICE__ -inline -double y1(double x) { return __ocml_y1_f64(x); } -__DEVICE__ -inline -double yn(int n, double x) -{ // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm - // for linear recurrences to get O(log n) steps, but it's unclear if - // it'd be beneficial in this case. Placeholder until OCML adds - // support. - if (n == 0) return j0f(x); - if (n == 1) return j1f(x); - - double x0 = j0f(x); - double x1 = j1f(x); - for (int i = 1; i < n; ++i) { - double x2 = (2 * i) / x * x1 - x0; - x0 = x1; - x1 = x2; - } - - return x1; -} - -// BEGIN INTRINSICS -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __dadd_rn(double x, double y) { return x + y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } -__DEVICE__ -inline -double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } -__DEVICE__ -inline -double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __ddiv_rn(double x, double y) { return x / y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } -__DEVICE__ -inline -double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } -__DEVICE__ -inline -double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __dmul_rn(double x, double y) { return x * y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } -__DEVICE__ -inline -double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } -__DEVICE__ -inline -double __drcp_rd(double x) { return __builtin_amdgcn_rcp(x); } -#endif -__DEVICE__ -inline -double __drcp_rn(double x) { return __builtin_amdgcn_rcp(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __drcp_ru(double x) { return __builtin_amdgcn_rcp(x); } -__DEVICE__ -inline -double __drcp_rz(double x) { return __builtin_amdgcn_rcp(x); } -__DEVICE__ -inline -double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } -#endif -__DEVICE__ -inline -double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } -__DEVICE__ -inline -double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } -__DEVICE__ -inline -double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } -#endif -__DEVICE__ -inline -double __dsub_rn(double x, double y) { return x - y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } -__DEVICE__ -inline -double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } -__DEVICE__ -inline -double __fma_rd(double x, double y, double z) -{ - return __ocml_fma_rtn_f64(x, y, z); -} -#endif -__DEVICE__ -inline -double __fma_rn(double x, double y, double z) -{ - return __ocml_fma_f64(x, y, z); -} -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline -double __fma_ru(double x, double y, double z) -{ - return __ocml_fma_rtp_f64(x, y, z); -} -__DEVICE__ -inline -double __fma_rz(double x, double y, double z) -{ - return __ocml_fma_rtz_f64(x, y, z); -} -#endif -// END INTRINSICS -// END DOUBLE - -// BEGIN INTEGER -__DEVICE__ -inline -int abs(int x) -{ - int sgn = x >> (sizeof(int) * CHAR_BIT - 1); - return (x ^ sgn) - sgn; -} -__DEVICE__ -inline -long labs(long x) -{ - long sgn = x >> (sizeof(long) * CHAR_BIT - 1); - return (x ^ sgn) - sgn; -} -__DEVICE__ -inline -long long llabs(long long x) -{ - long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1); - return (x ^ sgn) - sgn; -} - -#if defined(__cplusplus) - __DEVICE__ - inline - long abs(long x) { return labs(x); } - __DEVICE__ - inline - long long abs(long long x) { return llabs(x); } -#endif -// END INTEGER - -__DEVICE__ -inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) { - return __ocml_fma_f16(x, y, z); -} - -__DEVICE__ -inline float fma(float x, float y, float z) { - return fmaf(x, y, z); -} - -#pragma push_macro("__DEF_FLOAT_FUN") -#pragma push_macro("__DEF_FLOAT_FUN2") -#pragma push_macro("__DEF_FLOAT_FUN2I") -#pragma push_macro("__HIP_OVERLOAD") -#pragma push_macro("__HIP_OVERLOAD2") - -// __hip_enable_if::type is a type function which returns __T if __B is true. -template -struct __hip_enable_if {}; - -template struct __hip_enable_if { - typedef __T type; -}; - -// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to -// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with -// floor(double). -#define __HIP_OVERLOAD1(__retty, __fn) \ - template \ - __DEVICE__ \ - typename __hip_enable_if::is_integer, \ - __retty>::type \ - __fn(__T __x) { \ - return ::__fn((double)__x); \ - } - -// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double -// or integer argument to avoid compilation error due to ambibuity. e.g. -// max(5.0f, 6.0) is resolved with max(double, double). -#define __HIP_OVERLOAD2(__retty, __fn) \ - template \ - __DEVICE__ typename __hip_enable_if< \ - std::numeric_limits<__T1>::is_specialized && \ - std::numeric_limits<__T2>::is_specialized, \ - __retty>::type \ - __fn(__T1 __x, __T2 __y) { \ - return __fn((double)__x, (double)__y); \ - } - -// Define cmath functions with float argument and returns float. -#define __DEF_FUN1(retty, func) \ -__DEVICE__ \ -inline \ -float func(float x) \ -{ \ - return func##f(x); \ -} \ -__HIP_OVERLOAD1(retty, func) - -// Define cmath functions with float argument and returns retty. -#define __DEF_FUNI(retty, func) \ -__DEVICE__ \ -inline \ -retty func(float x) \ -{ \ - return func##f(x); \ -} \ -__HIP_OVERLOAD1(retty, func) - -// define cmath functions with two float arguments. -#define __DEF_FUN2(retty, func) \ -__DEVICE__ \ -inline \ -float func(float x, float y) \ -{ \ - return func##f(x, y); \ -} \ -__HIP_OVERLOAD2(retty, func) - -__DEF_FUN1(double, acos) -__DEF_FUN1(double, acosh) -__DEF_FUN1(double, asin) -__DEF_FUN1(double, asinh) -__DEF_FUN1(double, atan) -__DEF_FUN2(double, atan2); -__DEF_FUN1(double, atanh) -__DEF_FUN1(double, cbrt) -__DEF_FUN1(double, ceil) -__DEF_FUN2(double, copysign); -__DEF_FUN1(double, cos) -__DEF_FUN1(double, cosh) -__DEF_FUN1(double, erf) -__DEF_FUN1(double, erfc) -__DEF_FUN1(double, exp) -__DEF_FUN1(double, exp2) -__DEF_FUN1(double, expm1) -__DEF_FUN1(double, fabs) -__DEF_FUN2(double, fdim); -__DEF_FUN1(double, floor) -__DEF_FUN2(double, fmax); -__DEF_FUN2(double, fmin); -__DEF_FUN2(double, fmod); -//__HIP_OVERLOAD1(int, fpclassify) -__DEF_FUN2(double, hypot); -__DEF_FUNI(int, ilogb) -__HIP_OVERLOAD1(bool, isfinite) -__HIP_OVERLOAD2(bool, isgreater); -__HIP_OVERLOAD2(bool, isgreaterequal); -__HIP_OVERLOAD1(bool, isinf); -__HIP_OVERLOAD2(bool, isless); -__HIP_OVERLOAD2(bool, islessequal); -__HIP_OVERLOAD2(bool, islessgreater); -__HIP_OVERLOAD1(bool, isnan); -//__HIP_OVERLOAD1(bool, isnormal) -__HIP_OVERLOAD2(bool, isunordered); -__DEF_FUN1(double, lgamma) -__DEF_FUN1(double, log) -__DEF_FUN1(double, log10) -__DEF_FUN1(double, log1p) -__DEF_FUN1(double, log2) -__DEF_FUN1(double, logb) -__DEF_FUNI(long long, llrint) -__DEF_FUNI(long long, llround) -__DEF_FUNI(long, lrint) -__DEF_FUNI(long, lround) -__DEF_FUN1(double, nearbyint); -__DEF_FUN2(double, nextafter); -__DEF_FUN2(double, pow); -__DEF_FUN2(double, remainder); -__DEF_FUN1(double, rint); -__DEF_FUN1(double, round); -__HIP_OVERLOAD1(bool, signbit) -__DEF_FUN1(double, sin) -__DEF_FUN1(double, sinh) -__DEF_FUN1(double, sqrt) -__DEF_FUN1(double, tan) -__DEF_FUN1(double, tanh) -__DEF_FUN1(double, tgamma) -__DEF_FUN1(double, trunc); - -// define cmath functions with a float and an integer argument. -#define __DEF_FLOAT_FUN2I(func) \ -__DEVICE__ \ -inline \ -float func(float x, int y) \ -{ \ - return func##f(x, y); \ -} -__DEF_FLOAT_FUN2I(scalbn) -__DEF_FLOAT_FUN2I(ldexp) - -template -__DEVICE__ inline T min(T arg1, T arg2) { - return (arg1 < arg2) ? arg1 : arg2; -} - -template -__DEVICE__ inline T max(T arg1, T arg2) { - return (arg1 > arg2) ? arg1 : arg2; -} - -__DEVICE__ inline int min(int arg1, int arg2) { - return (arg1 < arg2) ? arg1 : arg2; -} -__DEVICE__ inline int max(int arg1, int arg2) { - return (arg1 > arg2) ? arg1 : arg2; -} - -__DEVICE__ inline int min(uint32_t arg1, int arg2) { - return (arg1 < arg2) ? arg1 : arg2; -} -__DEVICE__ inline int max(uint32_t arg1, int arg2) { - return (arg1 > arg2) ? arg1 : arg2; -} - -__DEVICE__ -inline -float max(float x, float y) { - return fmaxf(x, y); -} - -__DEVICE__ -inline -double max(double x, double y) { - return fmax(x, y); -} - -__DEVICE__ -inline -float min(float x, float y) { - return fminf(x, y); -} - -__DEVICE__ -inline -double min(double x, double y) { - return fmin(x, y); -} - -__HIP_OVERLOAD2(double, max) -__HIP_OVERLOAD2(double, min) - -#if !defined(__HIPCC_RTC__) -__host__ inline static int min(int arg1, int arg2) { - return std::min(arg1, arg2); -} - -__host__ inline static int max(int arg1, int arg2) { - return std::max(arg1, arg2); -} -#endif // !defined(__HIPCC_RTC__) - -__DEVICE__ -inline float pow(float base, int iexp) { - return powif(base, iexp); -} - -__DEVICE__ -inline double pow(double base, int iexp) { - return powi(base, iexp); -} - -__DEVICE__ -inline _Float16 pow(_Float16 base, int iexp) { - return __ocml_pown_f16(base, iexp); -} - -#pragma pop_macro("__DEF_FLOAT_FUN") -#pragma pop_macro("__DEF_FLOAT_FUN2") -#pragma pop_macro("__DEF_FLOAT_FUN2I") -#pragma pop_macro("__HIP_OVERLOAD") -#pragma pop_macro("__HIP_OVERLOAD2") - -#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ - #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") - // For backward compatibility. // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros // defined after including math_functions.h. +#if !defined(__HIPCC_RTC__) #include +#endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h index bdba2ef16e..5974fb6374 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2018 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,12 +23,33 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wc++17-extensions" +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wunused-template" +#endif + #if defined(__cplusplus) +#if !defined(__HIPCC_RTC__) #include #include #include #include +#endif + +#if defined(__HIPCC_RTC__) +#define __HOST_DEVICE__ __device__ +#else +#define __HOST_DEVICE__ __host__ __device__ +#endif #define __HIP_SURFACE_OBJECT_PARAMETERS_INIT \ unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)surfObj; @@ -232,4 +253,9 @@ static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfa } #endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + #endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h index 0222870590..c055ad1d8a 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h +++ b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -31,7 +31,16 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wold-style-cast" +#endif + +#if !defined(__HIPCC_RTC__) #include "hip/amd_detail/host_defines.h" +#endif typedef unsigned char uchar; typedef unsigned short ushort; @@ -128,4 +137,8 @@ __device__ inline static __local void* __to_local(unsigned x) { return (__local #define __CLK_LOCAL_MEM_FENCE 0x01 typedef unsigned __cl_mem_fence_flags; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + #endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h b/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h index 9e999268ea..6c471c5fa1 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h +++ b/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,13 +22,22 @@ THE SOFTWARE. #pragma once +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + +#if !defined(__HIPCC_RTC__) #include "host_defines.h" +#endif + #if defined(__cplusplus) extern "C" { #endif // DOT FUNCTIONS -#if __HIP_CLANG_ONLY__ +#if defined(__clang__) && defined(__HIP__) __device__ __attribute__((const)) int __ockl_sdot2( @@ -692,3 +701,7 @@ double __ocml_fma_rtz_f64(double, double, double); #if defined(__cplusplus) } // extern "C" #endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h b/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h index a3fa616cc5..136c6e352f 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h +++ b/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,7 +22,14 @@ THE SOFTWARE. #pragma once +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + +#if !defined(__HIPCC_RTC__) #include +#endif extern "C" { @@ -172,4 +179,8 @@ __device__ int __ockl_image_channel_order_CM(unsigned int ADDRESS_SPACE_CONSTANT __device__ int __ockl_image_channel_order_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); -}; \ No newline at end of file +} + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif \ No newline at end of file diff --git a/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h b/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h index f1cd70ab05..baf2b28039 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,13 +22,24 @@ THE SOFTWARE. #pragma once +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++17-extensions" +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wextra-semi-stmt" +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wunused-template" +#endif + #if defined(__cplusplus) +#if !defined(__HIPCC_RTC__) #include #include #include - -#if !defined(__HIPCC_RTC__) #include #endif // !defined(__HIPCC_RTC__) @@ -490,3 +501,7 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t #include #include #include - -#if !defined(__HIPCC_RTC__) #include #endif // !defined(__HIPCC_RTC__) @@ -208,7 +218,7 @@ static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject return __hipMapFrom(tmp); break; } - }; + } return {}; } @@ -477,3 +487,7 @@ static __device__ __hip_img_chk__ void texCubemapLayeredGrad(T *ptr, hipTextureO } #endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif \ No newline at end of file diff --git a/projects/clr/hipamd/packaging/CMakeLists.txt b/projects/clr/hipamd/packaging/CMakeLists.txt index d0eb8fe27d..cef02d4296 100644 --- a/projects/clr/hipamd/packaging/CMakeLists.txt +++ b/projects/clr/hipamd/packaging/CMakeLists.txt @@ -60,7 +60,7 @@ if(HIP_PLATFORM STREQUAL "amd" ) #TODO:This do not belong in BINARY package. #Keeping it as is for now -install(FILES ${CMAKE_BINARY_DIR}/hipamd/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) +install(FILES ${CMAKE_BINARY_DIR}/hipamd/share/hip/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) install ( EXPORT hip-targets FILE hip-targets.cmake NAMESPACE hip:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT binary) install ( EXPORT hip-lang-targets FILE hip-lang-targets.cmake NAMESPACE hip-lang:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip-lang COMPONENT binary) @@ -94,11 +94,11 @@ install(FILES ${CMAKE_BINARY_DIR}/hipamd/include/hip/amd_detail/hip_prof_str.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/amd_detail COMPONENT dev) install(FILES ${CMAKE_BINARY_DIR}/hipamd/include/hip/hip_version.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev) -install(FILES ${CMAKE_BINARY_DIR}/hipamd/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip COMPONENT dev) +install(FILES ${CMAKE_BINARY_DIR}/hipamd/share/hip/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip COMPONENT dev) # .hipVersion is added to satisfy Windows compute build. #TODO to be removed if(WIN32) - install(FILES ${CMAKE_BINARY_DIR}/hipamd/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion COMPONENT dev) + install(FILES ${CMAKE_BINARY_DIR}/hipamd/share/hip/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion COMPONENT dev) endif() install(DIRECTORY ${HIP_COMMON_DIR}/cmake/ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT dev) install(FILES ${CMAKE_BINARY_DIR}/hipamd/hip-config.cmake ${CMAKE_BINARY_DIR}/hipamd/hip-config-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT dev) diff --git a/projects/clr/hipamd/src/CMakeLists.txt b/projects/clr/hipamd/src/CMakeLists.txt index bd791e2774..97f4d0ea36 100644 --- a/projects/clr/hipamd/src/CMakeLists.txt +++ b/projects/clr/hipamd/src/CMakeLists.txt @@ -261,9 +261,11 @@ endif() target_compile_definitions(amdhip64 PUBLIC USE_PROF_API=1) add_custom_command(TARGET amdhip64 POST_BUILD COMMAND - ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/.hipInfo ${PROJECT_BINARY_DIR}/lib/.hipInfo) + ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/share/hip/.hipInfo ${PROJECT_BINARY_DIR}/lib/.hipInfo) add_custom_command(TARGET amdhip64 POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/include ${PROJECT_BINARY_DIR}/include) +add_custom_command(TARGET amdhip64 POST_BUILD COMMAND + ${CMAKE_COMMAND} -E copy_directory ${HIP_COMMON_INCLUDE_DIR} ${PROJECT_BINARY_DIR}/include) add_library(host INTERFACE) target_link_libraries(host INTERFACE amdhip64) diff --git a/projects/clr/hipamd/src/hiprtc/CMakeLists.txt b/projects/clr/hipamd/src/hiprtc/CMakeLists.txt index 8bafd2ece2..39c1c6a12a 100644 --- a/projects/clr/hipamd/src/hiprtc/CMakeLists.txt +++ b/projects/clr/hipamd/src/hiprtc/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2020 - 2023 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -129,12 +129,33 @@ set(HIPRTC_GEN_HEADER "${HIPRTC_GEN_DIR}/hipRTC_header.h") set(HIPRTC_GEN_MCIN "${HIPRTC_GEN_DIR}/hipRTC_header.mcin") set(HIPRTC_GEN_PREPROCESSED "${HIPRTC_GEN_DIR}/hipRTC") set(HIPRTC_GEN_OBJ "${HIPRTC_GEN_DIR}/hipRTC_header${CMAKE_CXX_OUTPUT_EXTENSION}") -set(HIPRTC_WARP_FUNCS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_warp_functions.h") -set(HIPRTC_FP16_MATH_FWD "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_fp16_math_fwd.h") -set(HIPRTC_FP16_FUNCS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_fp16.h") -set(HIPRTC_COOP_GROUPS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_cooperative_groups.h") -set(HIPRTC_COOP_GRPS_HELPER "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_cooperative_groups_helper.h") -set(HIPRTC_UNSAFE_ATOMICS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_unsafe_atomics.h") +# list of headers which needs to be appended to the hiprtc preprocessed file +set(HIPRTC_HEADERS +${HIP_COMMON_INCLUDE_DIR}/hip/hip_common.h +${HIP_COMMON_INCLUDE_DIR}/hip/library_types.h +${HIP_COMMON_INCLUDE_DIR}/hip/driver_types.h +${HIP_COMMON_INCLUDE_DIR}/hip/surface_types.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_channel_descriptor.h +${HIP_COMMON_INCLUDE_DIR}/hip/texture_types.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/ockl_image.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/texture_fetch_functions.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/texture_indirect_functions.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_surface_functions.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_complex.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_math_constants.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/math_fwd.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/device_library_decls.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_device_functions.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_warp_functions.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_cooperative_groups_helper.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_cooperative_groups.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_unsafe_atomics.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_atomic.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/math_fwd.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_fp16_math_fwd.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_fp16.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_math_functions.h +) # Generate required HIPRTC files. FILE(MAKE_DIRECTORY ${HIPRTC_GEN_DIR}) @@ -147,7 +168,7 @@ generate_hiprtc_mcin("${HIPRTC_GEN_MCIN}" "${HIPRTC_GEN_PREPROCESSED}") add_custom_command( OUTPUT ${HIPRTC_GEN_PREPROCESSED} COMMAND $ -O3 --rocm-path=${PROJECT_SOURCE_DIR}/include/.. -std=c++17 -nogpulib --hip-version=3.6 -isystem ${HIP_COMMON_INCLUDE_DIR} -isystem ${PROJECT_SOURCE_DIR}/include -isystem ${PROJECT_BINARY_DIR}/include -isystem ${CMAKE_CURRENT_SOURCE_DIR}/include --cuda-device-only -D__HIPCC_RTC__ -x hip ${HIPRTC_GEN_HEADER} -E -o ${HIPRTC_GEN_PREPROCESSED} - COMMAND ${CMAKE_COMMAND} -DHIPRTC_ADD_MACROS=1 -DHIPRTC_WARP_HEADER_FILE=${HIPRTC_WARP_FUNCS} -DHIPRTC_COOP_HEADER_FILE=${HIPRTC_COOP_GROUPS} -DHIPRTC_COOP_HELPER_FILE=${HIPRTC_COOP_GRPS_HELPER} -DHIPRTC_UNSAFE_ATOMICS_FILE=${HIPRTC_UNSAFE_ATOMICS} -DHIPRTC_FP16_MATH_FWD_FILE=${HIPRTC_FP16_MATH_FWD} -DHIPRTC_FP16_HEADER_FILE=${HIPRTC_FP16_FUNCS} -DHIPRTC_PREPROCESSED_FILE=${HIPRTC_GEN_PREPROCESSED} -P ${HIPRTC_CMAKE} + COMMAND ${CMAKE_COMMAND} -DHIPRTC_ADD_MACROS=1 -DHIPRTC_HEADERS="${HIPRTC_HEADERS}" -DHIPRTC_PREPROCESSED_FILE=${HIPRTC_GEN_PREPROCESSED} -P ${HIPRTC_CMAKE} DEPENDS clang ${HIPRTC_GEN_HEADER}) add_custom_command( OUTPUT ${HIPRTC_GEN_OBJ} diff --git a/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake b/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake index 730d013e28..187e961648 100644 --- a/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake +++ b/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake @@ -30,6 +30,8 @@ function(get_hiprtc_macros HIPRTC_DEFINES) "#pragma clang diagnostic push\n\ #pragma clang diagnostic ignored \"-Wreserved-id-macro\"\n\ #pragma clang diagnostic ignored \"-Wc++98-compat-pedantic\"\n\ +#pragma clang diagnostic ignored \"-Wreserved-macro-identifier\"\n\ +#pragma clang diagnostic ignored \"-Wundef\"\n\ #define __device__ __attribute__((device))\n\ #define __host__ __attribute__((host))\n\ #define __global__ __attribute__((global))\n\ @@ -40,7 +42,11 @@ function(get_hiprtc_macros HIPRTC_DEFINES) #define __noinline__ __attribute__((noinline))\n\ #endif\n\ #define __forceinline__ inline __attribute__((always_inline))\n\ - +#if __HIP_NO_IMAGE_SUPPORT\n\ +#define __hip_img_chk__ __attribute__((unavailable(\"The image/texture API not supported on the device\")))\n\ +#else\n\ +#define __hip_img_chk__\n\ +#endif\n\ #define launch_bounds_impl0(requiredMaxThreadsPerBlock) \\\n\ __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))\n\ #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \\\n\ @@ -49,13 +55,11 @@ function(get_hiprtc_macros HIPRTC_DEFINES) #define select_impl_(_1, _2, impl_, ...) impl_\n\ #define __launch_bounds__(...) \\\n\ select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) \n\ -#pragma clang diagnostic pop\n\ #define HIP_INCLUDE_HIP_HIP_RUNTIME_H\n\ -#pragma clang diagnostic push\n\ -#pragma clang diagnostic ignored \"-Wreserved-macro-identifier\"\n\ #define _HIP_BFLOAT16_H_\n\ -#pragma clang diagnostic pop\n\ -#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H" +#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H\n\ +#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H\n\ +#pragma clang diagnostic pop" PARENT_SCOPE) endfunction(get_hiprtc_macros) @@ -64,20 +68,15 @@ if(HIPRTC_ADD_MACROS) message(STATUS "Appending hiprtc macros to ${HIPRTC_PREPROCESSED_FILE}.") get_hiprtc_macros(HIPRTC_DEFINES) FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_DEFINES}") - FILE(READ "${HIPRTC_WARP_HEADER_FILE}" HIPRTC_WARP_HEADER) - FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_WARP_HEADER}") #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wreserved-macro-identifier" - FILE(READ "${HIPRTC_COOP_HELPER_FILE}" HIPRTC_COOP_HELPER) - FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_COOP_HELPER}") - FILE(READ "${HIPRTC_COOP_HEADER_FILE}" HIPRTC_COOP_HEADER) - FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_COOP_HEADER}") - FILE(READ "${HIPRTC_UNSAFE_ATOMICS_FILE}" HIPRTC_UNSAFE_ATOMICS) - FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_UNSAFE_ATOMICS}") - FILE(READ "${HIPRTC_FP16_MATH_FWD_FILE}" HIPRTC_FP16_MATH_FWD) - FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_FP16_MATH_FWD}") - FILE(READ "${HIPRTC_FP16_HEADER_FILE}" HIPRTC_FP16_HEADER) - FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_FP16_HEADER}") + set(HIPRTC_HEADER_LIST ${HIPRTC_HEADERS}) + separate_arguments(HIPRTC_HEADER_LIST) +# appends all the headers from the list to the hiprtc preprocessed file + foreach(header ${HIPRTC_HEADER_LIST}) + FILE(READ "${header}" HEADER_FILE) + FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HEADER_FILE}") + endforeach() #pragma clang diagnostic pop endif()