From 78a3dc739d83f4bf06d7533bb06415035f66b981 Mon Sep 17 00:00:00 2001
From: Satyanvesh Dittakavi <Satyanvesh.Dittakavi@amd.com>
Date: Wed, 26 Apr 2023 18:13:36 +0000
Subject: [PATCH] SWDEV-395996 - Add HIPRTC support for missing hip headers

hip headers such as hip_math_constants.h, library_types.h,
hip_common.h, channel_descriptor.h, device_functions.h, hip_complex.h,
hip_texture_types.h, math_functions.h, surface_types.h
are added in HIPRTC

Change-Id: I4a4c198449ceb609c3ff55e00b43056c1f085431


[ROCm/clr commit: d7d0f1131882ea1f42b7c42235b66d88cd9305a1]
---
 projects/clr/hipamd/CMakeLists.txt            |   18 +-
 .../hip/amd_detail/amd_channel_descriptor.h   |   13 +-
 .../hip/amd_detail/amd_device_functions.h     |   31 +-
 .../include/hip/amd_detail/amd_hip_atomic.h   |   14 +
 .../include/hip/amd_detail/amd_hip_complex.h  |   20 +-
 .../include/hip/amd_detail/amd_hip_runtime.h  |   41 +-
 .../hip/amd_detail/amd_math_functions.h       | 1415 +----------------
 .../hip/amd_detail/amd_surface_functions.h    |   28 +-
 .../hip/amd_detail/device_library_decls.h     |   15 +-
 .../hipamd/include/hip/amd_detail/math_fwd.h  |   17 +-
 .../include/hip/amd_detail/ockl_image.h       |   15 +-
 .../hip/amd_detail/texture_fetch_functions.h  |   21 +-
 .../amd_detail/texture_indirect_functions.h   |   22 +-
 projects/clr/hipamd/packaging/CMakeLists.txt  |    6 +-
 projects/clr/hipamd/src/CMakeLists.txt        |    4 +-
 projects/clr/hipamd/src/hiprtc/CMakeLists.txt |   37 +-
 .../clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake  |   35 +-
 17 files changed, 271 insertions(+), 1481 deletions(-)

diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt
index 98432a13cd..194e50a47a 100755
--- a/projects/clr/hipamd/CMakeLists.txt
+++ b/projects/clr/hipamd/CMakeLists.txt
@@ -292,15 +292,15 @@ set(_versionInfoHeader
 ")
 file(WRITE "${PROJECT_BINARY_DIR}/include/hip/hip_version.h" ${_versionInfoHeader})
 
-if(HIP_RUNTIME STREQUAL "rocclr")
-	add_subdirectory(src)
-endif()
-
 # Generate .hipInfo
-file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo})
+file(WRITE "${PROJECT_BINARY_DIR}/share/hip/.hipInfo" ${_buildInfo})
 
 # Generate version
-file(WRITE "${PROJECT_BINARY_DIR}/version" ${_versionInfo})
+file(WRITE "${PROJECT_BINARY_DIR}/share/hip/version" ${_versionInfo})
+
+if(HIP_RUNTIME STREQUAL "rocclr")
+   add_subdirectory(src)
+endif()
 
 # Build doxygen documentation
 find_program(DOXYGEN_EXE doxygen)
@@ -321,14 +321,14 @@ endif()
 #############################
 
 # Install .hipInfo
-install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR})
+install(FILES ${PROJECT_BINARY_DIR}/share/hip/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR})
 
 # Install version
-install(FILES ${PROJECT_BINARY_DIR}/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip)
+install(FILES ${PROJECT_BINARY_DIR}/share/hip/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip)
 # .hipVersion is added to satisfy Windows compute build.
 #TODO to be removed
 if(WIN32)
-    install(FILES ${PROJECT_BINARY_DIR}/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion)
+    install(FILES ${PROJECT_BINARY_DIR}/share/hip/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion)
 endif()
 
 # Install src, bin, include & cmake if necessary
diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h b/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h
index bf2c337901..f5ba75ebe6 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_channel_descriptor.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -23,9 +23,16 @@ THE SOFTWARE.
 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H
 #define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#endif
+
+#if !defined(__HIPCC_RTC__)
 #include <hip/hip_common.h>
 #include <hip/driver_types.h>
 #include <hip/amd_detail/amd_hip_vector_types.h>
+#endif
 
 #ifdef __cplusplus
 
@@ -353,4 +360,8 @@ struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w,
 
 #endif /* __cplusplus */
 
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
 #endif /* !HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H */
diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h
index 2dc3d620e8..6f230bfc21 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h
@@ -23,18 +23,32 @@ THE SOFTWARE.
 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H
 #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H
 
-#include "host_defines.h"
-#include "math_fwd.h"
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#pragma clang diagnostic ignored "-Wsign-conversion"
+#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wshorten-64-to-32"
+#pragma clang diagnostic ignored "-Wimplicit-int-conversion"
+#pragma clang diagnostic ignored "-Wimplicit-float-conversion"
+#pragma clang diagnostic ignored "-Wmissing-noreturn"
+#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
+#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
+#pragma clang diagnostic ignored "-Wshift-count-overflow"
+#endif
 
 #if !defined(__HIPCC_RTC__)
+#include "host_defines.h"
+#include "math_fwd.h"
 #include <hip/hip_runtime_api.h>
 #include <stddef.h>
-#endif // !defined(__HIPCC_RTC__)
-
 #include <hip/hip_vector_types.h>
 #include <hip/amd_detail/device_library_decls.h>
+#endif // !defined(__HIPCC_RTC__)
 
-#if __HIP_CLANG_ONLY__
+#if defined(__clang__) && defined(__HIP__)
 extern "C" __device__ int printf(const char *fmt, ...);
 #else
 template <typename... All>
@@ -639,7 +653,7 @@ __device__ static inline float __ull2float_rz(unsigned long long int x) {
     return __ocml_cvtrtz_f32_u64(x);
 }
 
-#if __HIP_CLANG_ONLY__
+#if defined(__clang__) && defined(__HIP__)
 
 // Clock functions
 __device__ long long int __clock64();
@@ -1105,4 +1119,9 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) {
     return __hip_hc_memset(ptr, val8, size);
 }
 #endif // !__OPENMP_AMDGCN__
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
 #endif
diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h
index 4b4276935d..3f55831f77 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h
@@ -22,7 +22,17 @@ THE SOFTWARE.
 
 #pragma once
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+#pragma clang diagnostic ignored "-Wsign-conversion"
+#endif
+
+#if !defined(__HIPCC_RTC__)
 #include "amd_device_functions.h"
+#endif
 
 #if __has_builtin(__hip_atomic_compare_exchange_strong)
 
@@ -1625,4 +1635,8 @@ unsigned long long atomicXor(
     return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
 }
 
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
 #endif // __hip_atomic_compare_exchange_strong
diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h
index 9d9dfd5e9d..933fd4e165 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_complex.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -23,7 +23,21 @@ THE SOFTWARE.
 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
 #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
+#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion"
+#pragma clang diagnostic ignored "-Wimplicit-float-conversion"
+#pragma clang diagnostic ignored "-Wdouble-promotion"
+#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+#endif
+
+#if !defined(__HIPCC_RTC__)
 #include "hip/amd_detail/amd_hip_vector_types.h"
+#endif
 
 #if defined(__HIPCC_RTC__)
 #define __HOST_DEVICE__ __device__
@@ -311,4 +325,8 @@ __HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDo
     return make_hipDoubleComplex(real, imag);
 }
 
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
 #endif //HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h
index 2606ca0cf0..bd273b6b05 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -110,18 +110,19 @@ using ::int64_t;
 
 #if !defined(__HIPCC_RTC__)
 #include <hip/hip_runtime_api.h>
+#include <hip/amd_detail/amd_hip_atomic.h>
+#include <hip/amd_detail/amd_device_functions.h>
+#include <hip/amd_detail/amd_surface_functions.h>
+#include <hip/amd_detail/texture_fetch_functions.h>
+#include <hip/amd_detail/texture_indirect_functions.h>
 extern int HIP_TRACE_API;
 #endif // !defined(__HIPCC_RTC__)
 
 #ifdef __cplusplus
 #include <hip/amd_detail/hip_ldg.h>
 #endif
-#include <hip/amd_detail/amd_hip_atomic.h>
+
 #include <hip/amd_detail/host_defines.h>
-#include <hip/amd_detail/amd_device_functions.h>
-#include <hip/amd_detail/amd_surface_functions.h>
-#include <hip/amd_detail/texture_fetch_functions.h>
-#include <hip/amd_detail/texture_indirect_functions.h>
 
 // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
 #if defined(__KALMAR_ACCELERATOR__) && !defined(__HCC_ACCELERATOR__)
@@ -244,10 +245,10 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
 #include <hip/hip_runtime_api.h>
 #endif // !defined(__HIPCC_RTC__)
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(unsigned int);
 struct __HIP_BlockIdx {
   __device__
   std::uint32_t operator()(std::uint32_t x) const noexcept { return __ockl_get_group_id(x); }
@@ -282,7 +283,7 @@ typedef struct dim3 {
 } dim3;
 #endif // !defined(__HIPCC_RTC__)
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_size(unsigned int);
 
 #ifdef __cplusplus
 template <typename F> struct __HIP_Coordinates {
@@ -367,33 +368,35 @@ static constexpr __HIP_Coordinates<__HIP_GridDim> gridDim{};
 static constexpr __HIP_Coordinates<__HIP_ThreadIdx> threadIdx{};
 #endif // __cplusplus
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
 #define hipThreadIdx_x (__ockl_get_local_id(0))
 #define hipThreadIdx_y (__ockl_get_local_id(1))
 #define hipThreadIdx_z (__ockl_get_local_id(2))
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
 #define hipBlockIdx_x (__ockl_get_group_id(0))
 #define hipBlockIdx_y (__ockl_get_group_id(1))
 #define hipBlockIdx_z (__ockl_get_group_id(2))
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
 #define hipBlockDim_x (__ockl_get_local_size(0))
 #define hipBlockDim_y (__ockl_get_local_size(1))
 #define hipBlockDim_z (__ockl_get_local_size(2))
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(unsigned int);
 #define hipGridDim_x (__ockl_get_num_groups(0))
 #define hipGridDim_y (__ockl_get_num_groups(1))
 #define hipGridDim_z (__ockl_get_num_groups(2))
 
+#if !defined(__HIPCC_RTC__)
 #include <hip/amd_detail/amd_math_functions.h>
+#endif
 
 #if __HIP_HCC_COMPAT_MODE__
 // Define HCC work item functions in terms of HIP builtin variables.
 #pragma push_macro("__DEFINE_HCC_FUNC")
 #define __DEFINE_HCC_FUNC(hc_fun,hip_var) \
-inline __device__ __attribute__((always_inline)) uint hc_get_##hc_fun(uint i) { \
+inline __device__ __attribute__((always_inline)) unsigned int hc_get_##hc_fun(unsigned int i) { \
   if (i==0) \
     return hip_var.x; \
   else if(i==1) \
@@ -408,11 +411,11 @@ __DEFINE_HCC_FUNC(group_size, blockDim)
 __DEFINE_HCC_FUNC(num_groups, gridDim)
 #pragma pop_macro("__DEFINE_HCC_FUNC")
 
-extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(uint);
-inline __device__ __attribute__((always_inline)) uint
+extern "C" __device__ __attribute__((const)) size_t __ockl_get_global_id(unsigned int);
+inline __device__ __attribute__((always_inline)) unsigned int
 hc_get_workitem_absolute_id(int dim)
 {
-  return (uint)__ockl_get_global_id(dim);
+  return (unsigned int)__ockl_get_global_id(dim);
 }
 
 #endif
diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h
index 3c17d298ea..37172f402f 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_math_functions.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -22,13 +22,18 @@ THE SOFTWARE.
 
 #pragma once
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wundef"
+#endif
+
+#if !defined(__HIPCC_RTC__)
 #include "hip_fp16_math_fwd.h"
 #include "amd_hip_vector_types.h"
 #include "math_fwd.h"
 
 #include <hip/amd_detail/host_defines.h>
 
-#if !defined(__HIPCC_RTC__)
 #include <algorithm>
 // assert.h is only for the host version of assert.
 // The device version of assert is implemented in hip/amd_detail/hip_runtime.h.
@@ -60,77 +65,6 @@ struct __numeric_type<_Float16>
 #define __DEVICE__ static __device__
 #define __RETURN_TYPE bool
 
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-__DEVICE__
-inline
-uint64_t __make_mantissa_base8(const char* tagp)
-{
-    uint64_t r = 0;
-    while (tagp) {
-        char tmp = *tagp;
-
-        if (tmp >= '0' && tmp <= '7') r = (r * 8u) + tmp - '0';
-        else return 0;
-
-        ++tagp;
-    }
-
-    return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa_base10(const char* tagp)
-{
-    uint64_t r = 0;
-    while (tagp) {
-        char tmp = *tagp;
-
-        if (tmp >= '0' && tmp <= '9') r = (r * 10u) + tmp - '0';
-        else return 0;
-
-        ++tagp;
-    }
-
-    return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa_base16(const char* tagp)
-{
-    uint64_t r = 0;
-    while (tagp) {
-        char tmp = *tagp;
-
-        if (tmp >= '0' && tmp <= '9') r = (r * 16u) + tmp - '0';
-        else if (tmp >= 'a' && tmp <= 'f') r = (r * 16u) + tmp - 'a' + 10;
-        else if (tmp >= 'A' && tmp <= 'F') r = (r * 16u) + tmp - 'A' + 10;
-        else return 0;
-
-        ++tagp;
-    }
-
-    return r;
-}
-
-__DEVICE__
-inline
-uint64_t __make_mantissa(const char* tagp)
-{
-    if (!tagp) return 0u;
-
-    if (*tagp == '0') {
-        ++tagp;
-
-        if (*tagp == 'x' || *tagp == 'X') return __make_mantissa_base16(tagp);
-        else return __make_mantissa_base8(tagp);
-    }
-
-    return __make_mantissa_base10(tagp);
-}
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
 // DOT FUNCTIONS
 #if __HIP_CLANG_ONLY__
 __DEVICE__
@@ -165,1338 +99,15 @@ uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) {
 }
 #endif
 
-#if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-// BEGIN FLOAT
-__DEVICE__
-inline
-float abs(float x) { return __ocml_fabs_f32(x); }
-__DEVICE__
-inline
-float acosf(float x) { return __ocml_acos_f32(x); }
-__DEVICE__
-inline
-float acoshf(float x) { return __ocml_acosh_f32(x); }
-__DEVICE__
-inline
-float asinf(float x) { return __ocml_asin_f32(x); }
-__DEVICE__
-inline
-float asinhf(float x) { return __ocml_asinh_f32(x); }
-__DEVICE__
-inline
-float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
-__DEVICE__
-inline
-float atanf(float x) { return __ocml_atan_f32(x); }
-__DEVICE__
-inline
-float atanhf(float x) { return __ocml_atanh_f32(x); }
-__DEVICE__
-inline
-float cbrtf(float x) { return __ocml_cbrt_f32(x); }
-__DEVICE__
-inline
-float ceilf(float x) { return __ocml_ceil_f32(x); }
-__DEVICE__
-inline
-float copysignf(float x, float y) { return __ocml_copysign_f32(x, y); }
-__DEVICE__
-inline
-float cosf(float x) { return __ocml_cos_f32(x); }
-__DEVICE__
-inline
-float coshf(float x) { return __ocml_cosh_f32(x); }
-__DEVICE__
-inline
-float cospif(float x) { return __ocml_cospi_f32(x); }
-__DEVICE__
-inline
-float cyl_bessel_i0f(float x) { return __ocml_i0_f32(x); }
-__DEVICE__
-inline
-float cyl_bessel_i1f(float x) { return __ocml_i1_f32(x); }
-__DEVICE__
-inline
-float erfcf(float x) { return __ocml_erfc_f32(x); }
-__DEVICE__
-inline
-float erfcinvf(float x) { return __ocml_erfcinv_f32(x); }
-__DEVICE__
-inline
-float erfcxf(float x) { return __ocml_erfcx_f32(x); }
-__DEVICE__
-inline
-float erff(float x) { return __ocml_erf_f32(x); }
-__DEVICE__
-inline
-float erfinvf(float x) { return __ocml_erfinv_f32(x); }
-__DEVICE__
-inline
-float exp10f(float x) { return __ocml_exp10_f32(x); }
-__DEVICE__
-inline
-float exp2f(float x) { return __ocml_exp2_f32(x); }
-__DEVICE__
-inline
-float expf(float x) { return __ocml_exp_f32(x); }
-__DEVICE__
-inline
-float expm1f(float x) { return __ocml_expm1_f32(x); }
-__DEVICE__
-inline
-float fabsf(float x) { return __ocml_fabs_f32(x); }
-__DEVICE__
-inline
-float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
-__DEVICE__
-inline
-float fdividef(float x, float y) { return x / y; }
-__DEVICE__
-inline
-float floorf(float x) { return __ocml_floor_f32(x); }
-__DEVICE__
-inline
-float fmaf(float x, float y, float z) { return __ocml_fma_f32(x, y, z); }
-__DEVICE__
-inline
-float fmaxf(float x, float y) { return __ocml_fmax_f32(x, y); }
-__DEVICE__
-inline
-float fminf(float x, float y) { return __ocml_fmin_f32(x, y); }
-__DEVICE__
-inline
-float fmodf(float x, float y) { return __ocml_fmod_f32(x, y); }
-__DEVICE__
-inline
-float frexpf(float x, int* nptr)
-{
-    int tmp;
-    float r =
-        __ocml_frexp_f32(x, (__attribute__((address_space(5))) int*) &tmp);
-    *nptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-float hypotf(float x, float y) { return __ocml_hypot_f32(x, y); }
-__DEVICE__
-inline
-int ilogbf(float x) { return __ocml_ilogb_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isfinite(float x) { return __ocml_isfinite_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isinf(float x) { return __ocml_isinf_f32(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isnan(float x) { return __ocml_isnan_f32(x); }
-__DEVICE__
-inline
-float j0f(float x) { return __ocml_j0_f32(x); }
-__DEVICE__
-inline
-float j1f(float x) { return __ocml_j1_f32(x); }
-__DEVICE__
-inline
-float jnf(int n, float x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case.
-    if (n == 0) return j0f(x);
-    if (n == 1) return j1f(x);
-
-    float x0 = j0f(x);
-    float x1 = j1f(x);
-    for (int i = 1; i < n; ++i) {
-        float x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-__DEVICE__
-inline
-float ldexpf(float x, int e) { return __ocml_ldexp_f32(x, e); }
-__DEVICE__
-inline
-float lgammaf(float x) { return __ocml_lgamma_f32(x); }
-__DEVICE__
-inline
-long long int llrintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-long long int llroundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float log10f(float x) { return __ocml_log10_f32(x); }
-__DEVICE__
-inline
-float log1pf(float x) { return __ocml_log1p_f32(x); }
-__DEVICE__
-inline
-float log2f(float x) { return __ocml_log2_f32(x); }
-__DEVICE__
-inline
-float logbf(float x) { return __ocml_logb_f32(x); }
-__DEVICE__
-inline
-float logf(float x) { return __ocml_log_f32(x); }
-__DEVICE__
-inline
-long int lrintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-long int lroundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float modff(float x, float* iptr)
-{
-    float tmp;
-    float r =
-        __ocml_modf_f32(x, (__attribute__((address_space(5))) float*) &tmp);
-    *iptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-float nanf(const char* tagp)
-{
-    union {
-        float val;
-        struct ieee_float {
-            uint32_t mantissa : 22;
-            uint32_t quiet : 1;
-            uint32_t exponent : 8;
-            uint32_t sign : 1;
-        } bits;
-
-        static_assert(sizeof(float) == sizeof(ieee_float), "");
-    } tmp;
-
-    tmp.bits.sign = 0u;
-    tmp.bits.exponent = ~0u;
-    tmp.bits.quiet = 1u;
-    tmp.bits.mantissa = __make_mantissa(tagp);
-
-    return tmp.val;
-}
-__DEVICE__
-inline
-float nearbyintf(float x) { return __ocml_nearbyint_f32(x); }
-__DEVICE__
-inline
-float nextafterf(float x, float y) { return __ocml_nextafter_f32(x, y); }
-__DEVICE__
-inline
-float norm3df(float x, float y, float z) { return __ocml_len3_f32(x, y, z); }
-__DEVICE__
-inline
-float norm4df(float x, float y, float z, float w)
-{
-    return __ocml_len4_f32(x, y, z, w);
-}
-__DEVICE__
-inline
-float normcdff(float x) { return __ocml_ncdf_f32(x); }
-__DEVICE__
-inline
-float normcdfinvf(float x) { return __ocml_ncdfinv_f32(x); }
-__DEVICE__
-inline
-float normf(int dim, const float* a)
-{   // TODO: placeholder until OCML adds support.
-    float r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_sqrt_f32(r);
-}
-__DEVICE__
-inline
-float powf(float x, float y) { return __ocml_pow_f32(x, y); }
-__DEVICE__
-inline
-float powif(float base, int iexp) { return __ocml_pown_f32(base, iexp); }
-__DEVICE__
-inline
-float rcbrtf(float x) { return __ocml_rcbrt_f32(x); }
-__DEVICE__
-inline
-float remainderf(float x, float y) { return __ocml_remainder_f32(x, y); }
-__DEVICE__
-inline
-float remquof(float x, float y, int* quo)
-{
-    int tmp;
-    float r =
-        __ocml_remquo_f32(x, y, (__attribute__((address_space(5))) int*) &tmp);
-    *quo = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-float rhypotf(float x, float y) { return __ocml_rhypot_f32(x, y); }
-__DEVICE__
-inline
-float rintf(float x) { return __ocml_rint_f32(x); }
-__DEVICE__
-inline
-float rnorm3df(float x, float y, float z)
-{
-    return __ocml_rlen3_f32(x, y, z);
-}
-
-__DEVICE__
-inline
-float rnorm4df(float x, float y, float z, float w)
-{
-    return __ocml_rlen4_f32(x, y, z, w);
-}
-__DEVICE__
-inline
-float rnormf(int dim, const float* a)
-{   // TODO: placeholder until OCML adds support.
-    float r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_rsqrt_f32(r);
-}
-__DEVICE__
-inline
-float roundf(float x) { return __ocml_round_f32(x); }
-__DEVICE__
-inline
-float rsqrtf(float x) { return __ocml_rsqrt_f32(x); }
-__DEVICE__
-inline
-float scalblnf(float x, long int n)
-{
-    return (n < INT_MAX) ? __ocml_scalbn_f32(x, n) : __ocml_scalb_f32(x, n);
-}
-__DEVICE__
-inline
-float scalbnf(float x, int n) { return __ocml_scalbn_f32(x, n); }
-__DEVICE__
-inline
-__RETURN_TYPE signbit(float x) { return __ocml_signbit_f32(x); }
-__DEVICE__
-inline
-void sincosf(float x, float* sptr, float* cptr)
-{
-    float tmp;
-
-    *sptr =
-        __ocml_sincos_f32(x, (__attribute__((address_space(5))) float*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-void sincospif(float x, float* sptr, float* cptr)
-{
-    float tmp;
-
-    *sptr =
-        __ocml_sincospi_f32(x, (__attribute__((address_space(5))) float*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-float sinf(float x) { return __ocml_sin_f32(x); }
-__DEVICE__
-inline
-float sinhf(float x) { return __ocml_sinh_f32(x); }
-__DEVICE__
-inline
-float sinpif(float x) { return __ocml_sinpi_f32(x); }
-__DEVICE__
-inline
-float sqrtf(float x) { return __ocml_sqrt_f32(x); }
-__DEVICE__
-inline
-float tanf(float x) { return __ocml_tan_f32(x); }
-__DEVICE__
-inline
-float tanhf(float x) { return __ocml_tanh_f32(x); }
-__DEVICE__
-inline
-float tgammaf(float x) { return __ocml_tgamma_f32(x); }
-__DEVICE__
-inline
-float truncf(float x) { return __ocml_trunc_f32(x); }
-__DEVICE__
-inline
-float y0f(float x) { return __ocml_y0_f32(x); }
-__DEVICE__
-inline
-float y1f(float x) { return __ocml_y1_f32(x); }
-__DEVICE__
-inline
-float ynf(int n, float x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case. Placeholder until OCML adds
-    //       support.
-    if (n == 0) return y0f(x);
-    if (n == 1) return y1f(x);
-
-    float x0 = y0f(x);
-    float x1 = y1f(x);
-    for (int i = 1; i < n; ++i) {
-        float x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-
-// BEGIN INTRINSICS
-__DEVICE__
-inline
-float __cosf(float x) { return __ocml_native_cos_f32(x); }
-__DEVICE__
-inline
-float __exp10f(float x) { return __ocml_native_exp10_f32(x); }
-__DEVICE__
-inline
-float __expf(float x) { return __ocml_native_exp_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fadd_rn(float x, float y) { return x + y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
-__DEVICE__
-inline
-float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fdiv_rn(float x, float y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fdividef(float x, float y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmaf_rd(float x, float y, float z)
-{
-    return __ocml_fma_rtn_f32(x, y, z);
-}
-#endif
-__DEVICE__
-inline
-float __fmaf_rn(float x, float y, float z)
-{
-    return __ocml_fma_f32(x, y, z);
-}
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmaf_ru(float x, float y, float z)
-{
-    return __ocml_fma_rtp_f32(x, y, z);
-}
-__DEVICE__
-inline
-float __fmaf_rz(float x, float y, float z)
-{
-   return __ocml_fma_rtz_f32(x, y, z);
-}
-__DEVICE__
-inline
-float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fmul_rn(float x, float y) { return x * y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fmul_ru(float x, float y)  { return __ocml_mul_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
-__DEVICE__
-inline
-float __frcp_rd(float x) { return __builtin_amdgcn_rcpf(x); }
-#endif
-__DEVICE__
-inline
-float __frcp_rn(float x) { return __builtin_amdgcn_rcpf(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __frcp_ru(float x) { return __builtin_amdgcn_rcpf(x); }
-__DEVICE__
-inline
-float __frcp_rz(float x) { return __builtin_amdgcn_rcpf(x); }
-#endif
-__DEVICE__
-inline
-float __frsqrt_rn(float x) { return __builtin_amdgcn_rsqf(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
-#endif
-__DEVICE__
-inline
-float __fsqrt_rn(float x) { return __ocml_native_sqrt_f32(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
-__DEVICE__
-inline
-float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
-__DEVICE__
-inline
-float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __fsub_rn(float x, float y) { return x - y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
-__DEVICE__
-inline
-float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
-#endif
-__DEVICE__
-inline
-float __log10f(float x) { return __ocml_native_log10_f32(x); }
-__DEVICE__
-inline
-float __log2f(float x) { return __ocml_native_log2_f32(x); }
-__DEVICE__
-inline
-float __logf(float x) { return __ocml_native_log_f32(x); }
-__DEVICE__
-inline
-float __powf(float x, float y) { return __ocml_pow_f32(x, y); }
-__DEVICE__
-inline
-float __saturatef(float x) { return (x < 0) ? 0 : ((x > 1) ? 1 : x); }
-__DEVICE__
-inline
-void __sincosf(float x, float* sptr, float* cptr)
-{
-    *sptr = __ocml_native_sin_f32(x);
-    *cptr = __ocml_native_cos_f32(x);
-}
-__DEVICE__
-inline
-float __sinf(float x) { return __ocml_native_sin_f32(x); }
-__DEVICE__
-inline
-float __tanf(float x) { return __ocml_tan_f32(x); }
-// END INTRINSICS
-// END FLOAT
-
-// BEGIN DOUBLE
-__DEVICE__
-inline
-double abs(double x) { return __ocml_fabs_f64(x); }
-__DEVICE__
-inline
-double acos(double x) { return __ocml_acos_f64(x); }
-__DEVICE__
-inline
-double acosh(double x) { return __ocml_acosh_f64(x); }
-__DEVICE__
-inline
-double asin(double x) { return __ocml_asin_f64(x); }
-__DEVICE__
-inline
-double asinh(double x) { return __ocml_asinh_f64(x); }
-__DEVICE__
-inline
-double atan(double x) { return __ocml_atan_f64(x); }
-__DEVICE__
-inline
-double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
-__DEVICE__
-inline
-double atanh(double x) { return __ocml_atanh_f64(x); }
-__DEVICE__
-inline
-double cbrt(double x) { return __ocml_cbrt_f64(x); }
-__DEVICE__
-inline
-double ceil(double x) { return __ocml_ceil_f64(x); }
-__DEVICE__
-inline
-double copysign(double x, double y) { return __ocml_copysign_f64(x, y); }
-__DEVICE__
-inline
-double cos(double x)  { return __ocml_cos_f64(x); }
-__DEVICE__
-inline
-double cosh(double x) { return __ocml_cosh_f64(x); }
-__DEVICE__
-inline
-double cospi(double x) { return __ocml_cospi_f64(x); }
-__DEVICE__
-inline
-double cyl_bessel_i0(double x) { return __ocml_i0_f64(x); }
-__DEVICE__
-inline
-double cyl_bessel_i1(double x) { return __ocml_i1_f64(x); }
-__DEVICE__
-inline
-double erf(double x) { return __ocml_erf_f64(x); }
-__DEVICE__
-inline
-double erfc(double x) { return __ocml_erfc_f64(x); }
-__DEVICE__
-inline
-double erfcinv(double x) { return __ocml_erfcinv_f64(x); }
-__DEVICE__
-inline
-double erfcx(double x) { return __ocml_erfcx_f64(x); }
-__DEVICE__
-inline
-double erfinv(double x) { return __ocml_erfinv_f64(x); }
-__DEVICE__
-inline
-double exp(double x) { return __ocml_exp_f64(x); }
-__DEVICE__
-inline
-double exp10(double x) { return __ocml_exp10_f64(x); }
-__DEVICE__
-inline
-double exp2(double x) { return __ocml_exp2_f64(x); }
-__DEVICE__
-inline
-double expm1(double x) { return __ocml_expm1_f64(x); }
-__DEVICE__
-inline
-double fabs(double x) { return __ocml_fabs_f64(x); }
-__DEVICE__
-inline
-double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
-__DEVICE__
-inline
-double floor(double x) { return __ocml_floor_f64(x); }
-__DEVICE__
-inline
-double fma(double x, double y, double z) { return __ocml_fma_f64(x, y, z); }
-__DEVICE__
-inline
-double fmax(double x, double y) { return __ocml_fmax_f64(x, y); }
-__DEVICE__
-inline
-double fmin(double x, double y) { return __ocml_fmin_f64(x, y); }
-__DEVICE__
-inline
-double fmod(double x, double y) { return __ocml_fmod_f64(x, y); }
-__DEVICE__
-inline
-double frexp(double x, int* nptr)
-{
-    int tmp;
-    double r =
-        __ocml_frexp_f64(x, (__attribute__((address_space(5))) int*) &tmp);
-    *nptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-double hypot(double x, double y) { return __ocml_hypot_f64(x, y); }
-__DEVICE__
-inline
-int ilogb(double x) { return __ocml_ilogb_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isfinite(double x) { return __ocml_isfinite_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isinf(double x) { return __ocml_isinf_f64(x); }
-__DEVICE__
-inline
-__RETURN_TYPE isnan(double x) { return __ocml_isnan_f64(x); }
-__DEVICE__
-inline
-double j0(double x) { return __ocml_j0_f64(x); }
-__DEVICE__
-inline
-double j1(double x) { return __ocml_j1_f64(x); }
-__DEVICE__
-inline
-double jn(int n, double x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case. Placeholder until OCML adds
-    //       support.
-    if (n == 0) return j0f(x);
-    if (n == 1) return j1f(x);
-
-    double x0 = j0f(x);
-    double x1 = j1f(x);
-    for (int i = 1; i < n; ++i) {
-        double x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-__DEVICE__
-inline
-double ldexp(double x, int e) { return __ocml_ldexp_f64(x, e); }
-__DEVICE__
-inline
-double lgamma(double x) { return __ocml_lgamma_f64(x); }
-__DEVICE__
-inline
-long long int llrint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-long long int llround(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double log(double x) { return __ocml_log_f64(x); }
-__DEVICE__
-inline
-double log10(double x) { return __ocml_log10_f64(x); }
-__DEVICE__
-inline
-double log1p(double x) { return __ocml_log1p_f64(x); }
-__DEVICE__
-inline
-double log2(double x) { return __ocml_log2_f64(x); }
-__DEVICE__
-inline
-double logb(double x) { return __ocml_logb_f64(x); }
-__DEVICE__
-inline
-long int lrint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-long int lround(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double modf(double x, double* iptr)
-{
-    double tmp;
-    double r =
-        __ocml_modf_f64(x, (__attribute__((address_space(5))) double*) &tmp);
-    *iptr = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-double nan(const char* tagp)
-{
-#if !_WIN32
-    union {
-        double val;
-        struct ieee_double {
-            uint64_t mantissa : 51;
-            uint32_t quiet : 1;
-            uint32_t exponent : 11;
-            uint32_t sign : 1;
-        }  bits;
-        static_assert(sizeof(double) == sizeof(ieee_double), "");
-    } tmp;
-
-    tmp.bits.sign = 0u;
-    tmp.bits.exponent = ~0u;
-    tmp.bits.quiet = 1u;
-    tmp.bits.mantissa = __make_mantissa(tagp);
-
-    return tmp.val;
-#else
-    static_assert(sizeof(uint64_t)==sizeof(double));
-    uint64_t val = __make_mantissa(tagp);
-    val |= 0xFFF << 51;
-    return *reinterpret_cast<double*>(&val);
-#endif
-}
-__DEVICE__
-inline
-double nearbyint(double x) { return __ocml_nearbyint_f64(x); }
-__DEVICE__
-inline
-double nextafter(double x, double y) { return __ocml_nextafter_f64(x, y); }
-__DEVICE__
-inline
-double norm(int dim, const double* a)
-{   // TODO: placeholder until OCML adds support.
-    double r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_sqrt_f64(r);
-}
-__DEVICE__
-inline
-double norm3d(double x, double y, double z)
-{
-    return __ocml_len3_f64(x, y, z);
-}
-__DEVICE__
-inline
-double norm4d(double x, double y, double z, double w)
-{
-    return __ocml_len4_f64(x, y, z, w);
-}
-__DEVICE__
-inline
-double normcdf(double x) { return __ocml_ncdf_f64(x); }
-__DEVICE__
-inline
-double normcdfinv(double x) { return __ocml_ncdfinv_f64(x); }
-__DEVICE__
-inline
-double pow(double x, double y) { return __ocml_pow_f64(x, y); }
-__DEVICE__
-inline
-double powi(double base, int iexp) { return __ocml_pown_f64(base, iexp); }
-__DEVICE__
-inline
-double rcbrt(double x) { return __ocml_rcbrt_f64(x); }
-__DEVICE__
-inline
-double remainder(double x, double y) { return __ocml_remainder_f64(x, y); }
-__DEVICE__
-inline
-double remquo(double x, double y, int* quo)
-{
-    int tmp;
-    double r =
-        __ocml_remquo_f64(x, y, (__attribute__((address_space(5))) int*) &tmp);
-    *quo = tmp;
-
-    return r;
-}
-__DEVICE__
-inline
-double rhypot(double x, double y) { return __ocml_rhypot_f64(x, y); }
-__DEVICE__
-inline
-double rint(double x) { return __ocml_rint_f64(x); }
-__DEVICE__
-inline
-double rnorm(int dim, const double* a)
-{   // TODO: placeholder until OCML adds support.
-    double r = 0;
-    while (dim--) { r += a[0] * a[0]; ++a; }
-
-    return __ocml_rsqrt_f64(r);
-}
-__DEVICE__
-inline
-double rnorm3d(double x, double y, double z)
-{
-    return __ocml_rlen3_f64(x, y, z);
-}
-__DEVICE__
-inline
-double rnorm4d(double x, double y, double z, double w)
-{
-    return __ocml_rlen4_f64(x, y, z, w);
-}
-__DEVICE__
-inline
-double round(double x) { return __ocml_round_f64(x); }
-__DEVICE__
-inline
-double rsqrt(double x) { return __ocml_rsqrt_f64(x); }
-__DEVICE__
-inline
-double scalbln(double x, long int n)
-{
-    return (n < INT_MAX) ? __ocml_scalbn_f64(x, n) : __ocml_scalb_f64(x, n);
-}
-__DEVICE__
-inline
-double scalbn(double x, int n) { return __ocml_scalbn_f64(x, n); }
-__DEVICE__
-inline
-__RETURN_TYPE signbit(double x) { return __ocml_signbit_f64(x); }
-__DEVICE__
-inline
-double sin(double x) { return __ocml_sin_f64(x); }
-__DEVICE__
-inline
-void sincos(double x, double* sptr, double* cptr)
-{
-    double tmp;
-    *sptr =
-        __ocml_sincos_f64(x, (__attribute__((address_space(5))) double*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-void sincospi(double x, double* sptr, double* cptr)
-{
-    double tmp;
-    *sptr = __ocml_sincospi_f64(
-        x, (__attribute__((address_space(5))) double*) &tmp);
-    *cptr = tmp;
-}
-__DEVICE__
-inline
-double sinh(double x) { return __ocml_sinh_f64(x); }
-__DEVICE__
-inline
-double sinpi(double x) { return __ocml_sinpi_f64(x); }
-__DEVICE__
-inline
-double sqrt(double x) { return __ocml_sqrt_f64(x); }
-__DEVICE__
-inline
-double tan(double x) { return __ocml_tan_f64(x); }
-__DEVICE__
-inline
-double tanh(double x) { return __ocml_tanh_f64(x); }
-__DEVICE__
-inline
-double tgamma(double x) { return __ocml_tgamma_f64(x); }
-__DEVICE__
-inline
-double trunc(double x) { return __ocml_trunc_f64(x); }
-__DEVICE__
-inline
-double y0(double x) { return __ocml_y0_f64(x); }
-__DEVICE__
-inline
-double y1(double x) { return __ocml_y1_f64(x); }
-__DEVICE__
-inline
-double yn(int n, double x)
-{   // TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
-    //       for linear recurrences to get O(log n) steps, but it's unclear if
-    //       it'd be beneficial in this case. Placeholder until OCML adds
-    //       support.
-    if (n == 0) return j0f(x);
-    if (n == 1) return j1f(x);
-
-    double x0 = j0f(x);
-    double x1 = j1f(x);
-    for (int i = 1; i < n; ++i) {
-        double x2 = (2 * i) / x * x1 - x0;
-        x0 = x1;
-        x1 = x2;
-    }
-
-    return x1;
-}
-
-// BEGIN INTRINSICS
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dadd_rn(double x, double y) { return x + y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __ddiv_rn(double x, double y) { return x / y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dmul_rn(double x, double y) { return x * y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __drcp_rd(double x) { return __builtin_amdgcn_rcp(x); }
-#endif
-__DEVICE__
-inline
-double __drcp_rn(double x) { return __builtin_amdgcn_rcp(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __drcp_ru(double x) { return __builtin_amdgcn_rcp(x); }
-__DEVICE__
-inline
-double __drcp_rz(double x) { return __builtin_amdgcn_rcp(x); }
-__DEVICE__
-inline
-double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
-#endif
-__DEVICE__
-inline
-double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
-__DEVICE__
-inline
-double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
-__DEVICE__
-inline
-double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
-#endif
-__DEVICE__
-inline
-double __dsub_rn(double x, double y) { return x - y; }
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
-__DEVICE__
-inline
-double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
-__DEVICE__
-inline
-double __fma_rd(double x, double y, double z)
-{
-    return __ocml_fma_rtn_f64(x, y, z);
-}
-#endif
-__DEVICE__
-inline
-double __fma_rn(double x, double y, double z)
-{
-    return __ocml_fma_f64(x, y, z);
-}
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-__DEVICE__
-inline
-double __fma_ru(double x, double y, double z)
-{
-    return __ocml_fma_rtp_f64(x, y, z);
-}
-__DEVICE__
-inline
-double __fma_rz(double x, double y, double z)
-{
-    return __ocml_fma_rtz_f64(x, y, z);
-}
-#endif
-// END INTRINSICS
-// END DOUBLE
-
-// BEGIN INTEGER
-__DEVICE__
-inline
-int abs(int x)
-{
-    int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
-    return (x ^ sgn) - sgn;
-}
-__DEVICE__
-inline
-long labs(long x)
-{
-    long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
-    return (x ^ sgn) - sgn;
-}
-__DEVICE__
-inline
-long long llabs(long long x)
-{
-    long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
-    return (x ^ sgn) - sgn;
-}
-
-#if defined(__cplusplus)
-    __DEVICE__
-    inline
-    long abs(long x) { return labs(x); }
-    __DEVICE__
-    inline
-    long long abs(long long x) { return llabs(x); }
-#endif
-// END INTEGER
-
-__DEVICE__
-inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
-    return __ocml_fma_f16(x, y, z);
-}
-
-__DEVICE__
-inline float fma(float x, float y, float z) {
-    return fmaf(x, y, z);
-}
-
-#pragma push_macro("__DEF_FLOAT_FUN")
-#pragma push_macro("__DEF_FLOAT_FUN2")
-#pragma push_macro("__DEF_FLOAT_FUN2I")
-#pragma push_macro("__HIP_OVERLOAD")
-#pragma push_macro("__HIP_OVERLOAD2")
-
-// __hip_enable_if::type is a type function which returns __T if __B is true.
-template<bool __B, class __T = void>
-struct __hip_enable_if {};
-
-template <class __T> struct __hip_enable_if<true, __T> {
-  typedef __T type;
-};
-
-// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
-// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with
-// floor(double).
-#define __HIP_OVERLOAD1(__retty, __fn)                                         \
-  template <typename __T>                                                      \
-  __DEVICE__                                                                   \
-      typename __hip_enable_if<std::numeric_limits<__T>::is_integer,           \
-                                      __retty>::type                           \
-      __fn(__T __x) {                                                          \
-    return ::__fn((double)__x);                                                \
-  }
-
-// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
-// or integer argument to avoid compilation error due to ambibuity. e.g.
-// max(5.0f, 6.0) is resolved with max(double, double).
-#define __HIP_OVERLOAD2(__retty, __fn)                                         \
-  template <typename __T1, typename __T2>                                      \
-  __DEVICE__ typename __hip_enable_if<                                         \
-      std::numeric_limits<__T1>::is_specialized &&                             \
-          std::numeric_limits<__T2>::is_specialized,                           \
-      __retty>::type                                                           \
-  __fn(__T1 __x, __T2 __y) {                                                   \
-    return __fn((double)__x, (double)__y);                                     \
-  }
-
-// Define cmath functions with float argument and returns float.
-#define __DEF_FUN1(retty, func) \
-__DEVICE__ \
-inline \
-float func(float x) \
-{ \
-  return func##f(x); \
-} \
-__HIP_OVERLOAD1(retty, func)
-
-// Define cmath functions with float argument and returns retty.
-#define __DEF_FUNI(retty, func) \
-__DEVICE__ \
-inline \
-retty func(float x) \
-{ \
-  return func##f(x); \
-} \
-__HIP_OVERLOAD1(retty, func)
-
-// define cmath functions with two float arguments.
-#define __DEF_FUN2(retty, func) \
-__DEVICE__ \
-inline \
-float func(float x, float y) \
-{ \
-  return func##f(x, y); \
-} \
-__HIP_OVERLOAD2(retty, func)
-
-__DEF_FUN1(double, acos)
-__DEF_FUN1(double, acosh)
-__DEF_FUN1(double, asin)
-__DEF_FUN1(double, asinh)
-__DEF_FUN1(double, atan)
-__DEF_FUN2(double, atan2);
-__DEF_FUN1(double, atanh)
-__DEF_FUN1(double, cbrt)
-__DEF_FUN1(double, ceil)
-__DEF_FUN2(double, copysign);
-__DEF_FUN1(double, cos)
-__DEF_FUN1(double, cosh)
-__DEF_FUN1(double, erf)
-__DEF_FUN1(double, erfc)
-__DEF_FUN1(double, exp)
-__DEF_FUN1(double, exp2)
-__DEF_FUN1(double, expm1)
-__DEF_FUN1(double, fabs)
-__DEF_FUN2(double, fdim);
-__DEF_FUN1(double, floor)
-__DEF_FUN2(double, fmax);
-__DEF_FUN2(double, fmin);
-__DEF_FUN2(double, fmod);
-//__HIP_OVERLOAD1(int, fpclassify)
-__DEF_FUN2(double, hypot);
-__DEF_FUNI(int, ilogb)
-__HIP_OVERLOAD1(bool, isfinite)
-__HIP_OVERLOAD2(bool, isgreater);
-__HIP_OVERLOAD2(bool, isgreaterequal);
-__HIP_OVERLOAD1(bool, isinf);
-__HIP_OVERLOAD2(bool, isless);
-__HIP_OVERLOAD2(bool, islessequal);
-__HIP_OVERLOAD2(bool, islessgreater);
-__HIP_OVERLOAD1(bool, isnan);
-//__HIP_OVERLOAD1(bool, isnormal)
-__HIP_OVERLOAD2(bool, isunordered);
-__DEF_FUN1(double, lgamma)
-__DEF_FUN1(double, log)
-__DEF_FUN1(double, log10)
-__DEF_FUN1(double, log1p)
-__DEF_FUN1(double, log2)
-__DEF_FUN1(double, logb)
-__DEF_FUNI(long long, llrint)
-__DEF_FUNI(long long, llround)
-__DEF_FUNI(long, lrint)
-__DEF_FUNI(long, lround)
-__DEF_FUN1(double, nearbyint);
-__DEF_FUN2(double, nextafter);
-__DEF_FUN2(double, pow);
-__DEF_FUN2(double, remainder);
-__DEF_FUN1(double, rint);
-__DEF_FUN1(double, round);
-__HIP_OVERLOAD1(bool, signbit)
-__DEF_FUN1(double, sin)
-__DEF_FUN1(double, sinh)
-__DEF_FUN1(double, sqrt)
-__DEF_FUN1(double, tan)
-__DEF_FUN1(double, tanh)
-__DEF_FUN1(double, tgamma)
-__DEF_FUN1(double, trunc);
-
-// define cmath functions with a float and an integer argument.
-#define __DEF_FLOAT_FUN2I(func) \
-__DEVICE__ \
-inline \
-float func(float x, int y) \
-{ \
-  return func##f(x, y); \
-}
-__DEF_FLOAT_FUN2I(scalbn)
-__DEF_FLOAT_FUN2I(ldexp)
-
-template<class T>
-__DEVICE__ inline T min(T arg1, T arg2) {
-  return (arg1 < arg2) ? arg1 : arg2;
-}
-
-template<class T>
-__DEVICE__ inline T max(T arg1, T arg2) {
-  return (arg1 > arg2) ? arg1 : arg2;
-}
-
-__DEVICE__ inline int min(int arg1, int arg2) {
-  return (arg1 < arg2) ? arg1 : arg2;
-}
-__DEVICE__ inline int max(int arg1, int arg2) {
-  return (arg1 > arg2) ? arg1 : arg2;
-}
-
-__DEVICE__ inline int min(uint32_t arg1, int arg2) {
-  return (arg1 < arg2) ? arg1 : arg2;
-}
-__DEVICE__ inline int max(uint32_t arg1, int arg2) {
-  return (arg1 > arg2) ? arg1 : arg2;
-}
-
-__DEVICE__
-inline
-float max(float x, float y) {
-  return fmaxf(x, y);
-}
-
-__DEVICE__
-inline
-double max(double x, double y) {
-  return fmax(x, y);
-}
-
-__DEVICE__
-inline
-float min(float x, float y) {
-  return fminf(x, y);
-}
-
-__DEVICE__
-inline
-double min(double x, double y) {
-  return fmin(x, y);
-}
-
-__HIP_OVERLOAD2(double, max)
-__HIP_OVERLOAD2(double, min)
-
-#if !defined(__HIPCC_RTC__)
-__host__ inline static int min(int arg1, int arg2) {
-  return std::min(arg1, arg2);
-}
-
-__host__ inline static int max(int arg1, int arg2) {
-  return std::max(arg1, arg2);
-}
-#endif // !defined(__HIPCC_RTC__)
-
-__DEVICE__
-inline float pow(float base, int iexp) {
-  return powif(base, iexp);
-}
-
-__DEVICE__
-inline double pow(double base, int iexp) {
-  return powi(base, iexp);
-}
-
-__DEVICE__
-inline _Float16 pow(_Float16 base, int iexp) {
-  return __ocml_pown_f16(base, iexp);
-}
-
-#pragma pop_macro("__DEF_FLOAT_FUN")
-#pragma pop_macro("__DEF_FLOAT_FUN2")
-#pragma pop_macro("__DEF_FLOAT_FUN2I")
-#pragma pop_macro("__HIP_OVERLOAD")
-#pragma pop_macro("__HIP_OVERLOAD2")
-
-#endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
-
 #pragma pop_macro("__DEVICE__")
 #pragma pop_macro("__RETURN_TYPE")
-
 // For backward compatibility.
 // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
 // defined after including math_functions.h.
+#if !defined(__HIPCC_RTC__)
 #include <hip/amd_detail/amd_hip_runtime.h>
+#endif
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h
index bdba2ef16e..5974fb6374 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_surface_functions.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2018 - 2022 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -23,12 +23,33 @@ THE SOFTWARE.
 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H
 #define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
+#pragma clang diagnostic ignored "-Wc++17-extensions"
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wextra-semi-stmt"
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wunused-template"
+#endif
+
 #if defined(__cplusplus)
 
+#if !defined(__HIPCC_RTC__)
 #include <hip/surface_types.h>
 #include <hip/hip_vector_types.h>
 #include <hip/amd_detail/texture_fetch_functions.h>
 #include <hip/amd_detail/ockl_image.h>
+#endif
+
+#if defined(__HIPCC_RTC__)
+#define __HOST_DEVICE__ __device__
+#else
+#define __HOST_DEVICE__ __host__ __device__
+#endif
 
 #define __HIP_SURFACE_OBJECT_PARAMETERS_INIT                                                            \
     unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)surfObj; 
@@ -232,4 +253,9 @@ static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfa
 }
 
 #endif
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
 #endif
diff --git a/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h
index 0222870590..c055ad1d8a 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -31,7 +31,16 @@ THE SOFTWARE.
 #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H
 #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#endif
+
+#if !defined(__HIPCC_RTC__)
 #include "hip/amd_detail/host_defines.h"
+#endif
 
 typedef unsigned char uchar;
 typedef unsigned short ushort;
@@ -128,4 +137,8 @@ __device__ inline static __local void* __to_local(unsigned x) { return (__local
 #define __CLK_LOCAL_MEM_FENCE    0x01
 typedef unsigned __cl_mem_fence_flags;
 
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
 #endif
diff --git a/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h b/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h
index 9e999268ea..6c471c5fa1 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/math_fwd.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -22,13 +22,22 @@ THE SOFTWARE.
 
 #pragma once
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wundef"
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
+
+#if !defined(__HIPCC_RTC__)
 #include "host_defines.h"
+#endif
+
 #if defined(__cplusplus)
     extern "C" {
 #endif
 
 // DOT FUNCTIONS
-#if __HIP_CLANG_ONLY__
+#if defined(__clang__) && defined(__HIP__)
 __device__
 __attribute__((const))
 int __ockl_sdot2(
@@ -692,3 +701,7 @@ double __ocml_fma_rtz_f64(double, double, double);
 #if defined(__cplusplus)
     } // extern "C"
 #endif
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
diff --git a/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h b/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h
index a3fa616cc5..136c6e352f 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/ockl_image.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -22,7 +22,14 @@ THE SOFTWARE.
 
 #pragma once
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#endif
+
+#if !defined(__HIPCC_RTC__)
 #include <hip/hip_vector_types.h>
+#endif
 
 extern "C" {
 
@@ -172,4 +179,8 @@ __device__ int __ockl_image_channel_order_CM(unsigned int ADDRESS_SPACE_CONSTANT
 
 __device__ int __ockl_image_channel_order_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i);
 
-};
\ No newline at end of file
+}
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
\ No newline at end of file
diff --git a/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h b/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h
index f1cd70ab05..baf2b28039 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/texture_fetch_functions.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -22,13 +22,24 @@ THE SOFTWARE.
 
 #pragma once
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++17-extensions"
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wextra-semi-stmt"
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wunused-template"
+#endif
+
 #if defined(__cplusplus)
 
+#if !defined(__HIPCC_RTC__)
 #include <hip/hip_vector_types.h>
 #include <hip/hip_texture_types.h>
 #include <hip/amd_detail/ockl_image.h>
-
-#if !defined(__HIPCC_RTC__)
 #include <type_traits>
 #endif // !defined(__HIPCC_RTC__)
 
@@ -490,3 +501,7 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t<T, rea
 }
 
 #endif
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
diff --git a/projects/clr/hipamd/include/hip/amd_detail/texture_indirect_functions.h b/projects/clr/hipamd/include/hip/amd_detail/texture_indirect_functions.h
index 85ebfa862f..54e77723e8 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/texture_indirect_functions.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/texture_indirect_functions.h
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -22,14 +22,24 @@ THE SOFTWARE.
 
 #pragma once
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++17-extensions"
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wunused-template"
+#endif
+
 #if defined(__cplusplus)
 
+#if !defined(__HIPCC_RTC__)
 #include <hip/hip_vector_types.h>
 #include <hip/hip_texture_types.h>
 #include <hip/amd_detail/texture_fetch_functions.h>
 #include <hip/amd_detail/ockl_image.h>
-
-#if !defined(__HIPCC_RTC__)
 #include <type_traits>
 #endif // !defined(__HIPCC_RTC__)
 
@@ -208,7 +218,7 @@ static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject
         return __hipMapFrom<T>(tmp);
         break;
     }
-    };
+    }
     return {};
 }
 
@@ -477,3 +487,7 @@ static __device__ __hip_img_chk__ void texCubemapLayeredGrad(T *ptr, hipTextureO
 }
 
 #endif
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
\ No newline at end of file
diff --git a/projects/clr/hipamd/packaging/CMakeLists.txt b/projects/clr/hipamd/packaging/CMakeLists.txt
index d0eb8fe27d..cef02d4296 100644
--- a/projects/clr/hipamd/packaging/CMakeLists.txt
+++ b/projects/clr/hipamd/packaging/CMakeLists.txt
@@ -60,7 +60,7 @@ if(HIP_PLATFORM STREQUAL "amd" )
 
 #TODO:This do not belong in BINARY package.
 #Keeping it as is for now
-install(FILES ${CMAKE_BINARY_DIR}/hipamd/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary)
+install(FILES ${CMAKE_BINARY_DIR}/hipamd/share/hip/.hipInfo DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary)
 
 install ( EXPORT hip-targets  FILE hip-targets.cmake NAMESPACE hip:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip  COMPONENT binary)
 install ( EXPORT hip-lang-targets  FILE hip-lang-targets.cmake NAMESPACE hip-lang:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip-lang  COMPONENT binary)
@@ -94,11 +94,11 @@ install(FILES ${CMAKE_BINARY_DIR}/hipamd/include/hip/amd_detail/hip_prof_str.h
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/amd_detail COMPONENT dev)
 install(FILES ${CMAKE_BINARY_DIR}/hipamd/include/hip/hip_version.h
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev)
-install(FILES ${CMAKE_BINARY_DIR}/hipamd/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip COMPONENT dev)
+install(FILES ${CMAKE_BINARY_DIR}/hipamd/share/hip/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip COMPONENT dev)
 # .hipVersion is added to satisfy Windows compute build.
 #TODO to be removed
 if(WIN32)
-    install(FILES ${CMAKE_BINARY_DIR}/hipamd/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion COMPONENT dev)
+    install(FILES ${CMAKE_BINARY_DIR}/hipamd/share/hip/version DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME .hipVersion COMPONENT dev)
 endif()
 install(DIRECTORY ${HIP_COMMON_DIR}/cmake/ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT dev)
 install(FILES ${CMAKE_BINARY_DIR}/hipamd/hip-config.cmake ${CMAKE_BINARY_DIR}/hipamd/hip-config-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/hip COMPONENT dev)
diff --git a/projects/clr/hipamd/src/CMakeLists.txt b/projects/clr/hipamd/src/CMakeLists.txt
index bd791e2774..97f4d0ea36 100644
--- a/projects/clr/hipamd/src/CMakeLists.txt
+++ b/projects/clr/hipamd/src/CMakeLists.txt
@@ -261,9 +261,11 @@ endif()
 target_compile_definitions(amdhip64 PUBLIC USE_PROF_API=1)
 
 add_custom_command(TARGET amdhip64 POST_BUILD COMMAND
-  ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/.hipInfo ${PROJECT_BINARY_DIR}/lib/.hipInfo)
+  ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/share/hip/.hipInfo ${PROJECT_BINARY_DIR}/lib/.hipInfo)
 add_custom_command(TARGET amdhip64 POST_BUILD COMMAND
   ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/include ${PROJECT_BINARY_DIR}/include)
+add_custom_command(TARGET amdhip64 POST_BUILD COMMAND
+  ${CMAKE_COMMAND} -E copy_directory ${HIP_COMMON_INCLUDE_DIR} ${PROJECT_BINARY_DIR}/include)
 
 add_library(host INTERFACE)
 target_link_libraries(host INTERFACE amdhip64)
diff --git a/projects/clr/hipamd/src/hiprtc/CMakeLists.txt b/projects/clr/hipamd/src/hiprtc/CMakeLists.txt
index 8bafd2ece2..39c1c6a12a 100644
--- a/projects/clr/hipamd/src/hiprtc/CMakeLists.txt
+++ b/projects/clr/hipamd/src/hiprtc/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 - 2022 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2020 - 2023 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -129,12 +129,33 @@ set(HIPRTC_GEN_HEADER "${HIPRTC_GEN_DIR}/hipRTC_header.h")
 set(HIPRTC_GEN_MCIN "${HIPRTC_GEN_DIR}/hipRTC_header.mcin")
 set(HIPRTC_GEN_PREPROCESSED "${HIPRTC_GEN_DIR}/hipRTC")
 set(HIPRTC_GEN_OBJ "${HIPRTC_GEN_DIR}/hipRTC_header${CMAKE_CXX_OUTPUT_EXTENSION}")
-set(HIPRTC_WARP_FUNCS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_warp_functions.h")
-set(HIPRTC_FP16_MATH_FWD "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_fp16_math_fwd.h")
-set(HIPRTC_FP16_FUNCS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_fp16.h")
-set(HIPRTC_COOP_GROUPS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_cooperative_groups.h")
-set(HIPRTC_COOP_GRPS_HELPER "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_cooperative_groups_helper.h")
-set(HIPRTC_UNSAFE_ATOMICS "${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_unsafe_atomics.h")
+# list of headers which needs to be appended to the hiprtc preprocessed file
+set(HIPRTC_HEADERS
+${HIP_COMMON_INCLUDE_DIR}/hip/hip_common.h
+${HIP_COMMON_INCLUDE_DIR}/hip/library_types.h
+${HIP_COMMON_INCLUDE_DIR}/hip/driver_types.h
+${HIP_COMMON_INCLUDE_DIR}/hip/surface_types.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_channel_descriptor.h
+${HIP_COMMON_INCLUDE_DIR}/hip/texture_types.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/ockl_image.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/texture_fetch_functions.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/texture_indirect_functions.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_surface_functions.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_complex.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_math_constants.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/math_fwd.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/device_library_decls.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_device_functions.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_warp_functions.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_cooperative_groups_helper.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_cooperative_groups.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_unsafe_atomics.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_atomic.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/math_fwd.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_fp16_math_fwd.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_fp16.h
+${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_math_functions.h
+)
 
 # Generate required HIPRTC files.
 FILE(MAKE_DIRECTORY ${HIPRTC_GEN_DIR})
@@ -147,7 +168,7 @@ generate_hiprtc_mcin("${HIPRTC_GEN_MCIN}" "${HIPRTC_GEN_PREPROCESSED}")
 add_custom_command(
   OUTPUT ${HIPRTC_GEN_PREPROCESSED}
   COMMAND $<TARGET_FILE:clang> -O3 --rocm-path=${PROJECT_SOURCE_DIR}/include/.. -std=c++17 -nogpulib --hip-version=3.6 -isystem ${HIP_COMMON_INCLUDE_DIR} -isystem ${PROJECT_SOURCE_DIR}/include -isystem ${PROJECT_BINARY_DIR}/include -isystem ${CMAKE_CURRENT_SOURCE_DIR}/include --cuda-device-only -D__HIPCC_RTC__ -x hip ${HIPRTC_GEN_HEADER} -E -o ${HIPRTC_GEN_PREPROCESSED}
-  COMMAND ${CMAKE_COMMAND} -DHIPRTC_ADD_MACROS=1 -DHIPRTC_WARP_HEADER_FILE=${HIPRTC_WARP_FUNCS} -DHIPRTC_COOP_HEADER_FILE=${HIPRTC_COOP_GROUPS} -DHIPRTC_COOP_HELPER_FILE=${HIPRTC_COOP_GRPS_HELPER} -DHIPRTC_UNSAFE_ATOMICS_FILE=${HIPRTC_UNSAFE_ATOMICS} -DHIPRTC_FP16_MATH_FWD_FILE=${HIPRTC_FP16_MATH_FWD} -DHIPRTC_FP16_HEADER_FILE=${HIPRTC_FP16_FUNCS} -DHIPRTC_PREPROCESSED_FILE=${HIPRTC_GEN_PREPROCESSED} -P ${HIPRTC_CMAKE}
+  COMMAND ${CMAKE_COMMAND} -DHIPRTC_ADD_MACROS=1 -DHIPRTC_HEADERS="${HIPRTC_HEADERS}" -DHIPRTC_PREPROCESSED_FILE=${HIPRTC_GEN_PREPROCESSED} -P ${HIPRTC_CMAKE}
   DEPENDS clang ${HIPRTC_GEN_HEADER})
 add_custom_command(
   OUTPUT ${HIPRTC_GEN_OBJ}
diff --git a/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake b/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake
index 730d013e28..187e961648 100644
--- a/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake
+++ b/projects/clr/hipamd/src/hiprtc/cmake/HIPRTC.cmake
@@ -30,6 +30,8 @@ function(get_hiprtc_macros HIPRTC_DEFINES)
 "#pragma clang diagnostic push\n\
 #pragma clang diagnostic ignored \"-Wreserved-id-macro\"\n\
 #pragma clang diagnostic ignored \"-Wc++98-compat-pedantic\"\n\
+#pragma clang diagnostic ignored \"-Wreserved-macro-identifier\"\n\
+#pragma clang diagnostic ignored \"-Wundef\"\n\
 #define __device__ __attribute__((device))\n\
 #define __host__ __attribute__((host))\n\
 #define __global__ __attribute__((global))\n\
@@ -40,7 +42,11 @@ function(get_hiprtc_macros HIPRTC_DEFINES)
 #define __noinline__ __attribute__((noinline))\n\
 #endif\n\
 #define __forceinline__ inline __attribute__((always_inline))\n\
-
+#if __HIP_NO_IMAGE_SUPPORT\n\
+#define __hip_img_chk__ __attribute__((unavailable(\"The image/texture API not supported on the device\")))\n\
+#else\n\
+#define __hip_img_chk__\n\
+#endif\n\
 #define launch_bounds_impl0(requiredMaxThreadsPerBlock)                                       \\\n\
     __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock)))\n\
 #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)           \\\n\
@@ -49,13 +55,11 @@ function(get_hiprtc_macros HIPRTC_DEFINES)
 #define select_impl_(_1, _2, impl_, ...) impl_\n\
 #define __launch_bounds__(...)                                                                \\\n\
     select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__)           \n\
-#pragma clang diagnostic pop\n\
 #define HIP_INCLUDE_HIP_HIP_RUNTIME_H\n\
-#pragma clang diagnostic push\n\
-#pragma clang diagnostic ignored \"-Wreserved-macro-identifier\"\n\
 #define _HIP_BFLOAT16_H_\n\
-#pragma clang diagnostic pop\n\
-#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H"
+#define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H\n\
+#define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H\n\
+#pragma clang diagnostic pop"
   PARENT_SCOPE)
 endfunction(get_hiprtc_macros)
 
@@ -64,20 +68,15 @@ if(HIPRTC_ADD_MACROS)
   message(STATUS "Appending hiprtc macros to ${HIPRTC_PREPROCESSED_FILE}.")
   get_hiprtc_macros(HIPRTC_DEFINES)
   FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_DEFINES}")
-  FILE(READ "${HIPRTC_WARP_HEADER_FILE}" HIPRTC_WARP_HEADER)
-  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_WARP_HEADER}")
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wreserved-macro-identifier"
-  FILE(READ "${HIPRTC_COOP_HELPER_FILE}" HIPRTC_COOP_HELPER)
-  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_COOP_HELPER}")
-  FILE(READ "${HIPRTC_COOP_HEADER_FILE}" HIPRTC_COOP_HEADER)
-  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_COOP_HEADER}")
-  FILE(READ "${HIPRTC_UNSAFE_ATOMICS_FILE}" HIPRTC_UNSAFE_ATOMICS)
-  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_UNSAFE_ATOMICS}")
-  FILE(READ "${HIPRTC_FP16_MATH_FWD_FILE}" HIPRTC_FP16_MATH_FWD)
-  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_FP16_MATH_FWD}")
-  FILE(READ "${HIPRTC_FP16_HEADER_FILE}" HIPRTC_FP16_HEADER)
-  FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_FP16_HEADER}")
+  set(HIPRTC_HEADER_LIST ${HIPRTC_HEADERS})
+  separate_arguments(HIPRTC_HEADER_LIST)
+# appends all the headers from the list to the hiprtc preprocessed file
+  foreach(header ${HIPRTC_HEADER_LIST})
+    FILE(READ "${header}" HEADER_FILE)
+    FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HEADER_FILE}")
+  endforeach()
 #pragma clang diagnostic pop
 endif()