From 1d88a5df2c64cc5dd103d94f2cc210de82b80696 Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Fri, 15 Jul 2022 09:22:46 -0700 Subject: [PATCH] SWDEV-343278 - fix conversion functions Change-Id: Ib93f886363314e980462ef562c998981bb958569 [ROCm/clr commit: a47887aa0d6ddd7e98a8648f7e06f6a39e7b5423] --- .../hip/amd_detail/amd_device_functions.h | 91 +++++++++++++------ .../hip/amd_detail/device_library_decls.h | 27 ++++++ 2 files changed, 91 insertions(+), 27 deletions(-) diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h index 44610d4679..22654b8ccb 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -426,15 +426,16 @@ __device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) { return out; } -/* - * Rounding modes are not yet supported in HIP - * TODO: Conversion functions are not correct, need to fix when BE is ready -*/ - -__device__ static inline float __double2float_rd(double x) { return (double)x; } -__device__ static inline float __double2float_rn(double x) { return (double)x; } -__device__ static inline float __double2float_ru(double x) { return (double)x; } -__device__ static inline float __double2float_rz(double x) { return (double)x; } +__device__ static inline float __double2float_rd(double x) { + return __ocml_cvtrtn_f32_f64(x); +} +__device__ static inline float __double2float_rn(double x) { return x; } +__device__ static inline float __double2float_ru(double x) { + return __ocml_cvtrtp_f32_f64(x); +} +__device__ static inline float __double2float_rz(double x) { + return __ocml_cvtrtz_f32_f64(x); +} __device__ static inline int __double2hiint(double x) { static_assert(sizeof(double) == 2 * sizeof(int), ""); @@ -586,10 +587,16 @@ __device__ static inline double __hiloint2double(int hi, int lo) { __device__ static inline double __int2double_rn(int x) { return (double)x; } -__device__ static inline float __int2float_rd(int x) { return (float)x; } +__device__ static inline float __int2float_rd(int x) { + return __ocml_cvtrtn_f32_s32(x); +} __device__ static inline float __int2float_rn(int x) { return (float)x; } -__device__ static inline float __int2float_ru(int x) { return (float)x; } -__device__ static inline float __int2float_rz(int x) { return (float)x; } +__device__ static inline float __int2float_ru(int x) { + return __ocml_cvtrtp_f32_s32(x); +} +__device__ static inline float __int2float_rz(int x) { + return __ocml_cvtrtz_f32_s32(x); +} __device__ static inline float __int_as_float(int x) { static_assert(sizeof(float) == sizeof(int), ""); @@ -600,15 +607,27 @@ __device__ static inline float __int_as_float(int x) { return tmp; } -__device__ static inline double __ll2double_rd(long long int x) { return (double)x; } +__device__ static inline double __ll2double_rd(long long int x) { + return __ocml_cvtrtn_f64_s64(x); +} __device__ static inline double __ll2double_rn(long long int x) { return (double)x; } -__device__ static inline double __ll2double_ru(long long int x) { return (double)x; } -__device__ static inline double __ll2double_rz(long long int x) { return (double)x; } +__device__ static inline double __ll2double_ru(long long int x) { + return __ocml_cvtrtp_f64_s64(x); +} +__device__ static inline double __ll2double_rz(long long int x) { + return __ocml_cvtrtz_f64_s64(x); +} -__device__ static inline float __ll2float_rd(long long int x) { return (float)x; } +__device__ static inline float __ll2float_rd(long long int x) { + return __ocml_cvtrtn_f32_s64(x); +} __device__ static inline float __ll2float_rn(long long int x) { return (float)x; } -__device__ static inline float __ll2float_ru(long long int x) { return (float)x; } -__device__ static inline float __ll2float_rz(long long int x) { return (float)x; } +__device__ static inline float __ll2float_ru(long long int x) { + return __ocml_cvtrtp_f32_s64(x); +} +__device__ static inline float __ll2float_rz(long long int x) { + return __ocml_cvtrtz_f32_s64(x); +} __device__ static inline double __longlong_as_double(long long int x) { static_assert(sizeof(double) == sizeof(long long), ""); @@ -621,10 +640,16 @@ __device__ static inline double __longlong_as_double(long long int x) { __device__ static inline double __uint2double_rn(int x) { return (double)x; } -__device__ static inline float __uint2float_rd(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_rd(unsigned int x) { + return __ocml_cvtrtn_f32_u32(x); +} __device__ static inline float __uint2float_rn(unsigned int x) { return (float)x; } -__device__ static inline float __uint2float_ru(unsigned int x) { return (float)x; } -__device__ static inline float __uint2float_rz(unsigned int x) { return (float)x; } +__device__ static inline float __uint2float_ru(unsigned int x) { + return __ocml_cvtrtp_f32_u32(x); +} +__device__ static inline float __uint2float_rz(unsigned int x) { + return __ocml_cvtrtz_f32_u32(x); +} __device__ static inline float __uint_as_float(unsigned int x) { static_assert(sizeof(float) == sizeof(unsigned int), ""); @@ -635,15 +660,27 @@ __device__ static inline float __uint_as_float(unsigned int x) { return tmp; } -__device__ static inline double __ull2double_rd(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_rd(unsigned long long int x) { + return __ocml_cvtrtn_f64_u64(x); +} __device__ static inline double __ull2double_rn(unsigned long long int x) { return (double)x; } -__device__ static inline double __ull2double_ru(unsigned long long int x) { return (double)x; } -__device__ static inline double __ull2double_rz(unsigned long long int x) { return (double)x; } +__device__ static inline double __ull2double_ru(unsigned long long int x) { + return __ocml_cvtrtp_f64_u64(x); +} +__device__ static inline double __ull2double_rz(unsigned long long int x) { + return __ocml_cvtrtz_f64_u64(x); +} -__device__ static inline float __ull2float_rd(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_rd(unsigned long long int x) { + return __ocml_cvtrtn_f32_u64(x); +} __device__ static inline float __ull2float_rn(unsigned long long int x) { return (float)x; } -__device__ static inline float __ull2float_ru(unsigned long long int x) { return (float)x; } -__device__ static inline float __ull2float_rz(unsigned long long int x) { return (float)x; } +__device__ static inline float __ull2float_ru(unsigned long long int x) { + return __ocml_cvtrtp_f32_u64(x); +} +__device__ static inline float __ull2float_rz(unsigned long long int x) { + return __ocml_cvtrtz_f32_u64(x); +} #if __HIP_CLANG_ONLY__ diff --git a/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h index 9f95f33b73..8add4fa279 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h +++ b/projects/clr/hipamd/include/hip/amd_detail/device_library_decls.h @@ -62,6 +62,33 @@ extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float); extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float); extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_f64(double); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_f64(double); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_f64(double); + +extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); +extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float); +extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float); + +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s32(int); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u32(uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u32(uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u32(uint32_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s64(int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s64(int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s64(int64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u64(uint64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u64(uint64_t); +extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u64(uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_s64(int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_s64(int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_s64(int64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_u64(uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_u64(uint64_t); +extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_u64(uint64_t); + extern "C" __device__ __attribute__((convergent)) void __ockl_gws_init(uint nwm1, uint rid); extern "C" __device__ __attribute__((convergent)) void __ockl_gws_barrier(uint nwm1, uint rid);