From c19350372204bcb34a8e210639160b67eb4d7465 Mon Sep 17 00:00:00 2001 From: Satyanvesh Dittakavi Date: Mon, 9 Jan 2023 12:18:32 +0000 Subject: [PATCH] SWDEV-376386 - Support __hip_move_dpp with hipRTC Change-Id: If9a6fe23d6df143a7f522823fca7183416048c6a --- .../hip/amd_detail/amd_device_functions.h | 50 ------------------- .../include/hip/amd_detail/amd_hip_runtime.h | 17 ------- .../hip/amd_detail/amd_warp_functions.h | 50 +++++++++++++++++++ 3 files changed, 50 insertions(+), 67 deletions(-) diff --git a/hipamd/include/hip/amd_detail/amd_device_functions.h b/hipamd/include/hip/amd_detail/amd_device_functions.h index 664f425396..f2f849db5a 100644 --- a/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -338,56 +338,6 @@ static inline unsigned int __mbcnt_hi(unsigned int x, unsigned int y) {return __ HIP specific device functions */ -__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { - union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); - return tmp.u; -} - -__device__ static inline float __hip_ds_bpermutef(int index, float src) { - union { int i; unsigned u; float f; } tmp; tmp.f = src; - tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); - return tmp.f; -} - -__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { - union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); - return tmp.u; -} - -__device__ static inline float __hip_ds_permutef(int index, float src) { - union { int i; unsigned u; float f; } tmp; tmp.f = src; - tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); - return tmp.f; -} - -#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src)) -#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src)) - -template -__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) { - union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); - return tmp.u; -} - -template -__device__ static inline float __hip_ds_swizzlef_N(float src) { - union { int i; unsigned u; float f; } tmp; tmp.f = src; - tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); - return tmp.f; -} - -#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \ - __hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src)) - -template -__device__ static inline int __hip_move_dpp_N(int src) { - return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, - bound_ctrl); -} - #if !defined(__HIPCC_RTC__) #include "amd_warp_functions.h" #endif diff --git a/hipamd/include/hip/amd_detail/amd_hip_runtime.h b/hipamd/include/hip/amd_detail/amd_hip_runtime.h index 851e3b2f74..f3df0473d5 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_runtime.h +++ b/hipamd/include/hip/amd_detail/amd_hip_runtime.h @@ -170,23 +170,6 @@ extern int HIP_TRACE_API; __host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; } #endif // !defined(__HIPCC_RTC__) -#if __HIP_ARCH_GFX701__ == 0 - -__device__ unsigned __hip_ds_bpermute(int index, unsigned src); -__device__ float __hip_ds_bpermutef(int index, float src); -__device__ unsigned __hip_ds_permute(int index, unsigned src); -__device__ float __hip_ds_permutef(int index, float src); - -template -__device__ unsigned __hip_ds_swizzle_N(unsigned int src); -template -__device__ float __hip_ds_swizzlef_N(float src); - -template -__device__ int __hip_move_dpp_N(int src); - -#endif //__HIP_ARCH_GFX803__ == 1 - // End doxygen API: /** * @} diff --git a/hipamd/include/hip/amd_detail/amd_warp_functions.h b/hipamd/include/hip/amd_detail/amd_warp_functions.h index ebdc86374d..b18ff5f5d7 100644 --- a/hipamd/include/hip/amd_detail/amd_warp_functions.h +++ b/hipamd/include/hip/amd_detail/amd_warp_functions.h @@ -23,6 +23,56 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H +__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { + union { int i; unsigned u; float f; } tmp; tmp.u = src; + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_bpermutef(int index, float src) { + union { int i; unsigned u; float f; } tmp; tmp.f = src; + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); + return tmp.f; +} + +__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { + union { int i; unsigned u; float f; } tmp; tmp.u = src; + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); + return tmp.u; +} + +__device__ static inline float __hip_ds_permutef(int index, float src) { + union { int i; unsigned u; float f; } tmp; tmp.f = src; + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); + return tmp.f; +} + +#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src)) +#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src)) + +template +__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) { + union { int i; unsigned u; float f; } tmp; tmp.u = src; + tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.u; +} + +template +__device__ static inline float __hip_ds_swizzlef_N(float src) { + union { int i; unsigned u; float f; } tmp; tmp.f = src; + tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern); + return tmp.f; +} + +#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \ + __hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src)) + +template +__device__ static inline int __hip_move_dpp_N(int src) { + return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, + bound_ctrl); +} + static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE; __device__