SWDEV-376386 - Support __hip_move_dpp with hipRTC

Change-Id: If9a6fe23d6df143a7f522823fca7183416048c6a
Этот коммит содержится в:
Satyanvesh Dittakavi
2023-01-09 12:18:32 +00:00
родитель 33eef6d95e
Коммит c193503722
3 изменённых файлов: 50 добавлений и 67 удалений
-50
Просмотреть файл
@@ -338,56 +338,6 @@ static inline unsigned int __mbcnt_hi(unsigned int x, unsigned int y) {return __
HIP specific device functions
*/
__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
union { int i; unsigned u; float f; } tmp; tmp.u = src;
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
return tmp.u;
}
__device__ static inline float __hip_ds_bpermutef(int index, float src) {
union { int i; unsigned u; float f; } tmp; tmp.f = src;
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
return tmp.f;
}
__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
union { int i; unsigned u; float f; } tmp; tmp.u = src;
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
return tmp.u;
}
__device__ static inline float __hip_ds_permutef(int index, float src) {
union { int i; unsigned u; float f; } tmp; tmp.f = src;
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
return tmp.f;
}
#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src))
#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
template <int pattern>
__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
union { int i; unsigned u; float f; } tmp; tmp.u = src;
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
return tmp.u;
}
template <int pattern>
__device__ static inline float __hip_ds_swizzlef_N(float src) {
union { int i; unsigned u; float f; } tmp; tmp.f = src;
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
return tmp.f;
}
#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
__hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
__device__ static inline int __hip_move_dpp_N(int src) {
return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask,
bound_ctrl);
}
#if !defined(__HIPCC_RTC__)
#include "amd_warp_functions.h"
#endif
-17
Просмотреть файл
@@ -170,23 +170,6 @@ extern int HIP_TRACE_API;
__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
#endif // !defined(__HIPCC_RTC__)
#if __HIP_ARCH_GFX701__ == 0
__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
__device__ float __hip_ds_bpermutef(int index, float src);
__device__ unsigned __hip_ds_permute(int index, unsigned src);
__device__ float __hip_ds_permutef(int index, float src);
template <int pattern>
__device__ unsigned __hip_ds_swizzle_N(unsigned int src);
template <int pattern>
__device__ float __hip_ds_swizzlef_N(float src);
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
__device__ int __hip_move_dpp_N(int src);
#endif //__HIP_ARCH_GFX803__ == 1
// End doxygen API:
/**
* @}
+50
Просмотреть файл
@@ -23,6 +23,56 @@ THE SOFTWARE.
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H
#define HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H
__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
union { int i; unsigned u; float f; } tmp; tmp.u = src;
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
return tmp.u;
}
__device__ static inline float __hip_ds_bpermutef(int index, float src) {
union { int i; unsigned u; float f; } tmp; tmp.f = src;
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
return tmp.f;
}
__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
union { int i; unsigned u; float f; } tmp; tmp.u = src;
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
return tmp.u;
}
__device__ static inline float __hip_ds_permutef(int index, float src) {
union { int i; unsigned u; float f; } tmp; tmp.f = src;
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
return tmp.f;
}
#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src))
#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
template <int pattern>
__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
union { int i; unsigned u; float f; } tmp; tmp.u = src;
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
return tmp.u;
}
template <int pattern>
__device__ static inline float __hip_ds_swizzlef_N(float src) {
union { int i; unsigned u; float f; } tmp; tmp.f = src;
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
return tmp.f;
}
#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
__hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
__device__ static inline int __hip_move_dpp_N(int src) {
return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask,
bound_ctrl);
}
static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
__device__