SWDEV-376386 - Support __hip_move_dpp with hipRTC
Change-Id: If9a6fe23d6df143a7f522823fca7183416048c6a
Этот коммит содержится в:
@@ -338,56 +338,6 @@ static inline unsigned int __mbcnt_hi(unsigned int x, unsigned int y) {return __
|
||||
HIP specific device functions
|
||||
*/
|
||||
|
||||
__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_bpermutef(int index, float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_permutef(int index, float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src))
|
||||
#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
|
||||
|
||||
template <int pattern>
|
||||
__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
template <int pattern>
|
||||
__device__ static inline float __hip_ds_swizzlef_N(float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
|
||||
__hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
|
||||
|
||||
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
|
||||
__device__ static inline int __hip_move_dpp_N(int src) {
|
||||
return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask,
|
||||
bound_ctrl);
|
||||
}
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include "amd_warp_functions.h"
|
||||
#endif
|
||||
|
||||
@@ -170,23 +170,6 @@ extern int HIP_TRACE_API;
|
||||
__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; }
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#if __HIP_ARCH_GFX701__ == 0
|
||||
|
||||
__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
|
||||
__device__ float __hip_ds_bpermutef(int index, float src);
|
||||
__device__ unsigned __hip_ds_permute(int index, unsigned src);
|
||||
__device__ float __hip_ds_permutef(int index, float src);
|
||||
|
||||
template <int pattern>
|
||||
__device__ unsigned __hip_ds_swizzle_N(unsigned int src);
|
||||
template <int pattern>
|
||||
__device__ float __hip_ds_swizzlef_N(float src);
|
||||
|
||||
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
|
||||
__device__ int __hip_move_dpp_N(int src);
|
||||
|
||||
#endif //__HIP_ARCH_GFX803__ == 1
|
||||
|
||||
// End doxygen API:
|
||||
/**
|
||||
* @}
|
||||
|
||||
@@ -23,6 +23,56 @@ THE SOFTWARE.
|
||||
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H
|
||||
#define HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H
|
||||
|
||||
__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_bpermutef(int index, float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_permutef(int index, float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src))
|
||||
#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
|
||||
|
||||
template <int pattern>
|
||||
__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
template <int pattern>
|
||||
__device__ static inline float __hip_ds_swizzlef_N(float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
|
||||
__hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
|
||||
|
||||
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
|
||||
__device__ static inline int __hip_move_dpp_N(int src) {
|
||||
return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask,
|
||||
bound_ctrl);
|
||||
}
|
||||
|
||||
static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
|
||||
|
||||
__device__
|
||||
|
||||
Ссылка в новой задаче
Block a user