[Device Function] Support immediate argument.
- `immarg`, immediate argument, is enabled on all AMDGPU intrinsics.
Revise device functions using these intrinsics with immediate
arguments.
[ROCm/hip commit: 950b6efe72]
Этот коммит содержится в:
@@ -46,7 +46,7 @@ __device__ static inline unsigned int __popcll(unsigned long long int input) {
|
||||
}
|
||||
|
||||
__device__ static inline int __clz(int input) {
|
||||
return __ockl_clz_u32((uint)input);
|
||||
return __ockl_clz_u32((uint)input);
|
||||
}
|
||||
|
||||
__device__ static inline int __clzll(long long int input) {
|
||||
@@ -224,56 +224,59 @@ __device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbc
|
||||
HIP specific device functions
|
||||
*/
|
||||
|
||||
// utility union type
|
||||
union __u {
|
||||
int i;
|
||||
unsigned int u;
|
||||
float f;
|
||||
};
|
||||
|
||||
__device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
|
||||
__u tmp; tmp.u = src;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_bpermutef(int index, float src) {
|
||||
__u tmp; tmp.f = src;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
__device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
|
||||
__u tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline float __hip_ds_permutef(int index, float src) {
|
||||
__u tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
__device__ static inline unsigned __hip_ds_swizzle(unsigned int src, int pattern) {
|
||||
__u tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i);
|
||||
return tmp.u;
|
||||
}
|
||||
__device__ static inline float __hip_ds_swizzlef(float src, int pattern) {
|
||||
__u tmp; tmp.f = src;
|
||||
tmp.i = __llvm_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
|
||||
#define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src))
|
||||
#define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
|
||||
|
||||
template <int pattern>
|
||||
__device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = src;
|
||||
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
return tmp.u;
|
||||
}
|
||||
|
||||
template <int pattern>
|
||||
__device__ static inline float __hip_ds2_swizzlef_N(float src) {
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = src;
|
||||
tmp.i = __builtin_amdgcn_ds_swizzle(tmp.i, pattern);
|
||||
return tmp.f;
|
||||
}
|
||||
|
||||
__device__ static inline int __hip_move_dpp(int src, int dpp_ctrl, int row_mask,
|
||||
int bank_mask, bool bound_ctrl) {
|
||||
return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
|
||||
#define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
|
||||
__hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
|
||||
|
||||
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
|
||||
__device__ static inline int __hip2_move_dpp_N(int src) {
|
||||
return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask,
|
||||
bound_ctrl);
|
||||
}
|
||||
|
||||
static constexpr int warpSize = 64;
|
||||
|
||||
__device__
|
||||
__device__
|
||||
inline
|
||||
int __shfl(int var, int src_lane, int width = warpSize) {
|
||||
int self = __lane_id();
|
||||
@@ -283,14 +286,14 @@ int __shfl(int var, int src_lane, int width = warpSize) {
|
||||
__device__
|
||||
inline
|
||||
unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) {
|
||||
__u tmp; tmp.u = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = var;
|
||||
tmp.i = __shfl(tmp.i, src_lane, width);
|
||||
return tmp.u;
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
float __shfl(float var, int src_lane, int width = warpSize) {
|
||||
__u tmp; tmp.f = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = var;
|
||||
tmp.i = __shfl(tmp.i, src_lane, width);
|
||||
return tmp.f;
|
||||
}
|
||||
@@ -320,14 +323,14 @@ int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) {
|
||||
__device__
|
||||
inline
|
||||
unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) {
|
||||
__u tmp; tmp.u = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = var;
|
||||
tmp.i = __shfl_up(tmp.i, lane_delta, width);
|
||||
return tmp.u;
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) {
|
||||
__u tmp; tmp.f = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = var;
|
||||
tmp.i = __shfl_up(tmp.i, lane_delta, width);
|
||||
return tmp.f;
|
||||
}
|
||||
@@ -357,14 +360,14 @@ int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) {
|
||||
__device__
|
||||
inline
|
||||
unsigned int __shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) {
|
||||
__u tmp; tmp.u = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = var;
|
||||
tmp.i = __shfl_down(tmp.i, lane_delta, width);
|
||||
return tmp.u;
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) {
|
||||
__u tmp; tmp.f = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = var;
|
||||
tmp.i = __shfl_down(tmp.i, lane_delta, width);
|
||||
return tmp.f;
|
||||
}
|
||||
@@ -394,14 +397,14 @@ int __shfl_xor(int var, int lane_mask, int width = warpSize) {
|
||||
__device__
|
||||
inline
|
||||
unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) {
|
||||
__u tmp; tmp.u = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.u = var;
|
||||
tmp.i = __shfl_xor(tmp.i, lane_mask, width);
|
||||
return tmp.u;
|
||||
}
|
||||
__device__
|
||||
inline
|
||||
float __shfl_xor(float var, int lane_mask, int width = warpSize) {
|
||||
__u tmp; tmp.f = var;
|
||||
union { int i; unsigned u; float f; } tmp; tmp.f = var;
|
||||
tmp.i = __shfl_xor(tmp.i, lane_mask, width);
|
||||
return tmp.f;
|
||||
}
|
||||
@@ -681,9 +684,9 @@ inline __attribute((always_inline))
|
||||
long long int __clock64() {
|
||||
// ToDo: Unify HCC and HIP implementation.
|
||||
#if __HCC__
|
||||
return (long long int) __clock_u64();
|
||||
return (long long int) __clock_u64();
|
||||
#else
|
||||
return (long long int) __builtin_amdgcn_s_memrealtime();
|
||||
return (long long int) __builtin_amdgcn_s_memrealtime();
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -881,7 +884,7 @@ __device__
|
||||
inline
|
||||
__attribute__((weak))
|
||||
void abort() {
|
||||
return __builtin_trap();
|
||||
return __builtin_trap();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -190,10 +190,13 @@ __device__ float __hip_ds_bpermutef(int index, float src);
|
||||
__device__ unsigned __hip_ds_permute(int index, unsigned src);
|
||||
__device__ float __hip_ds_permutef(int index, float src);
|
||||
|
||||
__device__ unsigned __hip_ds_swizzle(unsigned int src, int pattern);
|
||||
__device__ float __hip_ds_swizzlef(float src, int pattern);
|
||||
template <int pattern>
|
||||
__device__ unsigned __hip_ds_swizzle_N(unsigned int src);
|
||||
template <int pattern>
|
||||
__device__ float __hip_ds_swizzlef_N(float src);
|
||||
|
||||
__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl);
|
||||
template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
|
||||
__device__ int __hip_move_dpp_N(int src);
|
||||
|
||||
#endif //__HIP_ARCH_GFX803__ == 1
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user