diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index eaee437cea..7bc0b97617 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -85,11 +85,11 @@ __device__ static inline unsigned int __ffsll(long long int input) { } __device__ static inline unsigned int __brev(unsigned int input) { - return __llvm_bitrev_b32(input); + return __builtin_bitreverse32(input); } __device__ static inline unsigned long long int __brevll(unsigned long long int input) { - return __llvm_bitrev_b64(input); + return __builtin_bitreverse64(input); } __device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) { @@ -233,7 +233,10 @@ __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, uns return __ockl_sadd_u32(x, y, z); } -__device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } +__device__ static inline unsigned int __lane_id() { + return __builtin_amdgcn_mbcnt_hi( + -1, __builtin_amdgcn_mbcnt_lo(-1, 0)); +} /* HIP specific device functions @@ -241,25 +244,25 @@ HIP specific device functions __device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); return tmp.u; } __device__ static inline float __hip_ds_bpermutef(int index, float src) { union { int i; unsigned u; float f; } tmp; tmp.f = src; - tmp.i = __llvm_amdgcn_ds_bpermute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); return tmp.f; } __device__ static inline unsigned __hip_ds_permute(int index, unsigned src) { union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); return tmp.u; } __device__ static inline float __hip_ds_permutef(int index, float src) { union { int i; unsigned u; float f; } tmp; tmp.u = src; - tmp.i = __llvm_amdgcn_ds_permute(index, tmp.i); + tmp.i = __builtin_amdgcn_ds_permute(index, tmp.i); return tmp.u; } @@ -293,8 +296,8 @@ __device__ static inline float __hip_ds_swizzlef_N(float src) { template __device__ static inline int __hip_move_dpp_N(int src) { - return __llvm_amdgcn_move_dpp(src, dpp_ctrl, row_mask, bank_mask, - bound_ctrl); + return __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, + bound_ctrl); } static constexpr int warpSize = 64; @@ -304,7 +307,7 @@ inline int __shfl(int var, int src_lane, int width = warpSize) { int self = __lane_id(); int index = src_lane + (self & ~(width-1)); - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline @@ -376,7 +379,7 @@ int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) { int self = __lane_id(); int index = self - lane_delta; index = (index < (self & ~(width-1)))?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline @@ -446,7 +449,7 @@ int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) { int self = __lane_id(); int index = self + lane_delta; index = (int)((self&(width-1))+lane_delta) >= width?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline @@ -516,7 +519,7 @@ int __shfl_xor(int var, int lane_mask, int width = warpSize) { int self = __lane_id(); int index = self^lane_mask; index = index >= ((self+width)&~(width-1))?self:index; - return __llvm_amdgcn_ds_bpermute(index<<2, var); + return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__ inline diff --git a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h index dc6fd05c52..330b3d91c2 100644 --- a/hipamd/include/hip/hcc_detail/llvm_intrinsics.h +++ b/hipamd/include/hip/hcc_detail/llvm_intrinsics.h @@ -31,40 +31,11 @@ THE SOFTWARE. #include "hip/hcc_detail/host_defines.h" -__device__ -__attribute__((convergent)) -ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); - +// FIXME: These should all be removed and proper builtins used. __device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); -__device__ -unsigned int __llvm_bitrev_b32(unsigned int src0) __asm("llvm.bitreverse.i32"); - -__device__ -uint64_t __llvm_bitrev_b64(uint64_t src0) __asm("llvm.bitreverse.i64"); - -extern -__device__ -__attribute__((const)) -unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo"); - -extern -__device__ -__attribute__((const)) -unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi"); - -__device__ -int __llvm_amdgcn_ds_bpermute(int index, int src) __asm("llvm.amdgcn.ds.bpermute"); - -__device__ -int __llvm_amdgcn_ds_permute(int index, int src) __asm("llvm.amdgcn.ds.permute"); - __device__ int __llvm_amdgcn_ds_swizzle(int index, int pattern) __asm("llvm.amdgcn.ds.swizzle"); -__device__ -int __llvm_amdgcn_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, - bool bound_ctrl) __asm("llvm.amdgcn.mov.dpp.i32"); - #endif diff --git a/hipamd/tests/src/deviceLib/hip_mbcnt.cpp b/hipamd/tests/src/deviceLib/hip_mbcnt.cpp index cd4bfa5daa..2cb958f280 100644 --- a/hipamd/tests/src/deviceLib/hip_mbcnt.cpp +++ b/hipamd/tests/src/deviceLib/hip_mbcnt.cpp @@ -38,11 +38,11 @@ THE SOFTWARE. __global__ void HIP_kernel(unsigned int* mbcnt_lo, unsigned int* mbcnt_hi, unsigned int* lane_id) { int x = blockDim.x * blockIdx.x + threadIdx.x; - mbcnt_lo[x] = __mbcnt_lo(0xFFFFFFFF, 0); - mbcnt_hi[x] = __mbcnt_hi(0xFFFFFFFF, 0); + mbcnt_lo[x] = __builtin_amdgcn_mbcnt_lo(0xFFFFFFFF, 0); + mbcnt_hi[x] = __builtin_amdgcn_mbcnt_hi(0xFFFFFFFF, 0); lane_id[x] = __lane_id(); } - + using namespace std; int main() {