From e637e72364656b97a846c538d53b27a36c3290cc Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Fri, 26 Apr 2019 14:51:25 -0400 Subject: [PATCH] [Device Function] Fix implementation of `__bitinsert_u64` - It is a common mistake to assume that `1 << shamt` is promoted to 64-bit when `shamt` is a 64-bit integer. That is not the case: the result type of a shift is the type of the promoted left operand, so the shift is still performed on a 32-bit `int`. Replace that left shift with a 64-bit one to ensure it cannot fall into undefined behavior. - Fix the host-side implementation as well, since it is used for device function testing. [ROCm/hip commit: 9bd2d5746d31a1c5d2578319e934120a879e2506] --- projects/hip/include/hip/hcc_detail/device_functions.h | 4 ++-- projects/hip/tests/src/deviceLib/hip_bitinsert.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/device_functions.h b/projects/hip/include/hip/hcc_detail/device_functions.h index 044ca1e634..e75c0b6526 100644 --- a/projects/hip/include/hip/hcc_detail/device_functions.h +++ b/projects/hip/include/hip/hcc_detail/device_functions.h @@ -103,7 +103,7 @@ __device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigne __device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) { uint64_t offset = src2 & 63; uint64_t width = src3 & 63; - uint64_t mask = (1 << width) - 1; + uint64_t mask = (1ULL << width) - 1; return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); } @@ -753,7 +753,7 @@ int64_t __lanemask_gt() int32_t activelane = __ockl_activelane_u32(); int64_t ballot = __ballot64(1); if (activelane != 63) { - int64_t tmp = (~0UL) << (activelane + 1); + int64_t tmp = (~0ULL) << (activelane + 1); return tmp & ballot; } return 0; diff --git a/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp b/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp index bf00f4143a..bf1146b91a 100644 --- a/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp +++ b/projects/hip/tests/src/deviceLib/hip_bitinsert.cpp @@ -46,7 +46,7 @@ T bit_insert(T 
src0, T src1, unsigned int src2, unsigned int src3) { unsigned int bits = sizeof(T) * 8; T offset = src2 & (bits - 1); T width = src3 & (bits - 1); - T mask = (1 << width) - 1; + T mask = (((T)1) << width) - 1; return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset)); }