Merge pull request #1058 from mhbliao/hliao/master/devfunc
[Device Function] Fix implementation
[ROCm/hip commit: 37d01a7da9]
This commit is contained in:
@@ -103,7 +103,7 @@ __device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigne
|
||||
__device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) {
|
||||
uint64_t offset = src2 & 63;
|
||||
uint64_t width = src3 & 63;
|
||||
- uint64_t mask = (1 << width) - 1;
|
||||
+ uint64_t mask = (1ULL << width) - 1;
|
||||
return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
|
||||
}
|
||||
|
||||
@@ -753,7 +753,7 @@ int64_t __lanemask_gt()
|
||||
int32_t activelane = __ockl_activelane_u32();
|
||||
int64_t ballot = __ballot64(1);
|
||||
if (activelane != 63) {
|
||||
- int64_t tmp = (~0UL) << (activelane + 1);
|
||||
+ int64_t tmp = (~0ULL) << (activelane + 1);
|
||||
return tmp & ballot;
|
||||
}
|
||||
return 0;
|
||||
|
||||
@@ -46,7 +46,7 @@ T bit_insert(T src0, T src1, unsigned int src2, unsigned int src3) {
|
||||
unsigned int bits = sizeof(T) * 8;
|
||||
T offset = src2 & (bits - 1);
|
||||
T width = src3 & (bits - 1);
|
||||
- T mask = (1 << width) - 1;
|
||||
+ T mask = (((T)1) << width) - 1;
|
||||
return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
|
||||
}
|
||||
|
||||
|
||||
Reference in new issue
Block a user