Merge pull request #657 from yxsamliu/fix-clz
Fix ambiguity of __clz and __clzll
Этот коммит содержится в:
@@ -32,9 +32,6 @@ THE SOFTWARE.
|
||||
#include <hip/hcc_detail/llvm_intrinsics.h>
|
||||
#include <stddef.h>
|
||||
|
||||
typedef unsigned long ulong;
|
||||
typedef unsigned int uint;
|
||||
|
||||
/*
|
||||
Integer Intrinsics
|
||||
*/
|
||||
@@ -47,20 +44,12 @@ __device__ static inline unsigned int __popcll(unsigned long long int input) {
|
||||
return __builtin_popcountl(input);
|
||||
}
|
||||
|
||||
__device__ static inline unsigned int __clz(unsigned int input) {
|
||||
return input == 0 ? 32 : __builtin_clz(input);
|
||||
/**
 * 32-bit leading-zero count for a signed argument.
 *
 * The argument is converted to `uint` and forwarded to `__ockl_clz_u32`;
 * that routine's `uint` result is implicitly converted to the `int`
 * return type. Taking `int` as the only parameter type keeps calls with
 * other arithmetic argument types from being ambiguous.
 */
__device__ static inline int __clz(int input) {
    const uint bits = (uint)input;
    return __ockl_clz_u32(bits);
}
|
||||
|
||||
/**
 * Count the leading zero bits of a 64-bit unsigned value.
 *
 * @param input value to inspect
 * @return 64 when `input` is zero, otherwise the number of zero bits above
 *         the most significant set bit (0..63).
 */
__device__ static inline unsigned int __clzll(unsigned long long int input) {
    // __builtin_clzll is undefined for 0, so that case is answered explicitly.
    // The `ll` variant matches the parameter width on every ABI; the `l`
    // variant would truncate where `long` is 32 bits wide.
    if (input == 0) {
        return 64;
    }
    return __builtin_clzll(input);
}
|
||||
|
||||
/**
 * Count the leading zero bits of a 32-bit value given as a signed int.
 *
 * The count is taken over the argument's bit pattern, so any negative
 * input (sign bit set) yields 0.
 *
 * @param input value to inspect
 * @return 32 when `input` is zero, otherwise the number of zero bits above
 *         the most significant set bit (0..31).
 */
__device__ static inline unsigned int __clz(int input) {
    // __builtin_clz is undefined for 0, so that case is answered explicitly.
    if (input == 0) {
        return 32;
    }
    // Reinterpret as unsigned instead of complementing negative values:
    // complementing counted leading ones and passed 0 to the builtin for -1.
    return __builtin_clz((unsigned int)input);
}
|
||||
|
||||
__device__ static inline unsigned int __clzll(long long int input) {
|
||||
return input == 0 ? 64 : input > 0 ? __builtin_clzl(input) : __builtin_clzl(~input);
|
||||
/**
 * 64-bit leading-zero count for a signed argument.
 *
 * The argument is converted to `ulong` and forwarded to `__ockl_clz_u64`;
 * that routine's `ulong` result is implicitly converted to the `int`
 * return type. Taking `long long int` as the only parameter type keeps
 * calls with other arithmetic argument types from being ambiguous.
 */
__device__ static inline int __clzll(long long int input) {
    const ulong bits = (ulong)input;
    return __ockl_clz_u64(bits);
}
|
||||
|
||||
__device__ static inline unsigned int __ffs(unsigned int input) {
|
||||
|
||||
@@ -30,6 +30,11 @@ THE SOFTWARE.
|
||||
|
||||
#include "hip/hcc_detail/host_defines.h"
|
||||
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
|
||||
extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int);
|
||||
extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int);
|
||||
extern "C" __device__ uint __ockl_activelane_u32(void);
|
||||
@@ -40,6 +45,11 @@ extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint);
|
||||
extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int);
|
||||
extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint);
|
||||
|
||||
extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar);
|
||||
extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort);
|
||||
extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint);
|
||||
extern "C" __device__ __attribute__((const)) ulong __ockl_clz_u64(ulong);
|
||||
|
||||
extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float);
|
||||
extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float);
|
||||
extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float);
|
||||
|
||||
@@ -67,6 +67,21 @@ unsigned int firstbit_u64(unsigned long long int a) {
|
||||
return pos;
|
||||
}
|
||||
|
||||
// Check that implicit argument conversions do not make calls to __clz and __clzll ambiguous.
|
||||
__device__ void test_ambiguity() {
    // Compile-time check only: every call passes an argument whose type
    // differs from the intrinsic's parameter type, so the build fails if
    // overload resolution is ambiguous. Results are intentionally unused.
    //
    // The locals are zero-initialized so that passing them by value never
    // reads an indeterminate value; the actual values are irrelevant.
    short s = 0;
    unsigned short us = 0;
    float f = 0.0f;
    int i = 0;
    unsigned int ui = 0;

    // 32-bit intrinsic with float, short and unsigned short arguments.
    __clz(f);
    __clz(s);
    __clz(us);

    // 64-bit intrinsic with float, int and unsigned int arguments.
    __clzll(f);
    __clzll(i);
    __clzll(ui);
}
|
||||
|
||||
__global__ void HIP_kernel(hipLaunchParm lp, unsigned int* a, unsigned int* b, unsigned int* c,
|
||||
unsigned long long int* d, int width, int height) {
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
Ссылка в новой задаче
Block a user