Implement __ballot, __any, __all into HIP headers

[ROCm/hip commit: 6dc16bbf04]
This commit is contained in:
Aaron Enye Shi
2018-06-19 22:07:26 +00:00
parent 2a2f34b42d
commit e2f8af2f3d
2 changed files with 42 additions and 31 deletions
@@ -585,23 +585,51 @@ void __named_sync(int a, int b) { __builtin_amdgcn_s_barrier(); }
#endif // __HIP_DEVICE_COMPILE__
// warp vote function __all __any __ballot
__device__
int __all(int input);
__device__
int __any(int input);
__device__
unsigned long long int __ballot(int input);
extern "C" __device__ inline uint64_t __activelanemask_v4_b64_b1(unsigned int input) {
uint64_t output;
// define i64 @__activelanemask_v4_b64_b1(i32 %input) #5 {
// %a = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %input) #9
// ret i64 %a
// }
__asm("v_cmp_ne_i32_e64 %0, 0, %1" : "=s"(output) : "v"(input));
return output;
}
__device__
inline
uint64_t __ballot64(int a) {
int64_t s;
// define i64 @__ballot64(i32 %a) #0 {
// %b = tail call i64 asm "v_cmp_ne_i32_e64 $0, 0, $1", "=s,v"(i32 %a) #1
// ret i64 %b
// }
__asm("v_cmp_ne_i32_e64 %0, 0, %1" : "=s"(s) : "v"(a));
return s;
unsigned int __activelanecount_u32_b1(unsigned int input) {
return __popcll(__activelanemask_v4_b64_b1(input));
}
__device__
inline
int __all(int predicate) {
return __popcll(__activelanemask_v4_b64_b1(predicate)) == __activelanecount_u32_b1(1);
}
__device__
inline
int __any(int predicate) {
#ifdef NVCC_COMPAT
if (__popcll(__activelanemask_v4_b64_b1(predicate)) != 0)
return 1;
else
return 0;
#else
return __popcll(__activelanemask_v4_b64_b1(predicate));
#endif
}
__device__
inline
unsigned long long int __ballot(int predicate) {
return __activelanemask_v4_b64_b1(predicate);
}
__device__
inline
unsigned long long int __ballot64(int predicate) {
return __activelanemask_v4_b64_b1(predicate);
}
// hip.amdgcn.bc - lanemask
-17
View File
@@ -147,23 +147,6 @@ __device__ void* __hip_hc_memset(void* dst, uint8_t val, size_t size) {
// abort
__device__ void abort() { return hc::abort(); }
// warp vote function __all __any __ballot
__device__ int __all(int input) { return hc::__all(input); }
__device__ int __any(int input) {
#ifdef NVCC_COMPAT
if (hc::__any(input) != 0)
return 1;
else
return 0;
#else
return hc::__any(input);
#endif
}
__device__ unsigned long long int __ballot(int input) { return hc::__ballot(input); }
// warp shuffle functions
__device__ int __shfl(int input, int lane, int width) { return hc::__shfl(input, lane, width); }