[devfunc] Re-implement ballot using AMDGCN builtins
- As the signature of `amdgcn.icmp` has changed for the next-gen chip, using
clang builtins is a portable way to hide those details.
[ROCm/hip commit: a64637da2c]
Этот коммит содержится в:
@@ -736,13 +736,21 @@ int __any(int predicate) {
|
||||
// Compares `predicate` against 0 using the ICMP_NE condition through the
// compiler-specific AMDGCN compare intrinsic and returns the intrinsic's
// result as an unsigned 64-bit value.
// NOTE(review): presumably the result is a per-lane bitmask of the lanes
// whose predicate is non-zero -- confirm against the intrinsic documentation.
__device__
inline
unsigned long long int __ballot(int predicate) {
    unsigned long long int lane_mask;
#if !defined(__HCC__)
    // Non-HCC compilers: go through the clang builtin.
    lane_mask = __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
#else
    // HCC: call the icmp intrinsic wrapper directly.
    lane_mask = __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE);
#endif
    return lane_mask;
}
|
||||
|
||||
// Compares `predicate` against 0 using the ICMP_NE condition through the
// compiler-specific AMDGCN compare intrinsic and returns the intrinsic's
// result as an unsigned 64-bit value.
// NOTE(review): presumably the result is a 64-bit per-lane bitmask of the
// lanes whose predicate is non-zero -- confirm against the intrinsic
// documentation.
__device__
inline
unsigned long long int __ballot64(int predicate) {
    unsigned long long int lane_mask;
#if !defined(__HCC__)
    // Non-HCC compilers: go through the clang builtin.
    lane_mask = __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
#else
    // HCC: call the icmp intrinsic wrapper directly.
    lane_mask = __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE);
#endif
    return lane_mask;
}
|
||||
|
||||
// hip.amdgcn.bc - lanemask
|
||||
|
||||
Ссылка в новой задаче
Block a user