From cc4de2bc28919d25105232e187e2835ddfbf6639 Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Mon, 29 Apr 2019 15:07:11 -0400 Subject: [PATCH] [devfunc] Re-implement ballot using AMDGCN builtins - As the signature of `amdgcn.icmp` is changed for the next-gen chip, using clang builtins is a portable way to hide those details. [ROCm/hip commit: a64637da2c59b0a0db4b47777dd8009b1c9d4ccb] --- projects/hip/include/hip/hcc_detail/device_functions.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/projects/hip/include/hip/hcc_detail/device_functions.h b/projects/hip/include/hip/hcc_detail/device_functions.h index 044ca1e634..83514ae079 100644 --- a/projects/hip/include/hip/hcc_detail/device_functions.h +++ b/projects/hip/include/hip/hcc_detail/device_functions.h @@ -736,13 +736,21 @@ int __any(int predicate) { __device__ inline unsigned long long int __ballot(int predicate) { +#if defined(__HCC__) return __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE); +#else + return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE); +#endif } __device__ inline unsigned long long int __ballot64(int predicate) { +#if defined(__HCC__) return __llvm_amdgcn_icmp_i32(predicate, 0, ICMP_NE); +#else + return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE); +#endif } // hip.amdgcn.bc - lanemask