From 23e99dbb0785042c8f548f1dd7bbbb7554ed825b Mon Sep 17 00:00:00 2001 From: sdashmiz Date: Mon, 24 Apr 2023 15:08:45 -0400 Subject: [PATCH] SWDEV-396533 - correct __shfl function to match CUDA (wrap src_lane within width) Signed-off-by: sdashmiz Change-Id: I311419fd25c055339f25fe0c7a132ec9ee225600 --- hipamd/include/hip/amd_detail/amd_warp_functions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/include/hip/amd_detail/amd_warp_functions.h b/hipamd/include/hip/amd_detail/amd_warp_functions.h index 9b268b5121..1e9eba8920 100644 --- a/hipamd/include/hip/amd_detail/amd_warp_functions.h +++ b/hipamd/include/hip/amd_detail/amd_warp_functions.h @@ -89,7 +89,7 @@ __device__ inline int __shfl(int var, int src_lane, int width = warpSize) { int self = __lane_id(); - int index = src_lane + (self & ~(width-1)); + int index = (src_lane & (width - 1)) + (self & ~(width-1)); return __builtin_amdgcn_ds_bpermute(index<<2, var); } __device__