From baaa2ac64d4cd14cad50aee2832ca16fc66c39b0 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Tue, 18 Feb 2025 10:17:27 -0800 Subject: [PATCH] Insert barrier after loading work items to LDS (#1551) --- src/device/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/device/common.h b/src/device/common.h index 3f47e68092..54be067108 100644 --- a/src/device/common.h +++ b/src/device/common.h @@ -560,8 +560,8 @@ __device__ __forceinline__ void ncclKernelMain(struct ncclDevKernelArgs const* a default: break; } - __synclds(); loadWorkBatchToShmem(tid%WARP_SIZE, tn, args, batchIx); + __synclds(); // Check whether the last operation was aborted and make sure all threads exit bool aborted = false;