From aa95985867c2d4b2f57d6033ab18c55ff545b452 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Mon, 28 Aug 2023 08:28:05 -0700 Subject: [PATCH] rccl-prim-test: use non-temporal access (#867) --- tools/rccl-prim-test/copy_kernel.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/rccl-prim-test/copy_kernel.h b/tools/rccl-prim-test/copy_kernel.h index 7ea328c3eb..62e0bade2a 100644 --- a/tools/rccl-prim-test/copy_kernel.h +++ b/tools/rccl-prim-test/copy_kernel.h @@ -28,12 +28,12 @@ struct MULTI { template inline __device__ T vFetch(const volatile T* ptr) { - return *ptr; + return __builtin_nontemporal_load(ptr); } template inline __device__ void vStore(volatile T* ptr, const T val) { - *ptr = val; + __builtin_nontemporal_store(val, ptr); } template @@ -86,16 +86,16 @@ struct MULTI128 { inline __device__ void Fetch128(Pack128& v, const Pack128* p) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__) - v.x = p->x; - v.y = p->y; + v.x = __builtin_nontemporal_load(&p->x); + v.y = __builtin_nontemporal_load(&p->y); #else asm volatile("ld.volatile.global.v2.u64 {%0,%1}, [%2];" : "=l"(v.x), "=l"(v.y) : "l"(p) : "memory"); #endif } inline __device__ void Store128(Pack128* p, Pack128& v) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__) - p->x = v.x; - p->y = v.y; + __builtin_nontemporal_store(v.x, &p->x); + __builtin_nontemporal_store(v.y, &p->y); #else asm volatile("st.volatile.global.v2.u64 [%0], {%1,%2};" :: "l"(p), "l"(v.x), "l"(v.y) : "memory"); #endif