From 62302ac4fa90f650316dee59ec39cc574556b73a Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Tue, 16 May 2023 09:51:48 -0400 Subject: [PATCH] SWDEV-393910 - Adding CAS expanders under gfx941 tag. Change-Id: I2c4bcf56be419d1f037b8555ad254a2dc49d0c5b [ROCm/clr commit: feb22250f3ee30b52ed0645721fac506bd776737] --- .../include/hip/amd_detail/amd_hip_atomic.h | 482 +++++++++++++++++- 1 file changed, 472 insertions(+), 10 deletions(-) diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h index 869f495c89..e201fb6228 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_atomic.h @@ -26,6 +26,11 @@ THE SOFTWARE. #if __has_builtin(__hip_atomic_compare_exchange_strong) +template struct Cond_t; + +template struct Cond_t { using type = T; }; +template struct Cond_t { using type = F; }; + #if !__HIP_DEVICE_COMPILE__ //TODO: Remove this after compiler pre-defines the following Macros. #define __HIP_MEMORY_SCOPE_SINGLETHREAD 1 @@ -39,6 +44,73 @@ THE SOFTWARE. #include "amd_hip_unsafe_atomics.h" #endif +// Atomic expanders +template< + int mem_order = __ATOMIC_SEQ_CST, + int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM, + typename T, + typename Op, + typename F> +inline +__attribute__((always_inline, device)) +T hip_cas_expander(T* p, T x, Op op, F f) noexcept +{ + using FP = __attribute__((address_space(0))) const void*; + + __device__ + extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared"); + + if (is_shared_workaround((FP)p)) + return f(); + + using U = typename Cond_t< + sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type; + + auto q = reinterpret_cast(p); + + U tmp0{__hip_atomic_load(q, mem_order, mem_scope)}; + U tmp1; + do { + tmp1 = tmp0; + + op(reinterpret_cast(tmp1), x); + } while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order, + mem_order, mem_scope)); + + return reinterpret_cast(tmp0); +} + +template< + int mem_order = __ATOMIC_SEQ_CST, + int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM, + typename T, + typename Cmp, + typename F> +inline +__attribute__((always_inline, device)) +T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept +{ + using FP = __attribute__((address_space(0))) const void*; + + __device__ + extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared"); + + if (is_shared_workaround((FP)p)) + return f(); + + using U = typename Cond_t< + sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type; + + auto q = reinterpret_cast(p); + + U tmp{__hip_atomic_load(q, mem_order, mem_scope)}; + while (cmp(x, reinterpret_cast(tmp)) && + !__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order, + mem_scope)); + + return reinterpret_cast(tmp); +} + __device__ inline int atomicCAS(int* address, int compare, int val) { @@ -382,49 +454,126 @@ double atomicExch_system(double* address, double val) { __device__ inline int atomicMin(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](int x, int y) { return x < y; }, [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline int atomicMin_system(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](int x, int y) { return x < y; }, [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned int atomicMin(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ + } __device__ inline unsigned int atomicMin_system(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long long atomicMin(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned long x, unsigned long y) { return x < y; }, + [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long atomicMin_system(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, + val, + [](unsigned long x, unsigned long y) { return x < y; }, + [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long long atomicMin(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned long long x, unsigned long long y) { return x < y; }, + [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, + val, + [](unsigned long long x, unsigned long long y) { return x < y; }, + [=]() { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ @@ -522,49 +671,125 @@ double atomicMin_system(double* address, double val) { __device__ inline int atomicMax(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](int x, int y) { return y < x; }, [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline int atomicMax_system(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](int x, int y) { return y < x; }, [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned int atomicMax(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned int atomicMax_system(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long atomicMax(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned long x, unsigned long y) { return y < x; }, + [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long atomicMax_system(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, + val, + [](unsigned long x, unsigned long y) { return y < x; }, + [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long long atomicMax(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned long long x, unsigned long long y) { return y < x; }, + [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, + val, + [](unsigned long long x, unsigned long long y) { return y < x; }, + [=]() { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ @@ -663,154 +888,393 @@ __device__ inline unsigned int atomicInc(unsigned int* address, unsigned int val) { - return __builtin_amdgcn_atomic_inc32( - address, val, __ATOMIC_RELAXED, "agent"); +#if defined(__gfx941__) + __device__ + extern + unsigned int __builtin_amdgcn_atomic_inc( + unsigned int*, + unsigned int, + unsigned int, + unsigned int, + bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32"); + + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); }, + [=]() { + return + __builtin_amdgcn_atomic_inc(address, val, __ATOMIC_RELAXED, 1, false); + }); +#else + return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent"); +#endif // __gfx941__ + } __device__ inline unsigned int atomicDec(unsigned int* address, unsigned int val) { - return __builtin_amdgcn_atomic_dec32( - address, val, __ATOMIC_RELAXED, "agent"); +#if defined(__gfx941__) + __device__ + extern + unsigned int __builtin_amdgcn_atomic_dec( + unsigned int*, + unsigned int, + unsigned int, + unsigned int, + bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32"); + + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); }, + [=]() { + return + __builtin_amdgcn_atomic_dec(address, val, __ATOMIC_RELAXED, 1, false); + }); +#else + return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent"); +#endif // __gfx941__ + } __device__ inline int atomicAnd(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](int& x, int y) { x &= y; }, [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline int atomicAnd_system(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](int& x, int y) { x &= y; }, [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned int atomicAnd(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned int atomicAnd_system(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long atomicAnd(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long atomicAnd_system(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned long long& x, unsigned long long y) { x &= y; }, + [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, + val, + [](unsigned long long& x, unsigned long long y) { x &= y; }, + [=]() { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline int atomicOr(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](int& x, int y) { x |= y; }, [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline int atomicOr_system(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](int& x, int y) { x |= y; }, [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned int atomicOr(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned int atomicOr_system(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long atomicOr(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long atomicOr_system(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long long atomicOr(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned long long& x, unsigned long long y) { x |= y; }, + [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, + val, + [](unsigned long long& x, unsigned long long y) { x |= y; }, + [=]() { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline int atomicXor(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](int& x, int y) { x ^= y; }, [=]() { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline int atomicXor_system(int* address, int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](int& x, int y) { x ^= y; }, [=]() { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned int atomicXor(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned int atomicXor_system(unsigned int* address, unsigned int val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long atomicXor(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ inline unsigned long atomicXor_system(unsigned long* address, unsigned long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>( + address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + }); +#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +#endif // __gfx941__ } __device__ inline unsigned long long atomicXor(unsigned long long* address, unsigned long long val) { +#if defined(__gfx941__) + return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>( + address, + val, + [](unsigned long long& x, unsigned long long y) { x ^= y; }, + [=]() { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + }); +#else return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +#endif // __gfx941__ } __device__ @@ -819,7 +1283,7 @@ unsigned long long atomicXor_system(unsigned long long* address, unsigned long l return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); } -#else +#else // __hip_atomic_compare_exchange_strong __device__ inline @@ -1007,16 +1471,14 @@ __device__ inline unsigned int atomicInc(unsigned int* address, unsigned int val) { - return __builtin_amdgcn_atomic_inc32( - address, val, __ATOMIC_RELAXED, "agent"); + return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent"); } __device__ inline unsigned int atomicDec(unsigned int* address, unsigned int val) { - return __builtin_amdgcn_atomic_dec32( - address, val, __ATOMIC_RELAXED, "agent"); + return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent"); } __device__ @@ -1079,4 +1541,4 @@ unsigned long long atomicXor( return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); } -#endif +#endif // __hip_atomic_compare_exchange_strong