SWDEV-393910 - Adding CAS expanders under gfx941 tag.

Change-Id: I2c4bcf56be419d1f037b8555ad254a2dc49d0c5b


[ROCm/clr commit: feb22250f3]
Этот коммит содержится в:
kjayapra-amd
2023-05-16 09:51:48 -04:00
коммит произвёл Karthik Jayaprakash
родитель fce1d56110
Коммит 62302ac4fa
+472 -10
Просмотреть файл
@@ -26,6 +26,11 @@ THE SOFTWARE.
#if __has_builtin(__hip_atomic_compare_exchange_strong)
template<bool B, typename T, typename F> struct Cond_t;
template<typename T, typename F> struct Cond_t<true, T, F> { using type = T; };
template<typename T, typename F> struct Cond_t<false, T, F> { using type = F; };
#if !__HIP_DEVICE_COMPILE__
//TODO: Remove this after compiler pre-defines the following Macros.
#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
@@ -39,6 +44,73 @@ THE SOFTWARE.
#include "amd_hip_unsafe_atomics.h"
#endif
// Atomic expanders
template<
int mem_order = __ATOMIC_SEQ_CST,
int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
typename T,
typename Op,
typename F>
inline
__attribute__((always_inline, device))
T hip_cas_expander(T* p, T x, Op op, F f) noexcept
{
using FP = __attribute__((address_space(0))) const void*;
__device__
extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
if (is_shared_workaround((FP)p))
return f();
using U = typename Cond_t<
sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
auto q = reinterpret_cast<U*>(p);
U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
U tmp1;
do {
tmp1 = tmp0;
op(reinterpret_cast<T&>(tmp1), x);
} while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
mem_order, mem_scope));
return reinterpret_cast<const T&>(tmp0);
}
template<
int mem_order = __ATOMIC_SEQ_CST,
int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
typename T,
typename Cmp,
typename F>
inline
__attribute__((always_inline, device))
T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
{
using FP = __attribute__((address_space(0))) const void*;
__device__
extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
if (is_shared_workaround((FP)p))
return f();
using U = typename Cond_t<
sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
auto q = reinterpret_cast<U*>(p);
U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
while (cmp(x, reinterpret_cast<const T&>(tmp)) &&
!__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
mem_scope));
return reinterpret_cast<const T&>(tmp);
}
__device__
inline
int atomicCAS(int* address, int compare, int val) {
@@ -382,49 +454,126 @@ double atomicExch_system(double* address, double val) {
__device__
inline
int atomicMin(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int x, int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicMin_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int x, int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMin(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long x, unsigned long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long x, unsigned long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long x, unsigned long long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long x, unsigned long long y) { return x < y; },
[=]() {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
@@ -522,49 +671,125 @@ double atomicMin_system(double* address, double val) {
__device__
inline
int atomicMax(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int x, int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicMax_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int x, int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicMax(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long x, unsigned long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long x, unsigned long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long x, unsigned long long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long x, unsigned long long y) { return y < x; },
[=]() {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
@@ -663,154 +888,393 @@ __device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
return __builtin_amdgcn_atomic_inc32(
address, val, __ATOMIC_RELAXED, "agent");
#if defined(__gfx941__)
__device__
extern
unsigned int __builtin_amdgcn_atomic_inc(
unsigned int*,
unsigned int,
unsigned int,
unsigned int,
bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
[=]() {
return
__builtin_amdgcn_atomic_inc(address, val, __ATOMIC_RELAXED, 1, false);
});
#else
return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__
}
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
return __builtin_amdgcn_atomic_dec32(
address, val, __ATOMIC_RELAXED, "agent");
#if defined(__gfx941__)
__device__
extern
unsigned int __builtin_amdgcn_atomic_dec(
unsigned int*,
unsigned int,
unsigned int,
unsigned int,
bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
[=]() {
return
__builtin_amdgcn_atomic_dec(address, val, __ATOMIC_RELAXED, 1, false);
});
#else
return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
#endif // __gfx941__
}
__device__
inline
int atomicAnd(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int& x, int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicAnd_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int& x, int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicAnd(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x &= y; },
[=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x &= y; },
[=]() {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
int atomicOr(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int& x, int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicOr_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int& x, int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicOr(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x |= y; },
[=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x |= y; },
[=]() {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
int atomicXor(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](int& x, int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
int atomicXor_system(int* address, int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](int& x, int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicXor(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
inline
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#endif // __gfx941__
}
__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
#if defined(__gfx941__)
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
address,
val,
[](unsigned long long& x, unsigned long long y) { x ^= y; },
[=]() {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
});
#else
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#endif // __gfx941__
}
__device__
@@ -819,7 +1283,7 @@ unsigned long long atomicXor_system(unsigned long long* address, unsigned long l
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
#else
#else // __hip_atomic_compare_exchange_strong
__device__
inline
@@ -1007,16 +1471,14 @@ __device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
return __builtin_amdgcn_atomic_inc32(
address, val, __ATOMIC_RELAXED, "agent");
return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
}
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
return __builtin_amdgcn_atomic_dec32(
address, val, __ATOMIC_RELAXED, "agent");
return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
}
__device__
@@ -1079,4 +1541,4 @@ unsigned long long atomicXor(
return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
#endif
#endif // __hip_atomic_compare_exchange_strong