SWDEV-393910 - Adding CAS expanders under gfx941 tag.
Change-Id: I2c4bcf56be419d1f037b8555ad254a2dc49d0c5b
[ROCm/clr commit: feb22250f3]
Этот коммит содержится в:
коммит произвёл
Karthik Jayaprakash
родитель
fce1d56110
Коммит
62302ac4fa
@@ -26,6 +26,11 @@ THE SOFTWARE.
|
||||
|
||||
#if __has_builtin(__hip_atomic_compare_exchange_strong)
|
||||
|
||||
template<bool B, typename T, typename F> struct Cond_t;
|
||||
|
||||
template<typename T, typename F> struct Cond_t<true, T, F> { using type = T; };
|
||||
template<typename T, typename F> struct Cond_t<false, T, F> { using type = F; };
|
||||
|
||||
#if !__HIP_DEVICE_COMPILE__
|
||||
//TODO: Remove this after compiler pre-defines the following Macros.
|
||||
#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
|
||||
@@ -39,6 +44,73 @@ THE SOFTWARE.
|
||||
#include "amd_hip_unsafe_atomics.h"
|
||||
#endif
|
||||
|
||||
// Atomic expanders
|
||||
template<
|
||||
int mem_order = __ATOMIC_SEQ_CST,
|
||||
int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
|
||||
typename T,
|
||||
typename Op,
|
||||
typename F>
|
||||
inline
|
||||
__attribute__((always_inline, device))
|
||||
T hip_cas_expander(T* p, T x, Op op, F f) noexcept
|
||||
{
|
||||
using FP = __attribute__((address_space(0))) const void*;
|
||||
|
||||
__device__
|
||||
extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
|
||||
|
||||
if (is_shared_workaround((FP)p))
|
||||
return f();
|
||||
|
||||
using U = typename Cond_t<
|
||||
sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
|
||||
|
||||
auto q = reinterpret_cast<U*>(p);
|
||||
|
||||
U tmp0{__hip_atomic_load(q, mem_order, mem_scope)};
|
||||
U tmp1;
|
||||
do {
|
||||
tmp1 = tmp0;
|
||||
|
||||
op(reinterpret_cast<T&>(tmp1), x);
|
||||
} while (!__hip_atomic_compare_exchange_strong(q, &tmp0, tmp1, mem_order,
|
||||
mem_order, mem_scope));
|
||||
|
||||
return reinterpret_cast<const T&>(tmp0);
|
||||
}
|
||||
|
||||
template<
|
||||
int mem_order = __ATOMIC_SEQ_CST,
|
||||
int mem_scope= __HIP_MEMORY_SCOPE_SYSTEM,
|
||||
typename T,
|
||||
typename Cmp,
|
||||
typename F>
|
||||
inline
|
||||
__attribute__((always_inline, device))
|
||||
T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
|
||||
{
|
||||
using FP = __attribute__((address_space(0))) const void*;
|
||||
|
||||
__device__
|
||||
extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
|
||||
|
||||
if (is_shared_workaround((FP)p))
|
||||
return f();
|
||||
|
||||
using U = typename Cond_t<
|
||||
sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
|
||||
|
||||
auto q = reinterpret_cast<U*>(p);
|
||||
|
||||
U tmp{__hip_atomic_load(q, mem_order, mem_scope)};
|
||||
while (cmp(x, reinterpret_cast<const T&>(tmp)) &&
|
||||
!__hip_atomic_compare_exchange_strong(q, &tmp, x, mem_order, mem_order,
|
||||
mem_scope));
|
||||
|
||||
return reinterpret_cast<const T&>(tmp);
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicCAS(int* address, int compare, int val) {
|
||||
@@ -382,49 +454,126 @@ double atomicExch_system(double* address, double val) {
|
||||
__device__
|
||||
inline
|
||||
int atomicMin(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](int x, int y) { return x < y; }, [=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicMin_system(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](int x, int y) { return x < y; }, [=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicMin(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned int x, unsigned int y) { return x < y; }, [=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicMin(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long x, unsigned long y) { return x < y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long x, unsigned long y) { return x < y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long x, unsigned long long y) { return x < y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long x, unsigned long long y) { return x < y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
@@ -522,49 +671,125 @@ double atomicMin_system(double* address, double val) {
|
||||
__device__
|
||||
inline
|
||||
int atomicMax(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](int x, int y) { return y < x; }, [=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicMax_system(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](int x, int y) { return y < x; }, [=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicMax(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned int x, unsigned int y) { return y < x; }, [=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicMax(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long x, unsigned long y) { return y < x; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long x, unsigned long y) { return y < x; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long x, unsigned long long y) { return y < x; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long x, unsigned long long y) { return y < x; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
@@ -663,154 +888,393 @@ __device__
|
||||
inline
|
||||
unsigned int atomicInc(unsigned int* address, unsigned int val)
|
||||
{
|
||||
return __builtin_amdgcn_atomic_inc32(
|
||||
address, val, __ATOMIC_RELAXED, "agent");
|
||||
#if defined(__gfx941__)
|
||||
__device__
|
||||
extern
|
||||
unsigned int __builtin_amdgcn_atomic_inc(
|
||||
unsigned int*,
|
||||
unsigned int,
|
||||
unsigned int,
|
||||
unsigned int,
|
||||
bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");
|
||||
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned int& x, unsigned int y) { x = (x >= y) ? 0 : (x + 1); },
|
||||
[=]() {
|
||||
return
|
||||
__builtin_amdgcn_atomic_inc(address, val, __ATOMIC_RELAXED, 1, false);
|
||||
});
|
||||
#else
|
||||
return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
|
||||
#endif // __gfx941__
|
||||
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicDec(unsigned int* address, unsigned int val)
|
||||
{
|
||||
return __builtin_amdgcn_atomic_dec32(
|
||||
address, val, __ATOMIC_RELAXED, "agent");
|
||||
#if defined(__gfx941__)
|
||||
__device__
|
||||
extern
|
||||
unsigned int __builtin_amdgcn_atomic_dec(
|
||||
unsigned int*,
|
||||
unsigned int,
|
||||
unsigned int,
|
||||
unsigned int,
|
||||
bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");
|
||||
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned int& x, unsigned int y) { x = (!x || x > y) ? y : (x - 1); },
|
||||
[=]() {
|
||||
return
|
||||
__builtin_amdgcn_atomic_dec(address, val, __ATOMIC_RELAXED, 1, false);
|
||||
});
|
||||
#else
|
||||
return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
|
||||
#endif // __gfx941__
|
||||
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicAnd(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](int& x, int y) { x &= y; }, [=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicAnd_system(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](int& x, int y) { x &= y; }, [=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned int& x, unsigned int y) { x &= y; }, [=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicAnd(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned long& x, unsigned long y) { x &= y; }, [=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long& x, unsigned long long y) { x &= y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long& x, unsigned long long y) { x &= y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicOr(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](int& x, int y) { x |= y; }, [=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicOr_system(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](int& x, int y) { x |= y; }, [=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicOr(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned int& x, unsigned int y) { x |= y; }, [=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicOr(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned long& x, unsigned long y) { x |= y; }, [=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long& x, unsigned long long y) { x |= y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long& x, unsigned long long y) { x |= y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicXor(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](int& x, int y) { x ^= y; }, [=]() {
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
int atomicXor_system(int* address, int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](int& x, int y) { x ^= y; }, [=]() {
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicXor(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned int& x, unsigned int y) { x ^= y; }, [=]() {
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicXor(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
|
||||
address, val, [](unsigned long& x, unsigned long y) { x ^= y; }, [=]() {
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_SYSTEM);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
|
||||
#if defined(__gfx941__)
|
||||
return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
|
||||
address,
|
||||
val,
|
||||
[](unsigned long long& x, unsigned long long y) { x ^= y; },
|
||||
[=]() {
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED,
|
||||
__HIP_MEMORY_SCOPE_AGENT);
|
||||
});
|
||||
#else
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
#endif // __gfx941__
|
||||
}
|
||||
|
||||
__device__
|
||||
@@ -819,7 +1283,7 @@ unsigned long long atomicXor_system(unsigned long long* address, unsigned long l
|
||||
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
}
|
||||
|
||||
#else
|
||||
#else // __hip_atomic_compare_exchange_strong
|
||||
|
||||
__device__
|
||||
inline
|
||||
@@ -1007,16 +1471,14 @@ __device__
|
||||
inline
|
||||
unsigned int atomicInc(unsigned int* address, unsigned int val)
|
||||
{
|
||||
return __builtin_amdgcn_atomic_inc32(
|
||||
address, val, __ATOMIC_RELAXED, "agent");
|
||||
return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
|
||||
}
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned int atomicDec(unsigned int* address, unsigned int val)
|
||||
{
|
||||
return __builtin_amdgcn_atomic_dec32(
|
||||
address, val, __ATOMIC_RELAXED, "agent");
|
||||
return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
|
||||
}
|
||||
|
||||
__device__
|
||||
@@ -1079,4 +1541,4 @@ unsigned long long atomicXor(
|
||||
return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // __hip_atomic_compare_exchange_strong
|
||||
|
||||
Ссылка в новой задаче
Block a user