Make CAS loops use the TTAS idiom. (#1573)
* Make CAS loops use the TTAS idiom. * More efficient re-formulation of TTAS. * Fix typo. * The typo was not quite a typo
This commit is contained in:
committed by
Maneesh Gupta
parent
af351d7e1b
commit
9ba25b42c8
@@ -59,12 +59,17 @@ float atomicAdd(float* address, float val)
|
||||
{
|
||||
unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
|
||||
unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
|
||||
unsigned int r;
|
||||
|
||||
unsigned int r;
|
||||
do {
|
||||
r = old;
|
||||
r = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
|
||||
|
||||
if (r != old) { r = old; continue; }
|
||||
|
||||
old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
|
||||
} while (r != old);
|
||||
|
||||
if (r == old) break;
|
||||
} while (true);
|
||||
|
||||
return __uint_as_float(r);
|
||||
}
|
||||
@@ -74,13 +79,18 @@ double atomicAdd(double* address, double val)
|
||||
{
|
||||
unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
|
||||
unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
|
||||
unsigned long long r;
|
||||
|
||||
unsigned long long r;
|
||||
do {
|
||||
r = old;
|
||||
r = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
|
||||
|
||||
if (r != old) { r = old; continue; }
|
||||
|
||||
old = atomicCAS(
|
||||
uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
|
||||
} while (r != old);
|
||||
|
||||
if (r == old) break;
|
||||
} while (true);
|
||||
|
||||
return __longlong_as_double(r);
|
||||
}
|
||||
@@ -144,7 +154,13 @@ unsigned long long atomicMin(
|
||||
unsigned long long* address, unsigned long long val)
|
||||
{
|
||||
unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
|
||||
while (val < tmp) { tmp = atomicCAS(address, tmp, val); }
|
||||
while (val < tmp) {
|
||||
const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
|
||||
|
||||
if (tmp1 != tmp) { tmp = tmp1; continue; }
|
||||
|
||||
tmp = atomicCAS(address, tmp, val);
|
||||
}
|
||||
|
||||
return tmp;
|
||||
}
|
||||
@@ -167,7 +183,13 @@ unsigned long long atomicMax(
|
||||
unsigned long long* address, unsigned long long val)
|
||||
{
|
||||
unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
|
||||
while (tmp < val) { tmp = atomicCAS(address, tmp, val); }
|
||||
while (tmp < val) {
|
||||
const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
|
||||
|
||||
if (tmp1 != tmp) { tmp = tmp1; continue; }
|
||||
|
||||
tmp = atomicCAS(address, tmp, val);
|
||||
}
|
||||
|
||||
return tmp;
|
||||
}
|
||||
@@ -177,7 +199,7 @@ inline
|
||||
unsigned int atomicInc(unsigned int* address, unsigned int val)
|
||||
{
|
||||
__device__
|
||||
extern
|
||||
extern
|
||||
unsigned int __builtin_amdgcn_atomic_inc(
|
||||
unsigned int*,
|
||||
unsigned int,
|
||||
@@ -194,7 +216,7 @@ inline
|
||||
unsigned int atomicDec(unsigned int* address, unsigned int val)
|
||||
{
|
||||
__device__
|
||||
extern
|
||||
extern
|
||||
unsigned int __builtin_amdgcn_atomic_dec(
|
||||
unsigned int*,
|
||||
unsigned int,
|
||||
|
||||
Reference in New Issue
Block a user