Make CAS loops use the TTAS idiom. (#1573)

* Make CAS loops use the TTAS idiom.

* More efficient re-formulation of TTAS.

* Fix typo.

* The typo was not quite a typo
This commit is contained in:
Alex Voicu
2019-10-24 13:15:20 +01:00
committed by Maneesh Gupta
parent af351d7e1b
commit 9ba25b42c8
+32 -10
View File
@@ -59,12 +59,17 @@ float atomicAdd(float* address, float val)
{
unsigned int* uaddr{reinterpret_cast<unsigned int*>(address)};
unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
unsigned int r;
unsigned int r;
do {
r = old;
r = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
if (r != old) { r = old; continue; }
old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r)));
} while (r != old);
if (r == old) break;
} while (true);
return __uint_as_float(r);
}
@@ -74,13 +79,18 @@ double atomicAdd(double* address, double val)
{
unsigned long long* uaddr{reinterpret_cast<unsigned long long*>(address)};
unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
unsigned long long r;
unsigned long long r;
do {
r = old;
r = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
if (r != old) { r = old; continue; }
old = atomicCAS(
uaddr, r, __double_as_longlong(val + __longlong_as_double(r)));
} while (r != old);
if (r == old) break;
} while (true);
return __longlong_as_double(r);
}
@@ -144,7 +154,13 @@ unsigned long long atomicMin(
unsigned long long* address, unsigned long long val)
{
unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
while (val < tmp) { tmp = atomicCAS(address, tmp, val); }
while (val < tmp) {
const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
if (tmp1 != tmp) { tmp = tmp1; continue; }
tmp = atomicCAS(address, tmp, val);
}
return tmp;
}
@@ -167,7 +183,13 @@ unsigned long long atomicMax(
unsigned long long* address, unsigned long long val)
{
unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)};
while (tmp < val) { tmp = atomicCAS(address, tmp, val); }
while (tmp < val) {
const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED);
if (tmp1 != tmp) { tmp = tmp1; continue; }
tmp = atomicCAS(address, tmp, val);
}
return tmp;
}
@@ -177,7 +199,7 @@ inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
__device__
extern
extern
unsigned int __builtin_amdgcn_atomic_inc(
unsigned int*,
unsigned int,
@@ -194,7 +216,7 @@ inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
__device__
extern
extern
unsigned int __builtin_amdgcn_atomic_dec(
unsigned int*,
unsigned int,