diff --git a/include/hip/hcc_detail/hip_atomic.h b/include/hip/hcc_detail/hip_atomic.h index a5ac94a74b..2c13411319 100644 --- a/include/hip/hcc_detail/hip_atomic.h +++ b/include/hip/hcc_detail/hip_atomic.h @@ -59,12 +59,17 @@ float atomicAdd(float* address, float val) { unsigned int* uaddr{reinterpret_cast(address)}; unsigned int old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; - unsigned int r; + unsigned int r; do { - r = old; + r = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + + if (r != old) { r = old; continue; } + old = atomicCAS(uaddr, r, __float_as_uint(val + __uint_as_float(r))); - } while (r != old); + + if (r == old) break; + } while (true); return __uint_as_float(r); } @@ -74,13 +79,18 @@ double atomicAdd(double* address, double val) { unsigned long long* uaddr{reinterpret_cast(address)}; unsigned long long old{__atomic_load_n(uaddr, __ATOMIC_RELAXED)}; - unsigned long long r; + unsigned long long r; do { - r = old; + r = __atomic_load_n(uaddr, __ATOMIC_RELAXED); + + if (r != old) { r = old; continue; } + old = atomicCAS( uaddr, r, __double_as_longlong(val + __longlong_as_double(r))); - } while (r != old); + + if (r == old) break; + } while (true); return __longlong_as_double(r); } @@ -144,7 +154,13 @@ unsigned long long atomicMin( unsigned long long* address, unsigned long long val) { unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)}; - while (val < tmp) { tmp = atomicCAS(address, tmp, val); } + while (val < tmp) { + const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED); + + if (tmp1 != tmp) { tmp = tmp1; continue; } + + tmp = atomicCAS(address, tmp, val); + } return tmp; } @@ -167,7 +183,13 @@ unsigned long long atomicMax( unsigned long long* address, unsigned long long val) { unsigned long long tmp{__atomic_load_n(address, __ATOMIC_RELAXED)}; - while (tmp < val) { tmp = atomicCAS(address, tmp, val); } + while (tmp < val) { + const auto tmp1 = __atomic_load_n(address, __ATOMIC_RELAXED); + + if (tmp1 != tmp) { tmp = tmp1; continue; } + + tmp = atomicCAS(address, tmp, val); + } return tmp; } @@ -177,7 +199,7 @@ inline unsigned int atomicInc(unsigned int* address, unsigned int val) { __device__ - extern + extern unsigned int __builtin_amdgcn_atomic_inc( unsigned int*, unsigned int, @@ -194,7 +216,7 @@ inline unsigned int atomicDec(unsigned int* address, unsigned int val) { __device__ - extern + extern unsigned int __builtin_amdgcn_atomic_dec( unsigned int*, unsigned int,