Merge pull request #490 from ROCm-Developer-Tools/feature_decouple_atomics_from_hc

Switch the atomic implementation to use Clang  builtins.
This commit is contained in:
Maneesh Gupta
2018-06-20 14:16:43 +05:30
committato da GitHub
6 ha cambiato i file con 451 aggiunte e 339 eliminazioni
-123
Vedi File
@@ -150,129 +150,6 @@ __device__ clock_t clock() { return (clock_t)hc::__cycle_u64(); };
// abort
__device__ void abort() { return hc::abort(); }
// atomicAdd()
__device__ int atomicAdd(int* address, int val) { return hc::atomic_fetch_add(address, val); }
__device__ unsigned int atomicAdd(unsigned int* address, unsigned int val) {
return hc::atomic_fetch_add(address, val);
}
__device__ unsigned long long int atomicAdd(unsigned long long int* address,
unsigned long long int val) {
return (long long int)hc::atomic_fetch_add((uint64_t*)address, (uint64_t)val);
}
__device__ float atomicAdd(float* address, float val) { return hc::atomic_fetch_add(address, val); }
// atomicSub()
__device__ int atomicSub(int* address, int val) { return hc::atomic_fetch_sub(address, val); }
__device__ unsigned int atomicSub(unsigned int* address, unsigned int val) {
return hc::atomic_fetch_sub(address, val);
}
// atomicExch()
__device__ int atomicExch(int* address, int val) { return hc::atomic_exchange(address, val); }
__device__ unsigned int atomicExch(unsigned int* address, unsigned int val) {
return hc::atomic_exchange(address, val);
}
__device__ unsigned long long int atomicExch(unsigned long long int* address,
unsigned long long int val) {
return (long long int)hc::atomic_exchange((uint64_t*)address, (uint64_t)val);
}
__device__ float atomicExch(float* address, float val) { return hc::atomic_exchange(address, val); }
// atomicMin()
__device__ int atomicMin(int* address, int val) { return hc::atomic_fetch_min(address, val); }
__device__ unsigned int atomicMin(unsigned int* address, unsigned int val) {
return hc::atomic_fetch_min(address, val);
}
__device__ unsigned long long int atomicMin(unsigned long long int* address,
unsigned long long int val) {
return (long long int)hc::atomic_fetch_min((uint64_t*)address, (uint64_t)val);
}
// atomicMax()
__device__ int atomicMax(int* address, int val) { return hc::atomic_fetch_max(address, val); }
__device__ unsigned int atomicMax(unsigned int* address, unsigned int val) {
return hc::atomic_fetch_max(address, val);
}
__device__ unsigned long long int atomicMax(unsigned long long int* address,
unsigned long long int val) {
return (long long int)hc::atomic_fetch_max((uint64_t*)address, (uint64_t)val);
}
// atomicCAS()
template <typename T>
__device__ T atomicCAS_impl(T* address, T compare, T val) {
// the implementation assumes the atomic is lock-free and
// has the same size as the non-atmoic equivalent type
static_assert(sizeof(T) == sizeof(std::atomic<T>),
"size mismatch between atomic and non-atomic types");
union {
T* address;
std::atomic<T>* atomic_address;
} u;
u.address = address;
T expected = compare;
// hcc should generate a system scope atomic CAS
std::atomic_compare_exchange_weak_explicit(
u.atomic_address, &expected, val, std::memory_order_acq_rel, std::memory_order_relaxed);
return expected;
}
__device__ int atomicCAS(int* address, int compare, int val) {
return atomicCAS_impl(address, compare, val);
}
__device__ unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
return atomicCAS_impl(address, compare, val);
}
__device__ unsigned long long int atomicCAS(unsigned long long int* address,
unsigned long long int compare,
unsigned long long int val) {
return atomicCAS_impl(address, compare, val);
}
// atomicAnd()
__device__ int atomicAnd(int* address, int val) { return hc::atomic_fetch_and(address, val); }
__device__ unsigned int atomicAnd(unsigned int* address, unsigned int val) {
return hc::atomic_fetch_and(address, val);
}
__device__ unsigned long long int atomicAnd(unsigned long long int* address,
unsigned long long int val) {
return (long long int)hc::atomic_fetch_and((uint64_t*)address, (uint64_t)val);
}
// atomicOr()
__device__ int atomicOr(int* address, int val) { return hc::atomic_fetch_or(address, val); }
__device__ unsigned int atomicOr(unsigned int* address, unsigned int val) {
return hc::atomic_fetch_or(address, val);
}
__device__ unsigned long long int atomicOr(unsigned long long int* address,
unsigned long long int val) {
return (long long int)hc::atomic_fetch_or((uint64_t*)address, (uint64_t)val);
}
// atomicXor()
__device__ int atomicXor(int* address, int val) { return hc::atomic_fetch_xor(address, val); }
__device__ unsigned int atomicXor(unsigned int* address, unsigned int val) {
return hc::atomic_fetch_xor(address, val);
}
__device__ unsigned long long int atomicXor(unsigned long long int* address,
unsigned long long int val) {
return (long long int)hc::atomic_fetch_xor((uint64_t*)address, (uint64_t)val);
}
// atomicInc
__device__ unsigned int atomicInc(unsigned int* address, unsigned int val) {
return hc::__atomic_wrapinc(address, val);
}
// atomicDec
__device__ unsigned int atomicDec(unsigned int* address, unsigned int val) {
return hc::__atomic_wrapdec(address, val);
}
// warp vote function __all __any __ballot
__device__ int __all(int input) { return hc::__all(input); }