Merge pull request #490 from ROCm-Developer-Tools/feature_decouple_atomics_from_hc
Switch the atomic implementation to use Clang builtins.
This commit is contained in:
@@ -150,129 +150,6 @@ __device__ clock_t clock() { return (clock_t)hc::__cycle_u64(); };
|
||||
// abort
|
||||
__device__ void abort() { return hc::abort(); }
|
||||
|
||||
// atomicAdd()
|
||||
__device__ int atomicAdd(int* address, int val) { return hc::atomic_fetch_add(address, val); }
|
||||
__device__ unsigned int atomicAdd(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_fetch_add(address, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicAdd(unsigned long long int* address,
|
||||
unsigned long long int val) {
|
||||
return (long long int)hc::atomic_fetch_add((uint64_t*)address, (uint64_t)val);
|
||||
}
|
||||
__device__ float atomicAdd(float* address, float val) { return hc::atomic_fetch_add(address, val); }
|
||||
|
||||
// atomicSub()
|
||||
__device__ int atomicSub(int* address, int val) { return hc::atomic_fetch_sub(address, val); }
|
||||
__device__ unsigned int atomicSub(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_fetch_sub(address, val);
|
||||
}
|
||||
|
||||
// atomicExch()
|
||||
__device__ int atomicExch(int* address, int val) { return hc::atomic_exchange(address, val); }
|
||||
__device__ unsigned int atomicExch(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_exchange(address, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicExch(unsigned long long int* address,
|
||||
unsigned long long int val) {
|
||||
return (long long int)hc::atomic_exchange((uint64_t*)address, (uint64_t)val);
|
||||
}
|
||||
__device__ float atomicExch(float* address, float val) { return hc::atomic_exchange(address, val); }
|
||||
|
||||
// atomicMin()
|
||||
__device__ int atomicMin(int* address, int val) { return hc::atomic_fetch_min(address, val); }
|
||||
__device__ unsigned int atomicMin(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_fetch_min(address, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicMin(unsigned long long int* address,
|
||||
unsigned long long int val) {
|
||||
return (long long int)hc::atomic_fetch_min((uint64_t*)address, (uint64_t)val);
|
||||
}
|
||||
|
||||
// atomicMax()
|
||||
__device__ int atomicMax(int* address, int val) { return hc::atomic_fetch_max(address, val); }
|
||||
__device__ unsigned int atomicMax(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_fetch_max(address, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicMax(unsigned long long int* address,
|
||||
unsigned long long int val) {
|
||||
return (long long int)hc::atomic_fetch_max((uint64_t*)address, (uint64_t)val);
|
||||
}
|
||||
|
||||
// atomicCAS()
|
||||
template <typename T>
|
||||
__device__ T atomicCAS_impl(T* address, T compare, T val) {
|
||||
// the implementation assumes the atomic is lock-free and
|
||||
// has the same size as the non-atmoic equivalent type
|
||||
static_assert(sizeof(T) == sizeof(std::atomic<T>),
|
||||
"size mismatch between atomic and non-atomic types");
|
||||
|
||||
union {
|
||||
T* address;
|
||||
std::atomic<T>* atomic_address;
|
||||
} u;
|
||||
u.address = address;
|
||||
|
||||
T expected = compare;
|
||||
|
||||
// hcc should generate a system scope atomic CAS
|
||||
std::atomic_compare_exchange_weak_explicit(
|
||||
u.atomic_address, &expected, val, std::memory_order_acq_rel, std::memory_order_relaxed);
|
||||
return expected;
|
||||
}
|
||||
|
||||
__device__ int atomicCAS(int* address, int compare, int val) {
|
||||
return atomicCAS_impl(address, compare, val);
|
||||
}
|
||||
__device__ unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
|
||||
return atomicCAS_impl(address, compare, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicCAS(unsigned long long int* address,
|
||||
unsigned long long int compare,
|
||||
unsigned long long int val) {
|
||||
return atomicCAS_impl(address, compare, val);
|
||||
}
|
||||
|
||||
// atomicAnd()
|
||||
__device__ int atomicAnd(int* address, int val) { return hc::atomic_fetch_and(address, val); }
|
||||
__device__ unsigned int atomicAnd(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_fetch_and(address, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicAnd(unsigned long long int* address,
|
||||
unsigned long long int val) {
|
||||
return (long long int)hc::atomic_fetch_and((uint64_t*)address, (uint64_t)val);
|
||||
}
|
||||
|
||||
// atomicOr()
|
||||
__device__ int atomicOr(int* address, int val) { return hc::atomic_fetch_or(address, val); }
|
||||
__device__ unsigned int atomicOr(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_fetch_or(address, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicOr(unsigned long long int* address,
|
||||
unsigned long long int val) {
|
||||
return (long long int)hc::atomic_fetch_or((uint64_t*)address, (uint64_t)val);
|
||||
}
|
||||
|
||||
// atomicXor()
|
||||
__device__ int atomicXor(int* address, int val) { return hc::atomic_fetch_xor(address, val); }
|
||||
__device__ unsigned int atomicXor(unsigned int* address, unsigned int val) {
|
||||
return hc::atomic_fetch_xor(address, val);
|
||||
}
|
||||
__device__ unsigned long long int atomicXor(unsigned long long int* address,
|
||||
unsigned long long int val) {
|
||||
return (long long int)hc::atomic_fetch_xor((uint64_t*)address, (uint64_t)val);
|
||||
}
|
||||
|
||||
// atomicInc
|
||||
__device__ unsigned int atomicInc(unsigned int* address, unsigned int val) {
|
||||
return hc::__atomic_wrapinc(address, val);
|
||||
}
|
||||
|
||||
// atomicDec
|
||||
__device__ unsigned int atomicDec(unsigned int* address, unsigned int val) {
|
||||
return hc::__atomic_wrapdec(address, val);
|
||||
}
|
||||
|
||||
|
||||
// warp vote function __all __any __ballot
|
||||
__device__ int __all(int input) { return hc::__all(input); }
|
||||
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user