SWDEV-274276 - Implement system scope atomics to _system builtins. Default atomics will map to agent.

Change-Id: I63b13063274418e96d4be0159c779127b166bea5
(cherry picked from commit 7c4ae8ca1daac0b75ac36c4e4a45b8d13f219d14)


[ROCm/hip commit: dfaf6140cf]
Этот коммит содержится в:
kjayapra-amd
2021-03-11 14:22:01 -06:00
родитель d7ce1b93ea
Коммит f3af2709b4
+418 -2
Просмотреть файл
@@ -1,7 +1,423 @@
#pragma once
#include "device_functions.h"
#if __has_builtin(__hip_atomic_compare_exchange_strong)
#if !__HIP_DEVICE_COMPILE__
//TODO: Remove this after compiler pre-defines the following Macros.
#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
#define __HIP_MEMORY_SCOPE_WAVEFRONT 2
#define __HIP_MEMORY_SCOPE_WORKGROUP 3
#define __HIP_MEMORY_SCOPE_AGENT 4
#define __HIP_MEMORY_SCOPE_SYSTEM 5
#endif
__device__
inline
int atomicCAS(int* address, int compare, int val) {
__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
return compare;
}
__device__
inline
int atomicCAS_system(int* address, int compare, int val) {
__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
return compare;
}
__device__
inline
unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
return compare;
}
__device__
inline
unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) {
__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
return compare;
}
__device__
inline
unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare,
unsigned long long val) {
__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_AGENT);
return compare;
}
__device__
inline
unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare,
unsigned long long val) {
__hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED,
__HIP_MEMORY_SCOPE_SYSTEM);
return compare;
}
__device__
inline
int atomicAdd(int* address, int val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicAdd_system(int* address, int val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicAdd(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
float atomicAdd(float* address, float val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
float atomicAdd_system(float* address, float val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
DEPRECATED("use atomicAdd instead")
__device__
inline
void atomicAddNoRet(float* address, float val)
{
__ockl_atomic_add_noret_f32(address, val);
}
__device__
inline
double atomicAdd(double* address, double val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
double atomicAdd_system(double* address, double val) {
return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
int atomicSub(int* address, int val) {
return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicSub_system(int* address, int val) {
return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicSub(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
int atomicExch(int* address, int val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicExch_system(int* address, int val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicExch(unsigned int* address, unsigned int val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
float atomicExch(float* address, float val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
float atomicExch_system(float* address, float val) {
return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
int atomicMin(int* address, int val) {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicMin_system(int* address, int val) {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicMin(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned long long atomicMin(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
int atomicMax(int* address, int val) {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicMax_system(int* address, int val) {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicMax(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned long long atomicMax(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicInc(unsigned int* address, unsigned int val)
{
__device__
extern
unsigned int __builtin_amdgcn_atomic_inc(
unsigned int*,
unsigned int,
unsigned int,
unsigned int,
bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");
return __builtin_amdgcn_atomic_inc(
address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
__device__
inline
unsigned int atomicDec(unsigned int* address, unsigned int val)
{
__device__
extern
unsigned int __builtin_amdgcn_atomic_dec(
unsigned int*,
unsigned int,
unsigned int,
unsigned int,
bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");
return __builtin_amdgcn_atomic_dec(
address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false);
}
__device__
inline
int atomicAnd(int* address, int val) {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicAnd_system(int* address, int val) {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicAnd(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
int atomicOr(int* address, int val) {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicOr_system(int* address, int val) {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicOr(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
int atomicXor(int* address, int val) {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
int atomicXor_system(int* address, int val) {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned int atomicXor(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
__device__
inline
unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
__device__
inline
unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
}
#else
__device__
inline
int atomicCAS(int* address, int compare, int val)
@@ -268,4 +684,4 @@ unsigned long long atomicXor(
return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
}
// TODO: add scoped atomics i.e. atomic{*}_system && atomic{*}_block.
#endif