From f3af2709b4e89976380dd031ef4f8589e2fbbfa7 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Thu, 11 Mar 2021 14:22:01 -0600 Subject: [PATCH] SWDEV-274276 - Implement system scope atomics to _system builtins. Default atomics will map to agent. Change-Id: I63b13063274418e96d4be0159c779127b166bea5 (cherry picked from commit 7c4ae8ca1daac0b75ac36c4e4a45b8d13f219d14) [ROCm/hip commit: dfaf6140cf30d1912b6b9dd3d53eada56ed756aa] --- .../hip/include/hip/amd_detail/hip_atomic.h | 420 +++++++++++++++++- 1 file changed, 418 insertions(+), 2 deletions(-) diff --git a/projects/hip/include/hip/amd_detail/hip_atomic.h b/projects/hip/include/hip/amd_detail/hip_atomic.h index 798254f2a1..fdbf5be463 100644 --- a/projects/hip/include/hip/amd_detail/hip_atomic.h +++ b/projects/hip/include/hip/amd_detail/hip_atomic.h @@ -1,7 +1,423 @@ -#pragma once + #include "device_functions.h" +#if __has_builtin(__hip_atomic_compare_exchange_strong) + +#if !__HIP_DEVICE_COMPILE__ +//TODO: Remove this after compiler pre-defines the following Macros. +#define __HIP_MEMORY_SCOPE_SINGLETHREAD 1 +#define __HIP_MEMORY_SCOPE_WAVEFRONT 2 +#define __HIP_MEMORY_SCOPE_WORKGROUP 3 +#define __HIP_MEMORY_SCOPE_AGENT 4 +#define __HIP_MEMORY_SCOPE_SYSTEM 5 +#endif + +__device__ +inline +int atomicCAS(int* address, int compare, int val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ +inline +int atomicCAS_system(int* address, int compare, int val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + return compare; +} + +__device__ +inline +unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ +inline +unsigned int atomicCAS_system(unsigned int* address, unsigned int compare, unsigned int val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + return compare; +} + +__device__ +inline +unsigned long long atomicCAS(unsigned long long* address, unsigned long long compare, + unsigned long long val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT); + return compare; +} + +__device__ +inline +unsigned long long atomicCAS_system(unsigned long long* address, unsigned long long compare, + unsigned long long val) { + __hip_atomic_compare_exchange_strong(address, &compare, val, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_SYSTEM); + return compare; +} + +__device__ +inline +int atomicAdd(int* address, int val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicAdd_system(int* address, int val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicAdd(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicAdd_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicAdd_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +float atomicAdd(float* address, float val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +float atomicAdd_system(float* address, float val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +DEPRECATED("use atomicAdd instead") +__device__ +inline +void atomicAddNoRet(float* address, float val) +{ + __ockl_atomic_add_noret_f32(address, val); +} + +__device__ +inline +double atomicAdd(double* address, double val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +double atomicAdd_system(double* address, double val) { + return __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicSub(int* address, int val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicSub_system(int* address, int val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicSub(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicSub_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_add(address, -val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicExch(int* address, int val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicExch_system(int* address, int val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicExch(unsigned int* address, unsigned int val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicExch_system(unsigned int* address, unsigned int val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicExch(unsigned long long* address, unsigned long long val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicExch_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +float atomicExch(float* address, float val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +float atomicExch_system(float* address, float val) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicMin(int* address, int val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicMin_system(int* address, int val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicMin(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicMin_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicMin(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicMin_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicMax(int* address, int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicMax_system(int* address, int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicMax(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicMax_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicMax(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicMax_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicInc(unsigned int* address, unsigned int val) +{ + __device__ + extern + unsigned int __builtin_amdgcn_atomic_inc( + unsigned int*, + unsigned int, + unsigned int, + unsigned int, + bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32"); + + return __builtin_amdgcn_atomic_inc( + address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false); +} + +__device__ +inline +unsigned int atomicDec(unsigned int* address, unsigned int val) +{ + __device__ + extern + unsigned int __builtin_amdgcn_atomic_dec( + unsigned int*, + unsigned int, + unsigned int, + unsigned int, + bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32"); + + return __builtin_amdgcn_atomic_dec( + address, val, __ATOMIC_RELAXED, 1 /* Device scope */, false); +} + +__device__ +inline +int atomicAnd(int* address, int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicAnd_system(int* address, int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicAnd(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicAnd_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} +__device__ +inline +unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicOr(int* address, int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicOr_system(int* address, int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicOr(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicOr_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicOr(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +int atomicXor(int* address, int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +int atomicXor_system(int* address, int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned int atomicXor(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned int atomicXor_system(unsigned int* address, unsigned int val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +__device__ +inline +unsigned long long atomicXor(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); +} + +__device__ +inline +unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) { + return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); +} + +#else + __device__ inline int atomicCAS(int* address, int compare, int val) @@ -268,4 +684,4 @@ unsigned long long atomicXor( return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); } -// TODO: add scoped atomics i.e. atomic{*}_system && atomic{*}_block. +#endif