From cfa8fc1ca554d05c9538cc481523cc70954c1d4e Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 13 Jun 2018 15:59:45 +0000 Subject: [PATCH] Add __llvm_fence funcs and __ prefixes --- include/hip/hcc_detail/hip_runtime.h | 52 ++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 34650d728a..b9c7ea603c 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -652,40 +652,62 @@ void __assertfail(const char * __assertion, // hip.amdgcn.bc - sync threads // extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); -#define CLK_LOCAL_MEM_FENCE 0x01 -#define local __attribute__((address_space(3))) +#define __CLK_LOCAL_MEM_FENCE 0x01 +#define __local __attribute__((address_space(3))) -typedef unsigned cl_mem_fence_flags; +typedef unsigned __cl_mem_fence_flags; -typedef enum memory_scope { +typedef enum __memory_scope { memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -} memory_scope; +} __memory_scope; // enum values aligned with what clang uses in EmitAtomicExpr() -typedef enum memory_order +typedef enum __memory_order { memory_order_relaxed = __ATOMIC_RELAXED, memory_order_acquire = __ATOMIC_ACQUIRE, memory_order_release = __ATOMIC_RELEASE, memory_order_acq_rel = __ATOMIC_ACQ_REL, memory_order_seq_cst = __ATOMIC_SEQ_CST -} memory_order; +} __memory_order; -extern "C" __device__ __attribute__((overloadable)) -void atomic_work_item_fence(cl_mem_fence_flags, memory_order, memory_scope); +// __llvm_fence* functions from device-libs/irif/src/fence.ll +extern "C" __device__ void __llvm_fence_acq_sg(void); +extern "C" __device__ void __llvm_fence_acq_wg(void); +extern "C" __device__ void __llvm_fence_acq_dev(void); +extern "C" __device__ void __llvm_fence_acq_sys(void); + +extern "C" __device__ void __llvm_fence_rel_sg(void); +extern "C" __device__ void __llvm_fence_rel_wg(void); +extern "C" __device__ void __llvm_fence_rel_dev(void); +extern "C" __device__ void __llvm_fence_rel_sys(void); __device__ inline -static void hc_work_group_barrier(cl_mem_fence_flags flags, memory_scope scope) +static void hc_work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) { if (flags) { - atomic_work_item_fence(flags, memory_order_release, scope); + switch(scope) { + case memory_scope_work_item: break; + case memory_scope_sub_group: __llvm_fence_rel_sg(); break; + case memory_scope_work_group: __llvm_fence_rel_wg(); break; + case memory_scope_device: __llvm_fence_rel_dev(); break; + case memory_scope_all_svm_devices: __llvm_fence_rel_sys(); break; + } + //atomic_work_item_fence(flags, memory_order_release, scope); __builtin_amdgcn_s_barrier(); - atomic_work_item_fence(flags, memory_order_acquire, scope); + //atomic_work_item_fence(flags, memory_order_acquire, scope); + switch(scope) { + case memory_scope_work_item: break; + case memory_scope_sub_group: __llvm_fence_acq_sg(); break; + case memory_scope_work_group: __llvm_fence_acq_wg(); break; + case memory_scope_device: __llvm_fence_acq_dev(); break; + case memory_scope_all_svm_devices: __llvm_fence_acq_sys(); break; + } } else { __builtin_amdgcn_s_barrier(); } @@ -695,7 +717,7 @@ __device__ inline static void hc_barrier(int n) { - hc_work_group_barrier((cl_mem_fence_flags)n, memory_scope_work_group); + hc_work_group_barrier((__cl_mem_fence_flags)n, memory_scope_work_group); } __device__ @@ -703,7 +725,7 @@ inline __attribute__((noduplicate)) void __syncthreads() { - hc_barrier(CLK_LOCAL_MEM_FENCE); + hc_barrier(__CLK_LOCAL_MEM_FENCE); } @@ -711,7 +733,7 @@ __device__ unsigned __llvm_amdgcn_s_getreg(unsigned) __asm("llvm.amdgcn.s.getreg __device__ unsigned __llvm_amdgcn_groupstaticsize() __asm("llvm.amdgcn.groupstaticsize"); -__device__ inline static local char* __to_local(unsigned x) { return (local char*)x; } +__device__ inline static __local char* __to_local(unsigned x) { return (__local char*)x; } __device__ inline void *__amdgcn_get_dynamicgroupbaseptr() { #if 0