From 1835636df3afbe6322e5e456306cbc0801b2aefb Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 4 Oct 2018 21:47:28 +0000 Subject: [PATCH] Replace IRIF fences with atomic_work_item_fence [ROCm/clr commit: 03822afaa952d62a057edd44430843cb8b0f38a4] --- .../include/hip/hcc_detail/device_functions.h | 74 ------------------- .../hip/hcc_detail/device_library_decls.h | 40 +++++----- 2 files changed, 22 insertions(+), 92 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h b/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h index b319f26e03..42927e3246 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h @@ -752,80 +752,6 @@ void *__amdgcn_get_dynamicgroupbaseptr() { return __get_dynamicgroupbaseptr(); } - - -// hip.amdgcn.bc - sync threads -#define __CLK_LOCAL_MEM_FENCE 0x01 -typedef unsigned __cl_mem_fence_flags; - -typedef enum __memory_scope { - __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, - __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, - __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, - __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, - __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -} __memory_scope; - -// enum values aligned with what clang uses in EmitAtomicExpr() -typedef enum __memory_order -{ - __memory_order_relaxed = __ATOMIC_RELAXED, - __memory_order_acquire = __ATOMIC_ACQUIRE, - __memory_order_release = __ATOMIC_RELEASE, - __memory_order_acq_rel = __ATOMIC_ACQ_REL, - __memory_order_seq_cst = __ATOMIC_SEQ_CST -} __memory_order; - -__device__ -inline -static void -__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope) -{ - // We're tying global-happens-before and local-happens-before together as does HSA - if (order != __memory_order_relaxed) { - switch (scope) { - case __memory_scope_work_item: - break; - case __memory_scope_sub_group: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_sg(); break; - case __memory_order_release: __llvm_fence_rel_sg(); break; - case __memory_order_acq_rel: __llvm_fence_ar_sg(); break; - case __memory_order_seq_cst: __llvm_fence_sc_sg(); break; - } - break; - case __memory_scope_work_group: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_wg(); break; - case __memory_order_release: __llvm_fence_rel_wg(); break; - case __memory_order_acq_rel: __llvm_fence_ar_wg(); break; - case __memory_order_seq_cst: __llvm_fence_sc_wg(); break; - } - break; - case __memory_scope_device: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_dev(); break; - case __memory_order_release: __llvm_fence_rel_dev(); break; - case __memory_order_acq_rel: __llvm_fence_ar_dev(); break; - case __memory_order_seq_cst: __llvm_fence_sc_dev(); break; - } - break; - case __memory_scope_all_svm_devices: - switch (order) { - case __memory_order_relaxed: break; - case __memory_order_acquire: __llvm_fence_acq_sys(); break; - case __memory_order_release: __llvm_fence_rel_sys(); break; - case __memory_order_acq_rel: __llvm_fence_ar_sys(); break; - case __memory_order_seq_cst: __llvm_fence_sc_sys(); break; - } - break; - } - } -} - // Memory Fence Functions __device__ inline diff --git a/projects/clr/hipamd/include/hip/hcc_detail/device_library_decls.h b/projects/clr/hipamd/include/hip/hcc_detail/device_library_decls.h index d64877d50a..2a0b968fe3 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/device_library_decls.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/device_library_decls.h @@ -65,26 +65,30 @@ extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float) __device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; } #endif //__HIP_DEVICE_COMPILE__ -// __llvm_fence* functions from device-libs/irif/src/fence.ll -extern "C" __device__ void __llvm_fence_acq_sg(void); -extern "C" __device__ void __llvm_fence_acq_wg(void); -extern "C" __device__ void __llvm_fence_acq_dev(void); -extern "C" __device__ void __llvm_fence_acq_sys(void); +// Using hip.amdgcn.bc - sync threads +#define __CLK_LOCAL_MEM_FENCE 0x01 +typedef unsigned __cl_mem_fence_flags; -extern "C" __device__ void __llvm_fence_rel_sg(void); -extern "C" __device__ void __llvm_fence_rel_wg(void); -extern "C" __device__ void __llvm_fence_rel_dev(void); -extern "C" __device__ void __llvm_fence_rel_sys(void); +typedef enum __memory_scope { + __memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + __memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + __memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + __memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, + __memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +} __memory_scope; -extern "C" __device__ void __llvm_fence_ar_sg(void); -extern "C" __device__ void __llvm_fence_ar_wg(void); -extern "C" __device__ void __llvm_fence_ar_dev(void); -extern "C" __device__ void __llvm_fence_ar_sys(void); +// enum values aligned with what clang uses in EmitAtomicExpr() +typedef enum __memory_order +{ + __memory_order_relaxed = __ATOMIC_RELAXED, + __memory_order_acquire = __ATOMIC_ACQUIRE, + __memory_order_release = __ATOMIC_RELEASE, + __memory_order_acq_rel = __ATOMIC_ACQ_REL, + __memory_order_seq_cst = __ATOMIC_SEQ_CST +} __memory_order; - -extern "C" __device__ void __llvm_fence_sc_sg(void); -extern "C" __device__ void __llvm_fence_sc_wg(void); -extern "C" __device__ void __llvm_fence_sc_dev(void); -extern "C" __device__ void __llvm_fence_sc_sys(void); +// Linked from hip.amdgcn.bc +extern "C" __device__ void +__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope); #endif