Replace IRIF fences with atomic_work_item_fence

[ROCm/clr commit: 03822afaa9]
このコミットが含まれているのは:
Aaron Enye Shi
2018-10-04 21:47:28 +00:00
コミット 1835636df3
2個のファイルの変更22行の追加92行の削除
-74
ファイルの表示
@@ -752,80 +752,6 @@ void *__amdgcn_get_dynamicgroupbaseptr() {
return __get_dynamicgroupbaseptr();
}
// hip.amdgcn.bc - sync threads
#define __CLK_LOCAL_MEM_FENCE 0x01
typedef unsigned __cl_mem_fence_flags;
typedef enum __memory_scope {
__memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
__memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
__memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
__memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
} __memory_scope;
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum __memory_order
{
__memory_order_relaxed = __ATOMIC_RELAXED,
__memory_order_acquire = __ATOMIC_ACQUIRE,
__memory_order_release = __ATOMIC_RELEASE,
__memory_order_acq_rel = __ATOMIC_ACQ_REL,
__memory_order_seq_cst = __ATOMIC_SEQ_CST
} __memory_order;
__device__
inline
static void
__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
{
// We're tying global-happens-before and local-happens-before together as does HSA
if (order != __memory_order_relaxed) {
switch (scope) {
case __memory_scope_work_item:
break;
case __memory_scope_sub_group:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_sg(); break;
case __memory_order_release: __llvm_fence_rel_sg(); break;
case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
}
break;
case __memory_scope_work_group:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_wg(); break;
case __memory_order_release: __llvm_fence_rel_wg(); break;
case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
}
break;
case __memory_scope_device:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_dev(); break;
case __memory_order_release: __llvm_fence_rel_dev(); break;
case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
}
break;
case __memory_scope_all_svm_devices:
switch (order) {
case __memory_order_relaxed: break;
case __memory_order_acquire: __llvm_fence_acq_sys(); break;
case __memory_order_release: __llvm_fence_rel_sys(); break;
case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
}
break;
}
}
}
// Memory Fence Functions
__device__
inline
+22 -18
ファイルの表示
@@ -65,26 +65,30 @@ extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float)
__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
#endif //__HIP_DEVICE_COMPILE__
// __llvm_fence* functions from device-libs/irif/src/fence.ll
extern "C" __device__ void __llvm_fence_acq_sg(void);
extern "C" __device__ void __llvm_fence_acq_wg(void);
extern "C" __device__ void __llvm_fence_acq_dev(void);
extern "C" __device__ void __llvm_fence_acq_sys(void);
// Using hip.amdgcn.bc - sync threads
#define __CLK_LOCAL_MEM_FENCE 0x01
typedef unsigned __cl_mem_fence_flags;
extern "C" __device__ void __llvm_fence_rel_sg(void);
extern "C" __device__ void __llvm_fence_rel_wg(void);
extern "C" __device__ void __llvm_fence_rel_dev(void);
extern "C" __device__ void __llvm_fence_rel_sys(void);
typedef enum __memory_scope {
__memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
__memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
__memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
__memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
} __memory_scope;
extern "C" __device__ void __llvm_fence_ar_sg(void);
extern "C" __device__ void __llvm_fence_ar_wg(void);
extern "C" __device__ void __llvm_fence_ar_dev(void);
extern "C" __device__ void __llvm_fence_ar_sys(void);
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum __memory_order
{
__memory_order_relaxed = __ATOMIC_RELAXED,
__memory_order_acquire = __ATOMIC_ACQUIRE,
__memory_order_release = __ATOMIC_RELEASE,
__memory_order_acq_rel = __ATOMIC_ACQ_REL,
__memory_order_seq_cst = __ATOMIC_SEQ_CST
} __memory_order;
extern "C" __device__ void __llvm_fence_sc_sg(void);
extern "C" __device__ void __llvm_fence_sc_wg(void);
extern "C" __device__ void __llvm_fence_sc_dev(void);
extern "C" __device__ void __llvm_fence_sc_sys(void);
// Linked from hip.amdgcn.bc
extern "C" __device__ void
__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope);
#endif