@@ -752,80 +752,6 @@ void *__amdgcn_get_dynamicgroupbaseptr() {
|
||||
return __get_dynamicgroupbaseptr();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// hip.amdgcn.bc - sync threads
|
||||
#define __CLK_LOCAL_MEM_FENCE 0x01
|
||||
typedef unsigned __cl_mem_fence_flags;
|
||||
|
||||
typedef enum __memory_scope {
|
||||
__memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
|
||||
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
|
||||
__memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
|
||||
__memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
|
||||
__memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
|
||||
} __memory_scope;
|
||||
|
||||
// enum values aligned with what clang uses in EmitAtomicExpr()
|
||||
typedef enum __memory_order
|
||||
{
|
||||
__memory_order_relaxed = __ATOMIC_RELAXED,
|
||||
__memory_order_acquire = __ATOMIC_ACQUIRE,
|
||||
__memory_order_release = __ATOMIC_RELEASE,
|
||||
__memory_order_acq_rel = __ATOMIC_ACQ_REL,
|
||||
__memory_order_seq_cst = __ATOMIC_SEQ_CST
|
||||
} __memory_order;
|
||||
|
||||
__device__
|
||||
inline
|
||||
static void
|
||||
__atomic_work_item_fence(__cl_mem_fence_flags flags, __memory_order order, __memory_scope scope)
|
||||
{
|
||||
// We're tying global-happens-before and local-happens-before together as does HSA
|
||||
if (order != __memory_order_relaxed) {
|
||||
switch (scope) {
|
||||
case __memory_scope_work_item:
|
||||
break;
|
||||
case __memory_scope_sub_group:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_sg(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_sg(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_sg(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_sg(); break;
|
||||
}
|
||||
break;
|
||||
case __memory_scope_work_group:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_wg(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_wg(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_wg(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_wg(); break;
|
||||
}
|
||||
break;
|
||||
case __memory_scope_device:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_dev(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_dev(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_dev(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_dev(); break;
|
||||
}
|
||||
break;
|
||||
case __memory_scope_all_svm_devices:
|
||||
switch (order) {
|
||||
case __memory_order_relaxed: break;
|
||||
case __memory_order_acquire: __llvm_fence_acq_sys(); break;
|
||||
case __memory_order_release: __llvm_fence_rel_sys(); break;
|
||||
case __memory_order_acq_rel: __llvm_fence_ar_sys(); break;
|
||||
case __memory_order_seq_cst: __llvm_fence_sc_sys(); break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Memory Fence Functions
|
||||
__device__
|
||||
inline
|
||||
|
||||
@@ -65,26 +65,30 @@ extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float)
|
||||
__device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
|
||||
#endif //__HIP_DEVICE_COMPILE__
|
||||
|
||||
// __llvm_fence* functions from device-libs/irif/src/fence.ll
|
||||
extern "C" __device__ void __llvm_fence_acq_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_acq_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_acq_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_acq_sys(void);
|
||||
// Using hip.amdgcn.bc - sync threads
|
||||
#define __CLK_LOCAL_MEM_FENCE 0x01
|
||||
typedef unsigned __cl_mem_fence_flags;
|
||||
|
||||
extern "C" __device__ void __llvm_fence_rel_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_rel_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_rel_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_rel_sys(void);
|
||||
typedef enum __memory_scope {
|
||||
__memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
|
||||
__memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
|
||||
__memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
|
||||
__memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
|
||||
__memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
|
||||
} __memory_scope;
|
||||
|
||||
extern "C" __device__ void __llvm_fence_ar_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_ar_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_ar_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_ar_sys(void);
|
||||
// enum values aligned with what clang uses in EmitAtomicExpr()
|
||||
typedef enum __memory_order
|
||||
{
|
||||
__memory_order_relaxed = __ATOMIC_RELAXED,
|
||||
__memory_order_acquire = __ATOMIC_ACQUIRE,
|
||||
__memory_order_release = __ATOMIC_RELEASE,
|
||||
__memory_order_acq_rel = __ATOMIC_ACQ_REL,
|
||||
__memory_order_seq_cst = __ATOMIC_SEQ_CST
|
||||
} __memory_order;
|
||||
|
||||
|
||||
extern "C" __device__ void __llvm_fence_sc_sg(void);
|
||||
extern "C" __device__ void __llvm_fence_sc_wg(void);
|
||||
extern "C" __device__ void __llvm_fence_sc_dev(void);
|
||||
extern "C" __device__ void __llvm_fence_sc_sys(void);
|
||||
// Linked from hip.amdgcn.bc
|
||||
extern "C" __device__ void
|
||||
__atomic_work_item_fence(__cl_mem_fence_flags, __memory_order, __memory_scope);
|
||||
|
||||
#endif
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする