Add __attribute__((const)) to grid related functions declarations
This is a cherry-pick of Daniil Fukalov's PR https://github.com/ROCm-Developer-Tools/HIP/pull/2110, which has been committed to the master branch. It makes the declarations consistent with https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/amd-stg-open/ockl/src/workitem.cl. Without the attribute, these functions do not get the "readnone" LLVM IR attribute, and without that attribute some optimizations fail, e.g. Loop Invariant Code Motion does not hoist these calls out of a loop. Change-Id: Idb599570d142152cc4f6a3c8986384ad7f0c4729
Этот коммит содержится в:
коммит произвёл
Yaxun (Sam) Liu
родитель
0b7703560a
Коммит
359ea4fcaf
@@ -446,22 +446,22 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
#pragma push_macro("__DEVICE__")
|
||||
#define __DEVICE__ static __device__ __forceinline__
|
||||
|
||||
extern "C" __device__ size_t __ockl_get_local_id(uint);
|
||||
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint);
|
||||
// Thread-index accessor (x): forwards to __ockl_get_local_id(0) and narrows
// the size_t result to uint.
__DEVICE__ uint __hip_get_thread_idx_x() {
    return static_cast<uint>(__ockl_get_local_id(0));
}
|
||||
// Thread-index accessor (y): forwards to __ockl_get_local_id(1) and narrows
// the size_t result to uint.
__DEVICE__ uint __hip_get_thread_idx_y() {
    return static_cast<uint>(__ockl_get_local_id(1));
}
|
||||
// Thread-index accessor (z): forwards to __ockl_get_local_id(2) and narrows
// the size_t result to uint.
__DEVICE__ uint __hip_get_thread_idx_z() {
    return static_cast<uint>(__ockl_get_local_id(2));
}
|
||||
|
||||
extern "C" __device__ size_t __ockl_get_group_id(uint);
|
||||
extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint);
|
||||
// Block-index accessor (x): forwards to __ockl_get_group_id(0) and narrows
// the size_t result to uint.
__DEVICE__ uint __hip_get_block_idx_x() {
    return static_cast<uint>(__ockl_get_group_id(0));
}
|
||||
// Block-index accessor (y): forwards to __ockl_get_group_id(1) and narrows
// the size_t result to uint.
__DEVICE__ uint __hip_get_block_idx_y() {
    return static_cast<uint>(__ockl_get_group_id(1));
}
|
||||
// Block-index accessor (z): forwards to __ockl_get_group_id(2) and narrows
// the size_t result to uint.
__DEVICE__ uint __hip_get_block_idx_z() {
    return static_cast<uint>(__ockl_get_group_id(2));
}
|
||||
|
||||
extern "C" __device__ size_t __ockl_get_local_size(uint);
|
||||
extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint);
|
||||
// Block-dimension accessor (x): forwards to __ockl_get_local_size(0) and
// narrows the size_t result to uint.
__DEVICE__ uint __hip_get_block_dim_x() {
    return static_cast<uint>(__ockl_get_local_size(0));
}
|
||||
// Block-dimension accessor (y): forwards to __ockl_get_local_size(1) and
// narrows the size_t result to uint.
__DEVICE__ uint __hip_get_block_dim_y() {
    return static_cast<uint>(__ockl_get_local_size(1));
}
|
||||
// Block-dimension accessor (z): forwards to __ockl_get_local_size(2) and
// narrows the size_t result to uint.
__DEVICE__ uint __hip_get_block_dim_z() {
    return static_cast<uint>(__ockl_get_local_size(2));
}
|
||||
|
||||
extern "C" __device__ size_t __ockl_get_num_groups(uint);
|
||||
extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint);
|
||||
// Grid-dimension accessor (x): forwards to __ockl_get_num_groups(0) and
// narrows the size_t result to uint.
__DEVICE__ uint __hip_get_grid_dim_x() {
    return static_cast<uint>(__ockl_get_num_groups(0));
}
|
||||
// Grid-dimension accessor (y): forwards to __ockl_get_num_groups(1) and
// narrows the size_t result to uint.
__DEVICE__ uint __hip_get_grid_dim_y() {
    return static_cast<uint>(__ockl_get_num_groups(1));
}
|
||||
// Grid-dimension accessor (z): forwards to __ockl_get_num_groups(2) and
// narrows the size_t result to uint.
__DEVICE__ uint __hip_get_grid_dim_z() {
    return static_cast<uint>(__ockl_get_num_groups(2));
}
|
||||
|
||||
Ссылка в новой задаче
Block a user