SWDEV-548892 - Stop using __ockl_lane_id (#2186)

__lane_id already exists and is identical to __ockl_lane_u32, so call it instead.
这个提交包含在:
Matt Arsenault
2025-12-19 20:34:55 +01:00
提交者 GitHub
父节点 7c989ac022
当前提交 0c0d8dc974
修改 3 个文件,包含 9 行新增,7 行删除
@@ -640,7 +640,7 @@ __device__ inline void __named_sync() { __builtin_amdgcn_s_barrier(); }
// hip.amdgcn.bc - lanemask
__device__ inline __hip_uint64_t __lanemask_gt() {
__hip_uint32_t lane = __ockl_lane_u32();
__hip_uint32_t lane = __lane_id();
if (lane == 63) return 0;
__hip_uint64_t ballot = __ballot64(1);
__hip_uint64_t mask = (~((__hip_uint64_t)0)) << (lane + 1);
@@ -648,14 +648,14 @@ __device__ inline __hip_uint64_t __lanemask_gt() {
}
// Returns the bits of the __ballot64(1) result that belong to lanes whose
// index is strictly lower than the calling lane's index.
// NOTE(review): __ballot64(1) presumably yields the mask of currently active
// lanes - confirm against its definition.
__device__ inline __hip_uint64_t __lanemask_lt() {
  // Single declaration of the lane index via __lane_id(); the stale
  // __ockl_lane_u32() declaration is dropped so `lane` is not redeclared.
  __hip_uint32_t lane = __lane_id();
  // Kept unsigned (as in __lanemask_gt) so the AND below has no signed operand.
  __hip_uint64_t ballot = __ballot64(1);
  // (1 << lane) - 1 sets exactly bits [0, lane); it is 0 for lane 0.
  // Assumes lane <= 63 so the shift stays in range - __lanemask_gt treats 63
  // as the highest lane.
  __hip_uint64_t mask = ((__hip_uint64_t)1 << lane) - (__hip_uint64_t)1;
  return mask & ballot;
}
// Returns a 64-bit mask with only the bit at the calling lane's index set.
__device__ inline __hip_uint64_t __lanemask_eq() {
  // Single declaration of the lane index via __lane_id(); the stale
  // __ockl_lane_u32() declaration is dropped so `lane` is not redeclared.
  __hip_uint32_t lane = __lane_id();
  // Stay in the unsigned domain: routing the value through a signed 64-bit
  // temporary would set the sign bit for lane 63 before converting back.
  // Assumes lane <= 63 so the shift stays in range - TODO confirm.
  return (__hip_uint64_t)1 << lane;
}
@@ -357,7 +357,7 @@ __device__ inline T __reduce_op_sync(MaskT mask, T val, BinaryOp op, WfReduce wf
#endif
firstLane = __builtin_ctzll(mask);
laneId = __ockl_lane_u32();
laneId = __lane_id();
nextBit = laneId;
// the number of iterations needs to be at least log2(number of bits on)
numIterations = sizeof(int) * 8 - __clz(maskNumBits);
@@ -91,11 +91,13 @@ __global__ void reduceAllAtomics(T* __restrict__ output, const T* __restrict__ i
__syncthreads();
if (mask & (1ul << __ockl_lane_u32())) op(&result[numWarp], input[idx]);
uint lane = __lane_id();
if (mask & (1ul << lane)) op(&result[numWarp], input[idx]);
__syncthreads();
if (__ockl_lane_u32() == 0) output[idx / warpSize] = result[numWarp];
if (lane == 0) output[idx / warpSize] = result[numWarp];
}
template <class T, template <typename> class Op>
@@ -104,7 +106,7 @@ __global__ void reduceOpSync(T* __restrict__ output, const T* __restrict__ input
int idx = threadIdx.x + blockIdx.x * kBlockDim;
T result;
if (mask & (1ul << __ockl_lane_u32())) {
if (mask & (1ul << __lane_id())) {
if constexpr (std::is_same<Op<T>, std::plus<T>>::value)
result = __reduce_add_sync(mask, input[idx]);
else if constexpr (std::is_same<Op<T>, MinOp<T>>::value)