SWDEV-548892 - Stop using __ockl_lane_id (#2186)

__lane_id already exists and is identical.
2025-12-19 20:34:55 +01:00
@@ -640,7 +640,7 @@ __device__ inline void __named_sync() { __builtin_amdgcn_s_barrier(); }

 // hip.amdgcn.bc - lanemask
 __device__ inline __hip_uint64_t __lanemask_gt() {
-  __hip_uint32_t lane = __ockl_lane_u32();
+  __hip_uint32_t lane = __lane_id();
  if (lane == 63) return 0;
  __hip_uint64_t ballot = __ballot64(1);
  __hip_uint64_t mask = (~((__hip_uint64_t)0)) << (lane + 1);
@@ -648,14 +648,14 @@ __device__ inline __hip_uint64_t __lanemask_gt() {
 }

 __device__ inline __hip_uint64_t __lanemask_lt() {
-  __hip_uint32_t lane = __ockl_lane_u32();
+  __hip_uint32_t lane = __lane_id();
  __hip_int64_t ballot = __ballot64(1);
  __hip_uint64_t mask = ((__hip_uint64_t)1 << lane) - (__hip_uint64_t)1;
  return mask & ballot;
 }

 __device__ inline __hip_uint64_t __lanemask_eq() {
-  __hip_uint32_t lane = __ockl_lane_u32();
+  __hip_uint32_t lane = __lane_id();
  __hip_int64_t mask = ((__hip_uint64_t)1 << lane);
  return mask;
 }
@@ -357,7 +357,7 @@ __device__ inline T __reduce_op_sync(MaskT mask, T val, BinaryOp op, WfReduce wf
 #endif

  firstLane = __builtin_ctzll(mask);
-  laneId = __ockl_lane_u32();
+  laneId = __lane_id();
  nextBit = laneId;
  // the number of iterations needs to be at least log2(number of bits on)
  numIterations = sizeof(int) * 8 - __clz(maskNumBits);
@@ -91,11 +91,13 @@ __global__ void reduceAllAtomics(T* __restrict__ output, const T* __restrict__ i

  __syncthreads();

-  if (mask & (1ul << __ockl_lane_u32())) op(&result[numWarp], input[idx]);
+  uint lane = __lane_id();
+
+  if (mask & (1ul << lane)) op(&result[numWarp], input[idx]);

  __syncthreads();

-  if (__ockl_lane_u32() == 0) output[idx / warpSize] = result[numWarp];
+  if (lane == 0) output[idx / warpSize] = result[numWarp];
 }

 template <class T, template <typename> class Op>
@@ -104,7 +106,7 @@ __global__ void reduceOpSync(T* __restrict__ output, const T* __restrict__ input
  int idx = threadIdx.x + blockIdx.x * kBlockDim;
  T result;

-  if (mask & (1ul << __ockl_lane_u32())) {
+  if (mask & (1ul << __lane_id())) {
    if constexpr (std::is_same<Op<T>, std::plus<T>>::value)
      result = __reduce_add_sync(mask, input[idx]);
    else if constexpr (std::is_same<Op<T>, MinOp<T>>::value)