Remove noinline attribute from reduceCopyPacks and reduceCopyPacksWithBias (#1864)

[ROCm/rccl commit: fb882e80f6]
This commit is contained in:
Arm Patinyasakdikul
2025-08-19 20:24:31 -05:00
committed by GitHub
parent 8c5095dd94
commit d4fecfb0be
@@ -32,11 +32,7 @@ template<typename RedFn, typename T, int Unroll, int BytePerPack,
int MultimemSrcs, int MinSrcs, int MaxSrcs,
int MultimemDsts, int MinDsts, int MaxDsts, int PreOpSrcs,
typename IntBytes, typename SrcPtrFn, typename DstPtrFn>
#if defined(__gfx942__) || defined(__gfx950__)
__device__ __forceinline__ void reduceCopyPacks(
#else
__device__ __attribute__((noinline)) void reduceCopyPacks(
#endif
int nThreads, int &thread,
uint64_t redArg, uint64_t *preOpArgs, bool postOp,
int nSrcs, SrcPtrFn const &srcPtrFn, int nDsts, DstPtrFn const &dstPtrFn,
@@ -428,11 +424,7 @@ template<typename RedFn, typename T, int Unroll, int BytePerPack,
int MultimemSrcs, int MinSrcs, int MaxSrcs,
int MultimemDsts, int MinDsts, int MaxDsts, int PreOpSrcs,
typename IntBytes, typename SrcPtrFn, typename DstPtrFn, typename AccPtrFn>
#if defined(__gfx942__) || defined(__gfx950__)
__device__ __forceinline__ void reduceCopyPacksWithBias(
#else
__device__ __attribute__((noinline)) void reduceCopyPacksWithBias(
#endif
int nThreads, int &thread,
uint64_t redArg, uint64_t *preOpArgs, bool postOp,
int nSrcs, SrcPtrFn const &srcPtrFn, int nDsts, DstPtrFn const &dstPtrFn,