diff --git a/include/roc_shmem/roc_shmem.hpp b/include/roc_shmem/roc_shmem.hpp index 7a0b58ad6a..faef8a7502 100644 --- a/include/roc_shmem/roc_shmem.hpp +++ b/include/roc_shmem/roc_shmem.hpp @@ -870,16 +870,12 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system(); */ #define BROADCAST_API_GEN(T, TNAME) \ __device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_wg_broadcast( \ - roc_shmem_ctx_t ctx, T *dest, const T *source, int nelem, int pe_root, \ - int pe_start, int log_pe_stride, int pe_size, \ - long *p_sync); /* NOLINT */ \ + roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \ + int nelem, int pe_root); /* NOLINT */ \ __host__ void roc_shmem_ctx_##TNAME##_broadcast( \ roc_shmem_ctx_t ctx, T *dest, const T *source, int nelem, int pe_root, \ int pe_start, int log_pe_stride, int pe_size, \ long *p_sync); /* NOLINT */ \ - __device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_wg_broadcast( \ - roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \ - int nelem, int pe_root); /* NOLINT */ \ __host__ void roc_shmem_ctx_##TNAME##_broadcast( \ roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \ int nelem, int pe_root); /* NOLINT */ diff --git a/src/roc_shmem_gpu.cpp b/src/roc_shmem_gpu.cpp index 87635a4160..1f1867724b 100644 --- a/src/roc_shmem_gpu.cpp +++ b/src/roc_shmem_gpu.cpp @@ -437,17 +437,6 @@ __device__ void roc_shmem_wg_to_all(roc_shmem_ctx_t ctx, roc_shmem_team_t team, get_internal_ctx(ctx)->to_all(team, dest, source, nreduce); } -template -__device__ void roc_shmem_wg_broadcast(roc_shmem_ctx_t ctx, T *dest, - const T *source, int nelem, int pe_root, - int pe_start, int log_pe_stride, - int pe_size, long *p_sync) { - GPU_DPRINTF("Function: roc_shmem_broadcast\n"); - - get_internal_ctx(ctx)->broadcast(dest, source, nelem, pe_root, pe_start, - log_pe_stride, pe_size, p_sync); -} - template __device__ void roc_shmem_wg_broadcast(roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, @@ -905,9 +894,6 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team, template __device__ void roc_shmem_get_nbi(T * dest, const T *source, \ size_t nelems, int pe); \ template __device__ T roc_shmem_g(const T *source, int pe); \ - template __device__ void roc_shmem_wg_broadcast( \ - roc_shmem_ctx_t ctx, T * dest, const T *source, int nelem, int pe_root, \ - int pe_start, int log_pe_stride, int pe_size, long *p_sync); \ template __device__ void roc_shmem_wg_broadcast( \ roc_shmem_ctx_t ctx, roc_shmem_team_t team, T * dest, const T *source, \ int nelem, int pe_root); \ @@ -1220,12 +1206,6 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team, size_t nelems, int pe) { \ roc_shmemx_get_nbi_wg(dest, source, nelems, pe); \ } \ - __device__ void roc_shmem_ctx_##TNAME##_wg_broadcast( \ - roc_shmem_ctx_t ctx, T *dest, const T *source, int nelem, int pe_root, \ - int pe_start, int log_pe_stride, int pe_size, long *p_sync) { \ - roc_shmem_wg_broadcast(ctx, dest, source, nelem, pe_root, pe_start, \ - log_pe_stride, pe_size, p_sync); \ - } \ __device__ void roc_shmem_ctx_##TNAME##_wg_broadcast( \ roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \ int nelem, int pe_root) { \