Remove the device-side active-set-based broadcast API from rocSHMEM

[ROCm/rocshmem commit: e1ff06913c]
Этот коммит содержится в:
avinashkethineedi
2024-10-29 19:04:49 +00:00
родитель daae6f4d60
Коммит 058aa3ccf3
2 изменённых файлов: 2 добавлений и 26 удалений
+2 -6
Просмотреть файл
@@ -870,16 +870,12 @@ __device__ ATTR_NO_INLINE void roc_shmem_threadfence_system();
*/
/*
 * BROADCAST_API_GEN(T, TNAME): for element type T and name fragment TNAME,
 * expands to declarations of
 *   - the __device__ function roc_shmem_ctx_<TNAME>_wg_broadcast, and
 *   - the __host__ function   roc_shmem_ctx_<TNAME>_broadcast.
 *
 * NOTE(review): as shown here the macro body carries two different parameter
 * lists for these declarations -- one ending in
 * (pe_start, log_pe_stride, pe_size, long *p_sync) and one taking a
 * roc_shmem_team_t team -- and two of them appear back to back after the
 * first roc_shmem_ctx_##TNAME##_wg_broadcast( line. This looks like old and
 * new lines of a change interleaved; confirm against the real header which
 * lines belong to the final macro before relying on this text.
 */
#define BROADCAST_API_GEN(T, TNAME) \
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_wg_broadcast( \
roc_shmem_ctx_t ctx, T *dest, const T *source, int nelem, int pe_root, \
int pe_start, int log_pe_stride, int pe_size, \
long *p_sync); /* NOLINT */ \
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \
int nelem, int pe_root); /* NOLINT */ \
__host__ void roc_shmem_ctx_##TNAME##_broadcast( \
roc_shmem_ctx_t ctx, T *dest, const T *source, int nelem, int pe_root, \
int pe_start, int log_pe_stride, int pe_size, \
long *p_sync); /* NOLINT */ \
__device__ ATTR_NO_INLINE void roc_shmem_ctx_##TNAME##_wg_broadcast( \
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \
int nelem, int pe_root); /* NOLINT */ \
__host__ void roc_shmem_ctx_##TNAME##_broadcast( \
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \
int nelem, int pe_root); /* NOLINT */
-20
Просмотреть файл
@@ -437,17 +437,6 @@ __device__ void roc_shmem_wg_to_all(roc_shmem_ctx_t ctx, roc_shmem_team_t team,
get_internal_ctx(ctx)->to_all<T, Op>(team, dest, source, nreduce);
}
/**
 * Device-side broadcast wrapper that takes the explicit
 * (pe_start, log_pe_stride, pe_size, p_sync) argument set instead of a team.
 *
 * Emits a debug message via GPU_DPRINTF, then resolves ctx with
 * get_internal_ctx() and forwards all remaining arguments, unchanged and in
 * the same order, to broadcast<T>() on the resulting object. The function
 * itself performs no other work and returns nothing.
 *
 * @tparam T             Element type of dest and source.
 * @param ctx            Context handle passed to get_internal_ctx().
 * @param dest           Forwarded to broadcast<T>() (destination buffer).
 * @param source         Forwarded to broadcast<T>() (source buffer).
 * @param nelem          Forwarded to broadcast<T>().
 * @param pe_root        Forwarded to broadcast<T>().
 * @param pe_start       Forwarded to broadcast<T>().
 * @param log_pe_stride  Forwarded to broadcast<T>().
 * @param pe_size        Forwarded to broadcast<T>().
 * @param p_sync         Forwarded to broadcast<T>().
 *
 * NOTE(review): the parameter names presumably follow the OpenSHMEM
 * active-set convention -- confirm against the broadcast<T>() implementation.
 */
template <typename T>
__device__ void roc_shmem_wg_broadcast(roc_shmem_ctx_t ctx, T *dest,
const T *source, int nelem, int pe_root,
int pe_start, int log_pe_stride,
int pe_size, long *p_sync) {
// NOTE(review): the logged name is "roc_shmem_broadcast", not the enclosing
// function's name "roc_shmem_wg_broadcast".
GPU_DPRINTF("Function: roc_shmem_broadcast\n");
// Delegate to the internal context object; ctx itself is not passed on.
get_internal_ctx(ctx)->broadcast<T>(dest, source, nelem, pe_root, pe_start,
log_pe_stride, pe_size, p_sync);
}
template <typename T>
__device__ void roc_shmem_wg_broadcast(roc_shmem_ctx_t ctx,
roc_shmem_team_t team, T *dest,
@@ -905,9 +894,6 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
template __device__ void roc_shmem_get_nbi<T>(T * dest, const T *source, \
size_t nelems, int pe); \
template __device__ T roc_shmem_g<T>(const T *source, int pe); \
template __device__ void roc_shmem_wg_broadcast<T>( \
roc_shmem_ctx_t ctx, T * dest, const T *source, int nelem, int pe_root, \
int pe_start, int log_pe_stride, int pe_size, long *p_sync); \
template __device__ void roc_shmem_wg_broadcast<T>( \
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T * dest, const T *source, \
int nelem, int pe_root); \
@@ -1220,12 +1206,6 @@ __device__ int roc_shmem_team_translate_pe(roc_shmem_team_t src_team,
size_t nelems, int pe) { \
roc_shmemx_get_nbi_wg<T>(dest, source, nelems, pe); \
} \
__device__ void roc_shmem_ctx_##TNAME##_wg_broadcast( \
roc_shmem_ctx_t ctx, T *dest, const T *source, int nelem, int pe_root, \
int pe_start, int log_pe_stride, int pe_size, long *p_sync) { \
roc_shmem_wg_broadcast<T>(ctx, dest, source, nelem, pe_root, pe_start, \
log_pe_stride, pe_size, p_sync); \
} \
__device__ void roc_shmem_ctx_##TNAME##_wg_broadcast( \
roc_shmem_ctx_t ctx, roc_shmem_team_t team, T *dest, const T *source, \
int nelem, int pe_root) { \