Add header files based on sections in OpenSHMEM specifications
* rocshmem_RMA.hpp
* rocshmem_AMO.hpp
* rocshmem_SIG_OP.hpp
* rocshmem_COLL.hpp
* rocshmem_P2P_SYNC.hpp
* rocshmem_RMA_X.hpp
[ROCm/rocshmem commit: 3117a47b8d]
Этот коммит содержится в:
@@ -190,7 +190,7 @@ set_target_properties(
|
||||
${PROJECT_NAME}
|
||||
PROPERTIES
|
||||
PUBLIC_HEADER
|
||||
"${CMAKE_BINARY_DIR}/config.h;${CMAKE_CURRENT_SOURCE_DIR}/include/rocshmem/rocshmem.hpp;${CMAKE_CURRENT_SOURCE_DIR}/include/rocshmem/debug.hpp"
|
||||
"${CMAKE_BINARY_DIR}/config.h"
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
@@ -386,6 +386,11 @@ install(
|
||||
COMPONENT bin
|
||||
)
|
||||
|
||||
install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/
|
||||
DESTINATION ${INSTALL_INCLUDEDIR}
|
||||
COMPONENT dev
|
||||
)
|
||||
|
||||
install(
|
||||
EXPORT
|
||||
${PROJECT_NAME}Targets
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_allreduce_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_SRC_DIR/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_alltoall_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_SRC_DIR/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_broadcast_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_SRC_DIR/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_getmem_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_SRC_DIR/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_put_signal_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_SRC_DIR/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -0,0 +1,603 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef LIBRARY_INCLUDE_ROCSHMEM_COLL_HPP
|
||||
#define LIBRARY_INCLUDE_ROCSHMEM_COLL_HPP
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
/**
|
||||
* @name SHMEM_ALLTOALL
|
||||
* @brief Exchanges a fixed amount of contiguous data blocks between all pairs
|
||||
* of PEs participating in the collective routine.
|
||||
*
|
||||
* This function must be called as a work-group collective.
|
||||
*
|
||||
* @param[in] team The team participating in the collective.
|
||||
* @param[out] dest Destination address. Must be an address on the
|
||||
* symmetric heap.
|
||||
* @param[in] source Source address. Must be an address on the symmetric
|
||||
heap.
|
||||
* @param[in] nelems Number of data blocks transferred per pair of PEs.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest,
|
||||
const float *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest,
|
||||
const double *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, char *dest,
|
||||
const char *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, signed char *dest,
|
||||
const signed char *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest,
|
||||
const short *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest,
|
||||
const int *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest,
|
||||
const long *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest,
|
||||
const long long *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned char *dest,
|
||||
const unsigned char *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned short *dest,
|
||||
const unsigned short *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned int *dest,
|
||||
const unsigned int *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long *dest,
|
||||
const unsigned long *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_wg_alltoall(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long long *dest,
|
||||
const unsigned long long *source, int nelems);
|
||||
|
||||
|
||||
/**
|
||||
* @name SHMEM_BROADCAST
|
||||
* @brief Perform a broadcast between PEs in the active set. The caller
|
||||
* is blocked until the broadcast completes.
|
||||
*
|
||||
* This function must be called as a work-group collective.
|
||||
*
|
||||
* @param[out] dest Destination address. Must be an address on the
|
||||
* symmetric heap.
|
||||
* @param[in] source Source address. Must be an address on the symmetric
|
||||
heap.
|
||||
* @param[in] nelems Number of elements in the buffer to broadcast.
|
||||
* @param[in] pe_root Zero-based ordinal of the PE, with respect to the
|
||||
active set, from which the data is copied
|
||||
* @param[in] pe_start First PE in the active set of the broadcast.
|
||||
* @param[in] log_pe_stride Log (base 2) of the stride between PEs participating in the broadcast.
|
||||
* @param[in] pe_size Number of PEs participating in the broadcast.
|
||||
* @param[in] p_sync Temporary sync buffer provided to ROCSHMEM. Must
|
||||
be of size at least ROCSHMEM_REDUCE_SYNC_SIZE.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest,
|
||||
const float *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_float_broadcast(
|
||||
rocshmem_ctx_t ctx, float *dest, const float *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_float_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest,
|
||||
const float *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest,
|
||||
const double *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_double_broadcast(
|
||||
rocshmem_ctx_t ctx, double *dest, const double *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_double_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest,
|
||||
const double *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, char *dest,
|
||||
const char *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_char_broadcast(
|
||||
rocshmem_ctx_t ctx, char *dest, const char *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_char_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, char *dest,
|
||||
const char *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, signed char *dest,
|
||||
const signed char *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_schar_broadcast(
|
||||
rocshmem_ctx_t ctx, signed char *dest, const signed char *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_schar_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, signed char *dest,
|
||||
const signed char *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest,
|
||||
const short *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_short_broadcast(
|
||||
rocshmem_ctx_t ctx, short *dest, const short *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_short_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest,
|
||||
const short *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest,
|
||||
const int *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_int_broadcast(
|
||||
rocshmem_ctx_t ctx, int *dest, const int *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_int_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest,
|
||||
const int *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest,
|
||||
const long *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_long_broadcast(
|
||||
rocshmem_ctx_t ctx, long *dest, const long *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_long_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest,
|
||||
const long *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest,
|
||||
const long long *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_longlong_broadcast(
|
||||
rocshmem_ctx_t ctx, long long *dest, const long long *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_longlong_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest,
|
||||
const long long *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned char *dest,
|
||||
const unsigned char *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_uchar_broadcast(
|
||||
rocshmem_ctx_t ctx, unsigned char *dest, const unsigned char *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_uchar_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned char *dest,
|
||||
const unsigned char *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned short *dest,
|
||||
const unsigned short *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_ushort_broadcast(
|
||||
rocshmem_ctx_t ctx, unsigned short *dest, const unsigned short *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_ushort_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned short *dest,
|
||||
const unsigned short *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned int *dest,
|
||||
const unsigned int *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_uint_broadcast(
|
||||
rocshmem_ctx_t ctx, unsigned int *dest, const unsigned int *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_uint_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned int *dest,
|
||||
const unsigned int *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long *dest,
|
||||
const unsigned long *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_ulong_broadcast(
|
||||
rocshmem_ctx_t ctx, unsigned long *dest, const unsigned long *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_ulong_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long *dest,
|
||||
const unsigned long *source, int nelems, int pe_root);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_wg_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long long *dest,
|
||||
const unsigned long long *source, int nelems, int pe_root);
|
||||
__host__ void rocshmem_ctx_ulonglong_broadcast(
|
||||
rocshmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source,
|
||||
int nelems, int pe_root, int pe_start, int log_pe_stride,
|
||||
int pe_size, long *p_sync);
|
||||
__host__ void rocshmem_ctx_ulonglong_broadcast(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long long *dest,
|
||||
const unsigned long long *source, int nelems, int pe_root);
|
||||
|
||||
|
||||
/**
|
||||
* @name SHMEM_FCOLLECT
|
||||
* @brief Concatenates blocks of data from multiple PEs to an array in every
|
||||
* PE participating in the collective routine.
|
||||
*
|
||||
* This function must be called as a work-group collective.
|
||||
*
|
||||
* @param[in] team The team participating in the collective.
|
||||
* @param[out] dest Destination address. Must be an address on the
|
||||
* symmetric heap.
|
||||
* @param[in] source Source address. Must be an address on the symmetric
|
||||
heap.
|
||||
* @param[in] nelems Number of data blocks in source array.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest,
|
||||
const float *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest,
|
||||
const double *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, char *dest,
|
||||
const char *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, signed char *dest,
|
||||
const signed char *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest,
|
||||
const short *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest,
|
||||
const int *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest,
|
||||
const long *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest,
|
||||
const long long *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned char *dest,
|
||||
const unsigned char *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned short *dest,
|
||||
const unsigned short *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned int *dest,
|
||||
const unsigned int *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long *dest,
|
||||
const unsigned long *source, int nelems);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_wg_fcollect(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, unsigned long long *dest,
|
||||
const unsigned long long *source, int nelems);
|
||||
|
||||
|
||||
/**
|
||||
* @name SHMEM_REDUCTIONS
|
||||
* @brief Perform an allreduce between PEs in the active set. The caller
|
||||
* is blocked until the reduction completes.
|
||||
*
|
||||
* This function must be called as a work-group collective.
|
||||
*
|
||||
* @param[in] team The team participating in the collective.
|
||||
* @param[out] dest Destination address. Must be an address on the
|
||||
* symmetric heap.
|
||||
* @param[in] source Source address. Must be an address on the symmetric
|
||||
heap.
|
||||
* @param[in] nreduce Size of the buffer to participate in the reduction.
|
||||
*
|
||||
* @return int (Zero on successful local completion. Nonzero otherwise.)
|
||||
*/
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_short_sum_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_short_sum_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_short_min_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_short_min_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_short_max_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_short_max_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_short_prod_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_short_prod_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_short_or_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_short_or_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_short_and_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_short_and_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_short_xor_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_short_xor_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, short *dest, const short *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_int_sum_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_int_sum_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_int_min_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_int_min_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_int_max_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_int_max_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_int_prod_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_int_prod_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_int_or_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_int_or_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_int_and_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_int_and_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_int_xor_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_int_xor_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, int *dest, const int *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_long_sum_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_long_sum_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_long_min_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_long_min_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_long_max_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_long_max_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_long_prod_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_long_prod_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_long_or_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_long_or_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_long_and_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_long_and_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_long_xor_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_long_xor_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long *dest, const long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_longlong_sum_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_longlong_sum_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_longlong_min_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_longlong_min_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_longlong_max_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_longlong_max_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_longlong_prod_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_longlong_prod_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_longlong_or_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_longlong_or_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_longlong_and_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_longlong_and_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_longlong_xor_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_longlong_xor_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, long long *dest, const long long *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_float_sum_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_float_sum_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_float_min_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_float_min_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_float_max_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_float_max_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_float_prod_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_float_prod_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, float *dest, const float *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_double_sum_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_double_sum_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_double_min_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_double_min_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_double_max_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_double_max_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
|
||||
__device__ ATTR_NO_INLINE int rocshmem_ctx_double_prod_wg_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
__host__ int rocshmem_ctx_double_prod_reduce(
|
||||
rocshmem_ctx_t ctx, rocshmem_team_t team, double *dest, const double *source,
|
||||
int nreduce);
|
||||
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_INCLUDE_ROCSHMEM_COLL_HPP
|
||||
@@ -0,0 +1,662 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef LIBRARY_INCLUDE_ROCSHMEM_P2P_SYNC_HPP
|
||||
#define LIBRARY_INCLUDE_ROCSHMEM_P2P_SYNC_HPP
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
/**
|
||||
* @name SHMEM_WAIT_UNTIL
|
||||
* @brief Block the caller until the condition (* \p ivars \p cmp \p val) is
|
||||
* true.
|
||||
*
|
||||
* This function can be called from divergent control paths at per-thread
|
||||
* granularity. However, performance may be improved if the caller can
|
||||
* coalesce contiguous messages and elect a leader thread to call into the
|
||||
* ROCSHMEM function.
|
||||
*
|
||||
* @param[in] ivars Pointer to memory on the symmetric heap to wait for.
|
||||
* @param[in] cmp Operation for the comparison.
|
||||
* @param[in] val Value to compare the memory at \p ivars to.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
__device__ void rocshmem_float_wait_until(
|
||||
float *ivars, int cmp, float val);
|
||||
__device__ size_t rocshmem_float_wait_until_any(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__device__ void rocshmem_float_wait_until_all(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__device__ size_t rocshmem_float_wait_until_some(
|
||||
float *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, float val);
|
||||
__device__ size_t rocshmem_float_wait_until_any_vector(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__device__ void rocshmem_float_wait_until_all_vector(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__device__ size_t rocshmem_float_wait_until_some_vector(
|
||||
float *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, float val);
|
||||
__host__ void rocshmem_float_wait_until(
|
||||
float *ivars, int cmp, float val);
|
||||
__host__ size_t rocshmem_float_wait_until_any(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__host__ void rocshmem_float_wait_until_all(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__host__ size_t rocshmem_float_wait_until_some(
|
||||
float *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, float val);
|
||||
__host__ size_t rocshmem_float_wait_until_any_vector(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__host__ void rocshmem_float_wait_until_all_vector(
|
||||
float *ivars, size_t nelems, const int* status,
|
||||
int cmp, float val);
|
||||
__host__ size_t rocshmem_float_wait_until_some_vector(
|
||||
float *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, float val);
|
||||
|
||||
__device__ void rocshmem_double_wait_until(
|
||||
double *ivars, int cmp, double val);
|
||||
__device__ size_t rocshmem_double_wait_until_any(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__device__ void rocshmem_double_wait_until_all(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__device__ size_t rocshmem_double_wait_until_some(
|
||||
double *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, double val);
|
||||
__device__ size_t rocshmem_double_wait_until_any_vector(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__device__ void rocshmem_double_wait_until_all_vector(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__device__ size_t rocshmem_double_wait_until_some_vector(
|
||||
double *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, double val);
|
||||
__host__ void rocshmem_double_wait_until(
|
||||
double *ivars, int cmp, double val);
|
||||
__host__ size_t rocshmem_double_wait_until_any(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__host__ void rocshmem_double_wait_until_all(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__host__ size_t rocshmem_double_wait_until_some(
|
||||
double *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, double val);
|
||||
__host__ size_t rocshmem_double_wait_until_any_vector(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__host__ void rocshmem_double_wait_until_all_vector(
|
||||
double *ivars, size_t nelems, const int* status,
|
||||
int cmp, double val);
|
||||
__host__ size_t rocshmem_double_wait_until_some_vector(
|
||||
double *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, double val);
|
||||
|
||||
__device__ void rocshmem_char_wait_until(
|
||||
char *ivars, int cmp, char val);
|
||||
__device__ size_t rocshmem_char_wait_until_any(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__device__ void rocshmem_char_wait_until_all(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__device__ size_t rocshmem_char_wait_until_some(
|
||||
char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, char val);
|
||||
__device__ size_t rocshmem_char_wait_until_any_vector(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__device__ void rocshmem_char_wait_until_all_vector(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__device__ size_t rocshmem_char_wait_until_some_vector(
|
||||
char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, char val);
|
||||
__host__ void rocshmem_char_wait_until(
|
||||
char *ivars, int cmp, char val);
|
||||
__host__ size_t rocshmem_char_wait_until_any(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__host__ void rocshmem_char_wait_until_all(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__host__ size_t rocshmem_char_wait_until_some(
|
||||
char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, char val);
|
||||
__host__ size_t rocshmem_char_wait_until_any_vector(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__host__ void rocshmem_char_wait_until_all_vector(
|
||||
char *ivars, size_t nelems, const int* status,
|
||||
int cmp, char val);
|
||||
__host__ size_t rocshmem_char_wait_until_some_vector(
|
||||
char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, char val);
|
||||
|
||||
__device__ void rocshmem_schar_wait_until(
|
||||
signed char *ivars, int cmp, signed char val);
|
||||
__device__ size_t rocshmem_schar_wait_until_any(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__device__ void rocshmem_schar_wait_until_all(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__device__ size_t rocshmem_schar_wait_until_some(
|
||||
signed char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, signed char val);
|
||||
__device__ size_t rocshmem_schar_wait_until_any_vector(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__device__ void rocshmem_schar_wait_until_all_vector(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__device__ size_t rocshmem_schar_wait_until_some_vector(
|
||||
signed char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, signed char val);
|
||||
__host__ void rocshmem_schar_wait_until(
|
||||
signed char *ivars, int cmp, signed char val);
|
||||
__host__ size_t rocshmem_schar_wait_until_any(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__host__ void rocshmem_schar_wait_until_all(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__host__ size_t rocshmem_schar_wait_until_some(
|
||||
signed char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, signed char val);
|
||||
__host__ size_t rocshmem_schar_wait_until_any_vector(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__host__ void rocshmem_schar_wait_until_all_vector(
|
||||
signed char *ivars, size_t nelems, const int* status,
|
||||
int cmp, signed char val);
|
||||
__host__ size_t rocshmem_schar_wait_until_some_vector(
|
||||
signed char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, signed char val);
|
||||
|
||||
__device__ void rocshmem_short_wait_until(
|
||||
short *ivars, int cmp, short val);
|
||||
__device__ size_t rocshmem_short_wait_until_any(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__device__ void rocshmem_short_wait_until_all(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__device__ size_t rocshmem_short_wait_until_some(
|
||||
short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, short val);
|
||||
__device__ size_t rocshmem_short_wait_until_any_vector(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__device__ void rocshmem_short_wait_until_all_vector(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__device__ size_t rocshmem_short_wait_until_some_vector(
|
||||
short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, short val);
|
||||
__host__ void rocshmem_short_wait_until(
|
||||
short *ivars, int cmp, short val);
|
||||
__host__ size_t rocshmem_short_wait_until_any(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__host__ void rocshmem_short_wait_until_all(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__host__ size_t rocshmem_short_wait_until_some(
|
||||
short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, short val);
|
||||
__host__ size_t rocshmem_short_wait_until_any_vector(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__host__ void rocshmem_short_wait_until_all_vector(
|
||||
short *ivars, size_t nelems, const int* status,
|
||||
int cmp, short val);
|
||||
__host__ size_t rocshmem_short_wait_until_some_vector(
|
||||
short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, short val);
|
||||
|
||||
__device__ void rocshmem_int_wait_until(
|
||||
int *ivars, int cmp, int val);
|
||||
__device__ size_t rocshmem_int_wait_until_any(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__device__ void rocshmem_int_wait_until_all(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__device__ size_t rocshmem_int_wait_until_some(
|
||||
int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, int val);
|
||||
__device__ size_t rocshmem_int_wait_until_any_vector(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__device__ void rocshmem_int_wait_until_all_vector(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__device__ size_t rocshmem_int_wait_until_some_vector(
|
||||
int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, int val);
|
||||
__host__ void rocshmem_int_wait_until(
|
||||
int *ivars, int cmp, int val);
|
||||
__host__ size_t rocshmem_int_wait_until_any(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__host__ void rocshmem_int_wait_until_all(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__host__ size_t rocshmem_int_wait_until_some(
|
||||
int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, int val);
|
||||
__host__ size_t rocshmem_int_wait_until_any_vector(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__host__ void rocshmem_int_wait_until_all_vector(
|
||||
int *ivars, size_t nelems, const int* status,
|
||||
int cmp, int val);
|
||||
__host__ size_t rocshmem_int_wait_until_some_vector(
|
||||
int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, int val);
|
||||
|
||||
__device__ void rocshmem_long_wait_until(
|
||||
long *ivars, int cmp, long val);
|
||||
__device__ size_t rocshmem_long_wait_until_any(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__device__ void rocshmem_long_wait_until_all(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__device__ size_t rocshmem_long_wait_until_some(
|
||||
long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long val);
|
||||
__device__ size_t rocshmem_long_wait_until_any_vector(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__device__ void rocshmem_long_wait_until_all_vector(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__device__ size_t rocshmem_long_wait_until_some_vector(
|
||||
long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long val);
|
||||
__host__ void rocshmem_long_wait_until(
|
||||
long *ivars, int cmp, long val);
|
||||
__host__ size_t rocshmem_long_wait_until_any(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__host__ void rocshmem_long_wait_until_all(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__host__ size_t rocshmem_long_wait_until_some(
|
||||
long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long val);
|
||||
__host__ size_t rocshmem_long_wait_until_any_vector(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__host__ void rocshmem_long_wait_until_all_vector(
|
||||
long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long val);
|
||||
__host__ size_t rocshmem_long_wait_until_some_vector(
|
||||
long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long val);
|
||||
|
||||
__device__ void rocshmem_longlong_wait_until(
|
||||
long long *ivars, int cmp, long long val);
|
||||
__device__ size_t rocshmem_longlong_wait_until_any(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__device__ void rocshmem_longlong_wait_until_all(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__device__ size_t rocshmem_longlong_wait_until_some(
|
||||
long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long long val);
|
||||
__device__ size_t rocshmem_longlong_wait_until_any_vector(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__device__ void rocshmem_longlong_wait_until_all_vector(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__device__ size_t rocshmem_longlong_wait_until_some_vector(
|
||||
long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long long val);
|
||||
__host__ void rocshmem_longlong_wait_until(
|
||||
long long *ivars, int cmp, long long val);
|
||||
__host__ size_t rocshmem_longlong_wait_until_any(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__host__ void rocshmem_longlong_wait_until_all(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__host__ size_t rocshmem_longlong_wait_until_some(
|
||||
long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long long val);
|
||||
__host__ size_t rocshmem_longlong_wait_until_any_vector(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__host__ void rocshmem_longlong_wait_until_all_vector(
|
||||
long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, long long val);
|
||||
__host__ size_t rocshmem_longlong_wait_until_some_vector(
|
||||
long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, long long val);
|
||||
|
||||
__device__ void rocshmem_uchar_wait_until(
|
||||
unsigned char *ivars, int cmp, unsigned char val);
|
||||
__device__ size_t rocshmem_uchar_wait_until_any(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__device__ void rocshmem_uchar_wait_until_all(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__device__ size_t rocshmem_uchar_wait_until_some(
|
||||
unsigned char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__device__ size_t rocshmem_uchar_wait_until_any_vector(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__device__ void rocshmem_uchar_wait_until_all_vector(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__device__ size_t rocshmem_uchar_wait_until_some_vector(
|
||||
unsigned char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__host__ void rocshmem_uchar_wait_until(
|
||||
unsigned char *ivars, int cmp, unsigned char val);
|
||||
__host__ size_t rocshmem_uchar_wait_until_any(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__host__ void rocshmem_uchar_wait_until_all(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__host__ size_t rocshmem_uchar_wait_until_some(
|
||||
unsigned char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__host__ size_t rocshmem_uchar_wait_until_any_vector(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__host__ void rocshmem_uchar_wait_until_all_vector(
|
||||
unsigned char *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
__host__ size_t rocshmem_uchar_wait_until_some_vector(
|
||||
unsigned char *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned char val);
|
||||
|
||||
__device__ void rocshmem_ushort_wait_until(
|
||||
unsigned short *ivars, int cmp, unsigned short val);
|
||||
__device__ size_t rocshmem_ushort_wait_until_any(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__device__ void rocshmem_ushort_wait_until_all(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__device__ size_t rocshmem_ushort_wait_until_some(
|
||||
unsigned short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__device__ size_t rocshmem_ushort_wait_until_any_vector(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__device__ void rocshmem_ushort_wait_until_all_vector(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__device__ size_t rocshmem_ushort_wait_until_some_vector(
|
||||
unsigned short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__host__ void rocshmem_ushort_wait_until(
|
||||
unsigned short *ivars, int cmp, unsigned short val);
|
||||
__host__ size_t rocshmem_ushort_wait_until_any(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__host__ void rocshmem_ushort_wait_until_all(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__host__ size_t rocshmem_ushort_wait_until_some(
|
||||
unsigned short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__host__ size_t rocshmem_ushort_wait_until_any_vector(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__host__ void rocshmem_ushort_wait_until_all_vector(
|
||||
unsigned short *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
__host__ size_t rocshmem_ushort_wait_until_some_vector(
|
||||
unsigned short *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned short val);
|
||||
|
||||
__device__ void rocshmem_uint_wait_until(
|
||||
unsigned int *ivars, int cmp, unsigned int val);
|
||||
__device__ size_t rocshmem_uint_wait_until_any(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__device__ void rocshmem_uint_wait_until_all(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__device__ size_t rocshmem_uint_wait_until_some(
|
||||
unsigned int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__device__ size_t rocshmem_uint_wait_until_any_vector(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__device__ void rocshmem_uint_wait_until_all_vector(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__device__ size_t rocshmem_uint_wait_until_some_vector(
|
||||
unsigned int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__host__ void rocshmem_uint_wait_until(
|
||||
unsigned int *ivars, int cmp, unsigned int val);
|
||||
__host__ size_t rocshmem_uint_wait_until_any(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__host__ void rocshmem_uint_wait_until_all(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__host__ size_t rocshmem_uint_wait_until_some(
|
||||
unsigned int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__host__ size_t rocshmem_uint_wait_until_any_vector(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__host__ void rocshmem_uint_wait_until_all_vector(
|
||||
unsigned int *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
__host__ size_t rocshmem_uint_wait_until_some_vector(
|
||||
unsigned int *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned int val);
|
||||
|
||||
__device__ void rocshmem_ulong_wait_until(
|
||||
unsigned long *ivars, int cmp, unsigned long val);
|
||||
__device__ size_t rocshmem_ulong_wait_until_any(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__device__ void rocshmem_ulong_wait_until_all(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__device__ size_t rocshmem_ulong_wait_until_some(
|
||||
unsigned long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__device__ size_t rocshmem_ulong_wait_until_any_vector(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__device__ void rocshmem_ulong_wait_until_all_vector(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__device__ size_t rocshmem_ulong_wait_until_some_vector(
|
||||
unsigned long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__host__ void rocshmem_ulong_wait_until(
|
||||
unsigned long *ivars, int cmp, unsigned long val);
|
||||
__host__ size_t rocshmem_ulong_wait_until_any(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__host__ void rocshmem_ulong_wait_until_all(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__host__ size_t rocshmem_ulong_wait_until_some(
|
||||
unsigned long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__host__ size_t rocshmem_ulong_wait_until_any_vector(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__host__ void rocshmem_ulong_wait_until_all_vector(
|
||||
unsigned long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
__host__ size_t rocshmem_ulong_wait_until_some_vector(
|
||||
unsigned long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long val);
|
||||
|
||||
__device__ void rocshmem_ulonglong_wait_until(
|
||||
unsigned long long *ivars, int cmp, unsigned long long val);
|
||||
__device__ size_t rocshmem_ulonglong_wait_until_any(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__device__ void rocshmem_ulonglong_wait_until_all(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__device__ size_t rocshmem_ulonglong_wait_until_some(
|
||||
unsigned long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__device__ size_t rocshmem_ulonglong_wait_until_any_vector(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__device__ void rocshmem_ulonglong_wait_until_all_vector(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__device__ size_t rocshmem_ulonglong_wait_until_some_vector(
|
||||
unsigned long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__host__ void rocshmem_ulonglong_wait_until(
|
||||
unsigned long long *ivars, int cmp, unsigned long long val);
|
||||
__host__ size_t rocshmem_ulonglong_wait_until_any(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__host__ void rocshmem_ulonglong_wait_until_all(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__host__ size_t rocshmem_ulonglong_wait_until_some(
|
||||
unsigned long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__host__ size_t rocshmem_ulonglong_wait_until_any_vector(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__host__ void rocshmem_ulonglong_wait_until_all_vector(
|
||||
unsigned long long *ivars, size_t nelems, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
__host__ size_t rocshmem_ulonglong_wait_until_some_vector(
|
||||
unsigned long long *ivars, size_t nelems, size_t* indices, const int* status,
|
||||
int cmp, unsigned long long val);
|
||||
|
||||
|
||||
/**
|
||||
* @name SHMEM_TEST
|
||||
* @brief Test whether the condition (* \p ivars \p cmp \p val) is
|
||||
* true.
|
||||
*
|
||||
* This function can be called from divergent control paths at per-thread
|
||||
* granularity. However, performance may be improved if the caller can
|
||||
* coalesce contiguous messages and elect a leader thread to call into the
|
||||
* ROCSHMEM function.
|
||||
*
|
||||
* @param[in] ivars Pointer to memory on the symmetric heap to test.
|
||||
* @param[in] cmp Operation for the comparison.
|
||||
* @param[in] val Value to compare the memory at \p ivars to.
|
||||
*
|
||||
* @return 1 if the condition evaluates to true, otherwise 0.
|
||||
*/
|
||||
__device__ int rocshmem_float_test(
|
||||
float *ivars, int cmp, float val);
|
||||
__host__ int rocshmem_float_test(
|
||||
float *ivars, int cmp, float val);
|
||||
|
||||
// rocshmem_<TYPE>_test declarations (point-to-point synchronization header).
//
// For every element type in this span (double, char, signed char, short, int,
// long, long long, and the unsigned variants of char/short/int/long/long long)
// the same signature is declared twice: once as __device__ and once as
// __host__.
//
// Parameters (identical for every overload apart from the element type):
//   ivars - pointer to the element type being examined
//   cmp   - int selecting the comparison to apply
//   val   - value of the element type to compare against
// Returns: int.
//
// NOTE(review): presumably these mirror OpenSHMEM's shmem_test, i.e. a
// non-blocking check that returns non-zero when `*ivars <cmp> val` holds;
// confirm against the implementation.
// NOTE(review): the parameter is spelled `ivars` although the signature has
// the single-variable shape (OpenSHMEM calls it `ivar`); confirm that only one
// element is read.
__device__ int rocshmem_double_test(
|
||||
double *ivars, int cmp, double val);
|
||||
__host__ int rocshmem_double_test(
|
||||
double *ivars, int cmp, double val);
|
||||
|
||||
__device__ int rocshmem_char_test(
|
||||
char *ivars, int cmp, char val);
|
||||
__host__ int rocshmem_char_test(
|
||||
char *ivars, int cmp, char val);
|
||||
|
||||
__device__ int rocshmem_schar_test(
|
||||
signed char *ivars, int cmp, signed char val);
|
||||
__host__ int rocshmem_schar_test(
|
||||
signed char *ivars, int cmp, signed char val);
|
||||
|
||||
__device__ int rocshmem_short_test(
|
||||
short *ivars, int cmp, short val);
|
||||
__host__ int rocshmem_short_test(
|
||||
short *ivars, int cmp, short val);
|
||||
|
||||
__device__ int rocshmem_int_test(
|
||||
int *ivars, int cmp, int val);
|
||||
__host__ int rocshmem_int_test(
|
||||
int *ivars, int cmp, int val);
|
||||
|
||||
__device__ int rocshmem_long_test(
|
||||
long *ivars, int cmp, long val);
|
||||
__host__ int rocshmem_long_test(
|
||||
long *ivars, int cmp, long val);
|
||||
|
||||
__device__ int rocshmem_longlong_test(
|
||||
long long *ivars, int cmp, long long val);
|
||||
__host__ int rocshmem_longlong_test(
|
||||
long long *ivars, int cmp, long long val);
|
||||
|
||||
__device__ int rocshmem_uchar_test(
|
||||
unsigned char *ivars, int cmp, unsigned char val);
|
||||
__host__ int rocshmem_uchar_test(
|
||||
unsigned char *ivars, int cmp, unsigned char val);
|
||||
|
||||
__device__ int rocshmem_ushort_test(
|
||||
unsigned short *ivars, int cmp, unsigned short val);
|
||||
__host__ int rocshmem_ushort_test(
|
||||
unsigned short *ivars, int cmp, unsigned short val);
|
||||
|
||||
__device__ int rocshmem_uint_test(
|
||||
unsigned int *ivars, int cmp, unsigned int val);
|
||||
__host__ int rocshmem_uint_test(
|
||||
unsigned int *ivars, int cmp, unsigned int val);
|
||||
|
||||
__device__ int rocshmem_ulong_test(
|
||||
unsigned long *ivars, int cmp, unsigned long val);
|
||||
__host__ int rocshmem_ulong_test(
|
||||
unsigned long *ivars, int cmp, unsigned long val);
|
||||
|
||||
__device__ int rocshmem_ulonglong_test(
|
||||
unsigned long long *ivars, int cmp, unsigned long long val);
|
||||
__host__ int rocshmem_ulonglong_test(
|
||||
unsigned long long *ivars, int cmp, unsigned long long val);
|
||||
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_INCLUDE_ROCSHMEM_P2P_SYNC_HPP
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -0,0 +1,623 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef LIBRARY_INCLUDE_ROCSHMEM_SIG_OP_HPP
|
||||
#define LIBRARY_INCLUDE_ROCSHMEM_SIG_OP_HPP
|
||||
|
||||
namespace rocshmem {
|
||||
// Put-with-signal declarations whose names carry no _nbi/_wg/_wave suffix.
//
// All functions here are __device__, marked ATTR_NO_INLINE (macro defined
// outside this header), and return void. Each operation is declared in two
// forms: one taking an explicit rocshmem_ctx_t as its first argument
// (rocshmem_ctx_*) and one without a context argument.
//
// The untyped pair (rocshmem_putmem_signal / rocshmem_ctx_putmem_signal) takes
// void pointers; the typed pairs cover float, double, char, signed char,
// short, int, long, long long and the unsigned integer types.
//
// Parameters:
//   ctx      - context handle (ctx forms only)
//   dest     - destination pointer
//   source   - source pointer (const)
//   nelems   - size_t count
//   sig_addr - pointer to the uint64_t signal word
//   signal   - uint64_t signal operand
//   sig_op   - int selecting the signal operation
//   pe       - int identifying the target PE
//
// NOTE(review): semantics presumably follow OpenSHMEM shmem_put_signal (copy
// `nelems` elements -- bytes for putmem -- to `dest` on `pe`, then update the
// remote `sig_addr` with `signal` according to `sig_op`); confirm against the
// implementation.
// NOTE(review): size_t, uint64_t and rocshmem_ctx_t are used without any
// #include in this header, so it appears to rely on the including file.
__device__ ATTR_NO_INLINE void rocshmem_putmem_signal(
|
||||
void *dest, const void *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_putmem_signal(
|
||||
rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_put_signal(
|
||||
rocshmem_ctx_t ctx, float *dest, const float *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_float_put_signal(
|
||||
float *dest, const float *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_put_signal(
|
||||
rocshmem_ctx_t ctx, double *dest, const double *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_double_put_signal(
|
||||
double *dest, const double *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_put_signal(
|
||||
rocshmem_ctx_t ctx, char *dest, const char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_char_put_signal(
|
||||
char *dest, const char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_put_signal(
|
||||
rocshmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_schar_put_signal(
|
||||
signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_put_signal(
|
||||
rocshmem_ctx_t ctx, short *dest, const short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_short_put_signal(
|
||||
short *dest, const short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_put_signal(
|
||||
rocshmem_ctx_t ctx, int *dest, const int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_int_put_signal(
|
||||
int *dest, const int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_put_signal(
|
||||
rocshmem_ctx_t ctx, long *dest, const long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_long_put_signal(
|
||||
long *dest, const long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_put_signal(
|
||||
rocshmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_longlong_put_signal(
|
||||
long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_put_signal(
|
||||
rocshmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uchar_put_signal(
|
||||
unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_put_signal(
|
||||
rocshmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ushort_put_signal(
|
||||
unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_put_signal(
|
||||
rocshmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uint_put_signal(
|
||||
unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_put_signal(
|
||||
rocshmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulong_put_signal(
|
||||
unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_put_signal(
|
||||
rocshmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulonglong_put_signal(
|
||||
unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
// Put-with-signal declarations with the `_wg` name suffix.
//
// Same shape as the other put-with-signal families in this header: every
// function is __device__, ATTR_NO_INLINE and returns void, and each operation
// is declared both with a leading rocshmem_ctx_t argument (rocshmem_ctx_*) and
// without one. The untyped putmem pair takes void pointers; the typed pairs
// cover float, double, char, signed char, short, int, long, long long and the
// unsigned integer types.
//
// Parameters: (ctx,) dest, source, nelems, sig_addr, signal, sig_op, pe.
//
// NOTE(review): `_wg` presumably denotes a variant that is called
// cooperatively by a whole work-group; this header does not state it, so
// confirm the calling requirements against the implementation.
__device__ ATTR_NO_INLINE void rocshmem_putmem_signal_wg(
|
||||
void *dest, const void *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_putmem_signal_wg(
|
||||
rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, float *dest, const float *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_float_put_signal_wg(
|
||||
float *dest, const float *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, double *dest, const double *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_double_put_signal_wg(
|
||||
double *dest, const double *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, char *dest, const char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_char_put_signal_wg(
|
||||
char *dest, const char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_schar_put_signal_wg(
|
||||
signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, short *dest, const short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_short_put_signal_wg(
|
||||
short *dest, const short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, int *dest, const int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_int_put_signal_wg(
|
||||
int *dest, const int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, long *dest, const long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_long_put_signal_wg(
|
||||
long *dest, const long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_longlong_put_signal_wg(
|
||||
long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uchar_put_signal_wg(
|
||||
unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ushort_put_signal_wg(
|
||||
unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uint_put_signal_wg(
|
||||
unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulong_put_signal_wg(
|
||||
unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_put_signal_wg(
|
||||
rocshmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulonglong_put_signal_wg(
|
||||
unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
// Put-with-signal declarations with the `_wave` name suffix.
//
// Same shape as the other put-with-signal families in this header: every
// function is __device__, ATTR_NO_INLINE and returns void, and each operation
// is declared both with a leading rocshmem_ctx_t argument (rocshmem_ctx_*) and
// without one. The untyped putmem pair takes void pointers; the typed pairs
// cover float, double, char, signed char, short, int, long, long long and the
// unsigned integer types.
//
// Parameters: (ctx,) dest, source, nelems, sig_addr, signal, sig_op, pe.
//
// NOTE(review): `_wave` presumably denotes a variant that is called
// cooperatively by a whole wavefront; this header does not state it, so
// confirm the calling requirements against the implementation.
__device__ ATTR_NO_INLINE void rocshmem_putmem_signal_wave(
|
||||
void *dest, const void *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_putmem_signal_wave(
|
||||
rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, float *dest, const float *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_float_put_signal_wave(
|
||||
float *dest, const float *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, double *dest, const double *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_double_put_signal_wave(
|
||||
double *dest, const double *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, char *dest, const char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_char_put_signal_wave(
|
||||
char *dest, const char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_schar_put_signal_wave(
|
||||
signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, short *dest, const short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_short_put_signal_wave(
|
||||
short *dest, const short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, int *dest, const int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_int_put_signal_wave(
|
||||
int *dest, const int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, long *dest, const long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_long_put_signal_wave(
|
||||
long *dest, const long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_longlong_put_signal_wave(
|
||||
long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uchar_put_signal_wave(
|
||||
unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ushort_put_signal_wave(
|
||||
unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uint_put_signal_wave(
|
||||
unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulong_put_signal_wave(
|
||||
unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_put_signal_wave(
|
||||
rocshmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulonglong_put_signal_wave(
|
||||
unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
// Put-with-signal declarations with the `_nbi` name suffix.
//
// Same shape as the other put-with-signal families in this header: every
// function is __device__, ATTR_NO_INLINE and returns void, and each operation
// is declared both with a leading rocshmem_ctx_t argument (rocshmem_ctx_*) and
// without one. The untyped putmem pair takes void pointers; the typed pairs
// cover float, double, char, signed char, short, int, long, long long and the
// unsigned integer types.
//
// Parameters: (ctx,) dest, source, nelems, sig_addr, signal, sig_op, pe.
//
// NOTE(review): `_nbi` presumably marks the non-blocking form (OpenSHMEM
// shmem_put_signal_nbi), where completion requires a later quiet/fence-style
// call; this header does not state it, so confirm against the implementation.
__device__ ATTR_NO_INLINE void rocshmem_putmem_signal_nbi(
|
||||
void *dest, const void *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_putmem_signal_nbi(
|
||||
rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, float *dest, const float *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_float_put_signal_nbi(
|
||||
float *dest, const float *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, double *dest, const double *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_double_put_signal_nbi(
|
||||
double *dest, const double *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, char *dest, const char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_char_put_signal_nbi(
|
||||
char *dest, const char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_schar_put_signal_nbi(
|
||||
signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, short *dest, const short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_short_put_signal_nbi(
|
||||
short *dest, const short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, int *dest, const int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_int_put_signal_nbi(
|
||||
int *dest, const int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, long *dest, const long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_long_put_signal_nbi(
|
||||
long *dest, const long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_longlong_put_signal_nbi(
|
||||
long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uchar_put_signal_nbi(
|
||||
unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ushort_put_signal_nbi(
|
||||
unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uint_put_signal_nbi(
|
||||
unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulong_put_signal_nbi(
|
||||
unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_put_signal_nbi(
|
||||
rocshmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulonglong_put_signal_nbi(
|
||||
unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
// Put-with-signal declarations with the `_nbi_wg` name suffix.
//
// Same shape as the other put-with-signal families in this header: every
// function is __device__, ATTR_NO_INLINE and returns void, and each operation
// is declared both with a leading rocshmem_ctx_t argument (rocshmem_ctx_*) and
// without one. The untyped putmem pair takes void pointers; the typed pairs
// cover float, double, char, signed char, short, int, long, long long and the
// unsigned integer types.
//
// Parameters: (ctx,) dest, source, nelems, sig_addr, signal, sig_op, pe.
//
// NOTE(review): the suffix presumably combines the non-blocking (`_nbi`) form
// with work-group-cooperative (`_wg`) calling requirements; this header does
// not state either, so confirm against the implementation.
__device__ ATTR_NO_INLINE void rocshmem_putmem_signal_nbi_wg(
|
||||
void *dest, const void *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_putmem_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, float *dest, const float *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_float_put_signal_nbi_wg(
|
||||
float *dest, const float *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, double *dest, const double *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_double_put_signal_nbi_wg(
|
||||
double *dest, const double *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, char *dest, const char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_char_put_signal_nbi_wg(
|
||||
char *dest, const char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_schar_put_signal_nbi_wg(
|
||||
signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, short *dest, const short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_short_put_signal_nbi_wg(
|
||||
short *dest, const short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, int *dest, const int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_int_put_signal_nbi_wg(
|
||||
int *dest, const int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, long *dest, const long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_long_put_signal_nbi_wg(
|
||||
long *dest, const long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_longlong_put_signal_nbi_wg(
|
||||
long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uchar_put_signal_nbi_wg(
|
||||
unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ushort_put_signal_nbi_wg(
|
||||
unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uint_put_signal_nbi_wg(
|
||||
unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulong_put_signal_nbi_wg(
|
||||
unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_put_signal_nbi_wg(
|
||||
rocshmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulonglong_put_signal_nbi_wg(
|
||||
unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the untyped (`void *`) put-with-signal
 * entry points carrying the `_nbi_wave` suffix.
 *
 * The first form has no context argument; the `ctx` form takes an explicit
 * rocshmem_ctx_t first and otherwise the same parameters. Both return void.
 *
 * NOTE(review): for this `mem` variant nelems is presumably a byte count,
 * `_nbi_wave` presumably means non-blocking and invoked by a whole
 * wavefront, and sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_putmem_signal_nbi_wave(
|
||||
void *dest, const void *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ctx_putmem_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, void *dest, const void *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `float` put-with-signal entry
 * points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_float_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, float *dest, const float *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_float_put_signal_nbi_wave(
|
||||
float *dest, const float *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `double` put-with-signal entry
 * points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_double_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, double *dest, const double *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_double_put_signal_nbi_wave(
|
||||
double *dest, const double *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `char` put-with-signal entry
 * points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_char_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, char *dest, const char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_char_put_signal_nbi_wave(
|
||||
char *dest, const char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `signed char` put-with-signal
 * entry points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_schar_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, signed char *dest, const signed char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_schar_put_signal_nbi_wave(
|
||||
signed char *dest, const signed char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `short` put-with-signal entry
 * points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_short_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, short *dest, const short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_short_put_signal_nbi_wave(
|
||||
short *dest, const short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `int` put-with-signal entry points
 * carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_int_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, int *dest, const int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_int_put_signal_nbi_wave(
|
||||
int *dest, const int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `long` put-with-signal entry
 * points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_long_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, long *dest, const long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_long_put_signal_nbi_wave(
|
||||
long *dest, const long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `long long` put-with-signal entry
 * points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_longlong_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, long long *dest, const long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_longlong_put_signal_nbi_wave(
|
||||
long long *dest, const long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `unsigned char` put-with-signal
 * entry points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_uchar_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, unsigned char *dest, const unsigned char *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uchar_put_signal_nbi_wave(
|
||||
unsigned char *dest, const unsigned char *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `unsigned short` put-with-signal
 * entry points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_ushort_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, unsigned short *dest, const unsigned short *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ushort_put_signal_nbi_wave(
|
||||
unsigned short *dest, const unsigned short *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `unsigned int` put-with-signal
 * entry points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_uint_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, unsigned int *dest, const unsigned int *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_uint_put_signal_nbi_wave(
|
||||
unsigned int *dest, const unsigned int *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `unsigned long` put-with-signal
 * entry points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulong_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, unsigned long *dest, const unsigned long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulong_put_signal_nbi_wave(
|
||||
unsigned long *dest, const unsigned long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
/**
 * @brief Device-side prototypes for the `unsigned long long`
 * put-with-signal entry points carrying the `_nbi_wave` suffix.
 *
 * The `ctx` form takes an explicit rocshmem_ctx_t first; the other form has
 * the same remaining parameters without it. Both return void.
 *
 * NOTE(review): `_nbi_wave` presumably means non-blocking and invoked by a
 * whole wavefront; sig_op presumably takes a ROCSHMEM_SIGNAL_OPS value --
 * confirm against the implementation.
 */
__device__ ATTR_NO_INLINE void rocshmem_ctx_ulonglong_put_signal_nbi_wave(
|
||||
rocshmem_ctx_t ctx, unsigned long long *dest, const unsigned long long *source, size_t nelems,
|
||||
uint64_t *sig_addr, uint64_t signal, int sig_op, int pe);
|
||||
__device__ ATTR_NO_INLINE void rocshmem_ulonglong_put_signal_nbi_wave(
|
||||
unsigned long long *dest, const unsigned long long *source, size_t nelems, uint64_t *sig_addr,
|
||||
uint64_t signal, int sig_op, int pe);
|
||||
|
||||
|
||||
/**
 * @brief Device-side prototypes for the signal-fetch entry points.
 *
 * Three functions with identical signatures are declared: a plain form and
 * forms with `_wg` and `_wave` suffixes. Each takes a pointer to a const
 * uint64_t and returns a uint64_t.
 *
 * NOTE(review): presumably each returns the current value of the signal
 * word at sig_addr, with the suffixes selecting thread / work-group /
 * wavefront calling granularity -- confirm against the implementation.
 */
__device__ ATTR_NO_INLINE uint64_t rocshmem_signal_fetch(const uint64_t *sig_addr);
|
||||
__device__ ATTR_NO_INLINE uint64_t rocshmem_signal_fetch_wg(const uint64_t *sig_addr);
|
||||
__device__ ATTR_NO_INLINE uint64_t rocshmem_signal_fetch_wave(const uint64_t *sig_addr);
|
||||
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_INCLUDE_ROCSHMEM_SIG_OP_HPP
|
||||
@@ -0,0 +1,130 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef LIBRARY_INCLUDE_ROCSHMEM_COMMON_HPP
|
||||
#define LIBRARY_INCLUDE_ROCSHMEM_COMMON_HPP
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
/**
 * @brief Optional no-inline attribute applied to API declarations.
 *
 * Expands to `__attribute__((noinline))` when USE_FUNC_CALL is defined at
 * compile time and to nothing otherwise. As a preprocessor macro it is not
 * scoped by the surrounding namespace.
 */
#ifdef USE_FUNC_CALL
|
||||
#define ATTR_NO_INLINE __attribute__((noinline))
|
||||
#else
|
||||
#define ATTR_NO_INLINE
|
||||
#endif
|
||||
|
||||
|
||||
/**
 * @brief Status codes: ROCSHMEM_SUCCESS is 0 and ROCSHMEM_ERROR is 1.
 *
 * NOTE(review): which API calls report these values is not visible in this
 * header.
 */
enum ROCSHMEM_STATUS {
|
||||
ROCSHMEM_SUCCESS = 0,
|
||||
ROCSHMEM_ERROR = 1,
|
||||
};
|
||||
|
||||
/**
 * @brief Operation selectors.
 *
 * No enumerator has an explicit value, so they number consecutively from
 * ROCSHMEM_SUM (0) through ROCSHMEM_REPLACE (7); reordering the list would
 * change those values.
 *
 * NOTE(review): presumably these select the operator for reduction-style
 * calls -- confirm where they are consumed.
 */
enum ROCSHMEM_OP {
|
||||
ROCSHMEM_SUM,
|
||||
ROCSHMEM_MAX,
|
||||
ROCSHMEM_MIN,
|
||||
ROCSHMEM_PROD,
|
||||
ROCSHMEM_AND,
|
||||
ROCSHMEM_OR,
|
||||
ROCSHMEM_XOR,
|
||||
ROCSHMEM_REPLACE
|
||||
};
|
||||
|
||||
/**
 * @brief Signal operation selectors: ROCSHMEM_SIGNAL_SET (0) and
 * ROCSHMEM_SIGNAL_ADD (1), numbered implicitly.
 *
 * NOTE(review): presumably the values accepted by the `sig_op` parameter of
 * the put-with-signal functions -- confirm against the implementation.
 */
enum ROCSHMEM_SIGNAL_OPS {
|
||||
ROCSHMEM_SIGNAL_SET,
|
||||
ROCSHMEM_SIGNAL_ADD,
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Types defined for rocshmem_wait() operations.
*
* Comparison selectors; no enumerator has an explicit value, so they number
* consecutively from ROCSHMEM_CMP_EQ (0) through ROCSHMEM_CMP_LE (5).
|
||||
*/
|
||||
enum rocshmem_cmps {
|
||||
ROCSHMEM_CMP_EQ,
|
||||
ROCSHMEM_CMP_NE,
|
||||
ROCSHMEM_CMP_GT,
|
||||
ROCSHMEM_CMP_GE,
|
||||
ROCSHMEM_CMP_LT,
|
||||
ROCSHMEM_CMP_LE,
|
||||
};
|
||||
|
||||
/**
 * @brief Thread-level selectors, numbered implicitly from
 * ROCSHMEM_THREAD_SINGLE (0) through ROCSHMEM_THREAD_MULTIPLE (4).
 *
 * NOTE(review): the names parallel the OpenSHMEM SHMEM_THREAD_* support
 * levels, with ROCSHMEM_THREAD_WG_FUNNELED inserted between FUNNELED and
 * SERIALIZED; the exact semantics of each level are not visible here.
 */
enum rocshmem_thread_ops {
|
||||
ROCSHMEM_THREAD_SINGLE,
|
||||
ROCSHMEM_THREAD_FUNNELED,
|
||||
ROCSHMEM_THREAD_WG_FUNNELED,
|
||||
ROCSHMEM_THREAD_SERIALIZED,
|
||||
ROCSHMEM_THREAD_MULTIPLE
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Bitwise flags to mask configuration parameters.
*
* The enumerators are numbered implicitly: ROCSHMEM_TEAM_DEFAULT_CONFIGS is
* 0 and ROCSHMEM_TEAM_NUM_CONTEXTS is 1.
*
* NOTE(review): implicit numbering only yields valid single-bit flags for
* these two entries; a further enumerator would get 2 and the next one 3,
* which is not a single bit. Give new flags explicit power-of-two values.
|
||||
*/
|
||||
enum rocshmem_team_configs {
|
||||
ROCSHMEM_TEAM_DEFAULT_CONFIGS,
|
||||
ROCSHMEM_TEAM_NUM_CONTEXTS
|
||||
};
|
||||
|
||||
/**
 * @brief Team configuration record holding a single field, num_contexts.
 *
 * NOTE(review): presumably num_contexts is honoured when the
 * ROCSHMEM_TEAM_NUM_CONTEXTS mask bit is passed alongside this struct --
 * confirm against the team-creation API.
 */
typedef struct {
|
||||
int num_contexts;
|
||||
} rocshmem_team_config_t;
|
||||
|
||||
/*
 * Compile-time sizing constants for work/sync buffers.
 *
 * Resulting values: REDUCE_MIN_WRKDATA_SIZE = 1024,
 * ATA_MAX_WRKDATA_SIZE = 4194304, BARRIER/REDUCE/BCAST_SYNC_SIZE = 256,
 * ALLTOALL/FCOLLECT_SYNC_SIZE = 257, SYNC_VALUE = 0.
 *
 * NOTE(review): the units (bytes vs. elements) are not stated in this
 * header -- confirm before sizing buffers from these.
 * NOTE(review): this header has no #include directives, so size_t must
 * already be declared by whatever includes it.
 */
constexpr size_t ROCSHMEM_REDUCE_MIN_WRKDATA_SIZE = 1024;
|
||||
constexpr size_t ROCSHMEM_ATA_MAX_WRKDATA_SIZE = (4 * 1024 * 1024);
|
||||
constexpr size_t ROCSHMEM_BARRIER_SYNC_SIZE = 256;
|
||||
constexpr size_t ROCSHMEM_REDUCE_SYNC_SIZE = 256;
|
||||
// Internally calls sync function, which matches barrier implementation
|
||||
constexpr size_t ROCSHMEM_BCAST_SYNC_SIZE = ROCSHMEM_BARRIER_SYNC_SIZE;
|
||||
constexpr size_t ROCSHMEM_ALLTOALL_SYNC_SIZE = ROCSHMEM_BARRIER_SYNC_SIZE + 1;
|
||||
constexpr size_t ROCSHMEM_FCOLLECT_SYNC_SIZE = ROCSHMEM_ALLTOALL_SYNC_SIZE;
|
||||
constexpr size_t ROCSHMEM_SYNC_VALUE = 0;
|
||||
|
||||
/*
 * Context option constants. Apart from ROCSHMEM_CTX_ZERO (0), each value is
 * a distinct single bit: 1, 2, 4 and 8.
 *
 * NOTE(review): presumably these are OR-ed together into the options
 * argument of a context-creation call -- confirm against that API.
 */
const int ROCSHMEM_CTX_ZERO = 0;
|
||||
const int ROCSHMEM_CTX_NOSTORE = 1;
|
||||
const int ROCSHMEM_CTX_SERIALIZED = 2;
|
||||
const int ROCSHMEM_CTX_WG_PRIVATE = 4;
|
||||
const int ROCSHMEM_CTX_SHARED = 8;
|
||||
|
||||
/**
|
||||
* @brief GPU-side OpenSHMEM context created from each work-group's
|
||||
* rocshmem_wg_handle_t.
*
* Carries two untyped pointers, ctx_opaque and team_opaque; the types they
* refer to are not visible in this header.
|
||||
*/
|
||||
typedef struct {
|
||||
void *ctx_opaque;
|
||||
|
||||
void *team_opaque;
|
||||
} rocshmem_ctx_t;
|
||||
|
||||
/**
|
||||
* Shmem default context.
*
* Declared here as an extern `__constant__` object; its definition is not
* in this header.
|
||||
*/
|
||||
extern __constant__ rocshmem_ctx_t ROCSHMEM_CTX_DEFAULT;
|
||||
|
||||
/**
|
||||
* Used internally to set default context.
*
* Declared without a `__device__` qualifier, i.e. as a host function taking
* a pointer to a rocshmem_ctx_t.
* NOTE(review): whether `ctx` is read from or written to is not visible
* here -- confirm against the definition.
|
||||
*/
|
||||
void set_internal_ctx(rocshmem_ctx_t *ctx);
|
||||
|
||||
/*
 * Team handle type and its two well-known values.
 *
 * rocshmem_team_t is a plain pointer to uint64_t. ROCSHMEM_TEAM_WORLD is
 * only declared here (extern); its definition lives elsewhere.
 * ROCSHMEM_TEAM_INVALID is a const null handle.
 *
 * NOTE(review): what the pointed-to uint64_t represents is not visible in
 * this header -- treat the handle as opaque.
 */
typedef uint64_t *rocshmem_team_t;
|
||||
extern rocshmem_team_t ROCSHMEM_TEAM_WORLD;
|
||||
|
||||
const rocshmem_team_t ROCSHMEM_TEAM_INVALID = nullptr;
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_INCLUDE_ROCSHMEM_COMMON_HPP
|
||||
Ссылка в новой задаче
Block a user