diff --git a/projects/rocshmem/CHANGELOG.md b/projects/rocshmem/CHANGELOG.md index 6cfc340b5a..2acc306227 100644 --- a/projects/rocshmem/CHANGELOG.md +++ b/projects/rocshmem/CHANGELOG.md @@ -6,18 +6,18 @@ * Added the Reverse Offload conduit * Added new APIs: - * `rocshmem_barrier` - * `rocshmem_barrier_wave` - * `rocshmem_barrier_wg` - * `rocshmem_barrier_all` - * `rocshmem_barrier_all_wave` - * `rocshmem_barrier_all_wg` - * `rocshmem_sync` - * `rocshmem_sync_wave` - * `rocshmem_sync_wg` - * `rocshmem_sync_all` - * `rocshmem_sync_all_wave` - * `rocshmem_sync_all_wg` + * `rocshmem_ctx_barrier` + * `rocshmem_ctx_barrier_wave` + * `rocshmem_ctx_barrier_wg` + * `rocshmem_ctx_barrier_all` + * `rocshmem_ctx_barrier_all_wave` + * `rocshmem_ctx_barrier_all_wg` + * `rocshmem_ctx_sync` + * `rocshmem_ctx_sync_wave` + * `rocshmem_ctx_sync_wg` + * `rocshmem_ctx_sync_all` + * `rocshmem_ctx_sync_all_wave` + * `rocshmem_ctx_sync_all_wg` * `rocshmem_init_attr` * `rocshmem_get_uniqueid` * `rocshmem_set_attr_uniqueid_args` diff --git a/projects/rocshmem/docs/api/coll.rst b/projects/rocshmem/docs/api/coll.rst index bc6bfa28ed..1dd137f641 100644 --- a/projects/rocshmem/docs/api/coll.rst +++ b/projects/rocshmem/docs/api/coll.rst @@ -11,8 +11,23 @@ Collective routines ROCSHMEM_BARRIER_ALL -------------------- -.. cpp:function:: __device__ void rocshmem_ctx_wg_barrier_all(rocshmem_ctx_t ctx) -.. cpp:function:: __device__ void rocshmem_wg_barrier_all() +.. cpp:function:: __device__ void rocshmem_ctx_barrier_all(rocshmem_ctx_t ctx) +.. cpp:function:: __device__ void rocshmem_ctx_barrier_all_wave(rocshmem_ctx_t ctx) +.. cpp:function:: __device__ void rocshmem_ctx_barrier_all_wg(rocshmem_ctx_t ctx) + + :param ctx: Context with which to perform this operation. + :returns: None. + +**Description:** +This routine performs a collective barrier across all PEs in the system. +The caller is blocked until the barrier is resolved and all updates local and remote are completed. 
+ +ROCSHMEM_BARRIER +---------------- + +.. cpp:function:: __device__ void rocshmem_ctx_barrier(rocshmem_ctx_t ctx, rocshmem_team_t team) +.. cpp:function:: __device__ void rocshmem_ctx_barrier_wave(rocshmem_ctx_t ctx, rocshmem_team_t team) +.. cpp:function:: __device__ void rocshmem_ctx_barrier_wg(rocshmem_ctx_t ctx, rocshmem_team_t team) :param ctx: Context with which to perform this operation. :returns: None. @@ -24,8 +39,9 @@ The caller is blocked until the barrier is resolved. ROCSHMEM_TEAM_SYNC ------------------ -.. cpp:function:: __device__ void rocshmem_ctx_wg_team_sync(rocshmem_ctx_t ctx, rocshmem_team_t team) -.. cpp:function:: __device__ void rocshmem_wg_team_sync(rocshmem_team_t team) +.. cpp:function:: __device__ void rocshmem_ctx_sync(rocshmem_ctx_t ctx, rocshmem_team_t team) +.. cpp:function:: __device__ void rocshmem_ctx_sync_wave(rocshmem_ctx_t ctx, rocshmem_team_t team) +.. cpp:function:: __device__ void rocshmem_ctx_sync_wg(rocshmem_ctx_t ctx, rocshmem_team_t team) :param ctx: Context with which to perform this operation. :param team: Team with which to perform this operation. @@ -42,20 +58,20 @@ ensure the completion of remote memory updates issued via OpenSHMEM routines. ROCSHMEM_SYNC_ALL ----------------- -.. cpp:function:: __device__ void rocshmem_ctx_wg_sync_all(rocshmem_ctx_t ctx) -.. cpp:function:: __device__ void rocshmem_wg_sync_all() +.. cpp:function:: __device__ void rocshmem_ctx_sync_all(rocshmem_ctx_t ctx) +.. cpp:function:: __device__ void rocshmem_ctx_sync_all_wave(rocshmem_ctx_t ctx) +.. cpp:function:: __device__ void rocshmem_ctx_sync_all_wg(rocshmem_ctx_t ctx) :param ctx: Context with which to perform this operation. :returns: None. **Description:** -This routine behaves the same as ``rocshmem_wg_team_sync`` when called on the world team. - +This routine behaves the same as ``rocshmem_ctx_sync_wg`` when called on the world team. ROSHMEM_ALLTOALL ---------------- -.. 
cpp:function:: __device__ void rocshmem_ctx_TYPENAME_wg_alltoall(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems) +.. cpp:function:: __device__ void rocshmem_ctx_TYPENAME_alltoall_wg(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems) :param team: The team participating in the collective. :param dest: Destination address. Must be an address on the @@ -75,7 +91,7 @@ Valid ``TYPENAME`` and ``TYPE`` values are listed in :ref:`RMA_TYPES`. ROCSHMEM_BROADCAST ------------------ -.. cpp:function:: __device__ void rocshmem_ctx_TYPENAME_wg_broadcast(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems, int pe_root) +.. cpp:function:: __device__ void rocshmem_ctx_TYPENAME_broadcast_wg(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems, int pe_root) :param ctx: Context with which to perform this collective. :param team: The team participating in the collective. @@ -95,7 +111,7 @@ Valid ``TYPENAME`` and ``TYPE`` values are listed in :ref:`RMA_TYPES`. ROCSHMEM_FCOLLECT ----------------- -.. cpp:function:: __device__ void rocshmem_ctx_TYPENAME_wg_fcollect(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems) +.. cpp:function:: __device__ void rocshmem_ctx_TYPENAME_fcollect_wg(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nelems) :param ctx: Context with which to perform this collective. :param team: The team participating in the collective. @@ -112,7 +128,7 @@ PE participating in the collective routine. ROCSHMEM_REDUCTION ------------------ -.. cpp:function:: __device__ int rocshmem_ctx_TYPENAME_OPNAME_wg_reduce(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nreduce) +.. 
cpp:function:: __device__ int rocshmem_ctx_TYPENAME_OPNAME_reduce_wg(rocshmem_ctx_t ctx, rocshmem_team_t team, TYPE *dest, const TYPE *source, int nreduce) :param ctx: Context with which to perform this collective. :param team: The team participating in the collective. diff --git a/projects/rocshmem/docs/api/init.rst b/projects/rocshmem/docs/api/init.rst index fb89eb9f3e..51f57e0920 100644 --- a/projects/rocshmem/docs/api/init.rst +++ b/projects/rocshmem/docs/api/init.rst @@ -53,6 +53,45 @@ This routine finalizes device-side rocSHMEM resources. It must be called before work-group completion if the work-group also called ``rocshmem_wg_init``. It must be called collectively by all threads in the work-group. +ROCSHMEM_INIT_ATTR +------------------ +.. cpp:function:: __host__ int rocshmem_init_attr(unsigned int flags, rocshmem_init_attr_t *attr) + + :param flags: The initialization method to be used. + :param attr: Attribute structure specifying input characteristics. + + :returns: Returns ``0`` on success; otherwise, returns a nonzero value. + +**Description:** +This routine initializes the rocSHMEM runtime and underlying transport layer using +the provided mode and attributes. +The parameter ``flags`` can be either +``ROCSHMEM_INIT_WITH_UNIQUEID`` or ``ROCSHMEM_INIT_WITH_MPI_COMM``. + +ROCSHMEM_GET_UNIQUEID +--------------------- +.. cpp:function:: __host__ int rocshmem_get_uniqueid(rocshmem_uniqueid_t *uid) + + :param uid: Pointer to a unique ID handle. + :returns: Returns ``0`` on success; otherwise, returns a nonzero value. + +**Description:** +This routine returns a unique ID. + +ROCSHMEM_SET_ATTR_UNIQUEID_ARGS +------------------------------- +.. cpp:function:: __host__ int rocshmem_set_attr_uniqueid_args(int rank, int nranks, rocshmem_uniqueid_t *uid, rocshmem_init_attr_t *attr) + + :param rank: Rank of the calling process. + :param nranks: Number of PEs. + :param uid: Unique ID used to identify the group of processes. 
+ :param attr: Attribute structure to be passed to ``rocshmem_init_attr``. + + :returns: Returns ``0`` on success; otherwise, returns a nonzero value. + +**Description:** +This routine initializes the ``rocshmem_init_attr_t`` struct. + ROCSHMEM_N_PES -------------- diff --git a/projects/rocshmem/include/rocshmem/rocshmem.hpp b/projects/rocshmem/include/rocshmem/rocshmem.hpp index d3796ab7ab..ad150e0b53 100644 --- a/projects/rocshmem/include/rocshmem/rocshmem.hpp +++ b/projects/rocshmem/include/rocshmem/rocshmem.hpp @@ -105,7 +105,7 @@ __host__ int rocshmem_init_attr(unsigned int flags, rocshmem_init_attr_t *attr); __host__ int rocshmem_get_uniqueid(rocshmem_uniqueid_t *uid); /** - * @brief Query the thread mode used by the runtime. + * @brief Initializes the rocshmem_init_attr_t struct. * * @param[in] rank rank of the calling process * @param[in] nranks number of pes