Readthedocs documentation support (#71)
This commit is contained in:
+2456
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
دودویی (BIN)
فایل باینری نشان داده نشده است.
|
پس از عرض: | ارتفاع: | اندازه: 6.6 KiB |
Executable
+16
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ -d docBin ]; then
|
||||
rm -rf docBin
|
||||
fi
|
||||
|
||||
sed -e 's/ROCFFT_EXPORT //g' ../library/include/rocfft.h > rocfft.h
|
||||
doxygen Doxyfile
|
||||
|
||||
cd source
|
||||
make clean
|
||||
make html
|
||||
cd ..
|
||||
|
||||
rm rocfft.h
|
||||
|
||||
Executable
+12
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ -d docBin ]; then
|
||||
rm -rf docBin
|
||||
fi
|
||||
|
||||
rm nccl.h
|
||||
|
||||
sed -e 's/ROCFFT_EXPORT //g' ../src/nccl.h.in > nccl.h
|
||||
doxygen Doxyfile
|
||||
#rm nccl.h
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
SPHINXPROJ = RCCL
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
@@ -0,0 +1,11 @@
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
:caption: Contents:
|
||||
|
||||
=======
|
||||
All API
|
||||
=======
|
||||
|
||||
.. doxygenindex::
|
||||
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
:caption: Contents:
|
||||
|
||||
===
|
||||
API
|
||||
===
|
||||
|
||||
This section provides details of the library API
|
||||
|
||||
Communicator Functions
|
||||
----------------------
|
||||
|
||||
.. doxygenfunction:: ncclGetUniqueId
|
||||
|
||||
.. doxygenfunction:: ncclCommInitRank
|
||||
|
||||
.. doxygenfunction:: ncclCommInitAll
|
||||
|
||||
.. doxygenfunction:: ncclCommDestroy
|
||||
|
||||
.. doxygenfunction:: ncclCommCount
|
||||
|
||||
.. doxygenfunction:: ncclCommCuDevice
|
||||
|
||||
.. doxygenfunction:: ncclCommUserRank
|
||||
|
||||
Collective Communication Operations
|
||||
-----------------------------------
|
||||
|
||||
Collective communication operations must be called separately for each communicator in a communicator clique.
|
||||
|
||||
They return when operations have been enqueued on the hipstream.
|
||||
|
||||
Since they may perform inter-CPU synchronization, each call has to be done from a different thread or process, or needs to use Group Semantics (see below).
|
||||
|
||||
.. doxygenfunction:: ncclReduce
|
||||
|
||||
.. doxygenfunction:: ncclBcast
|
||||
|
||||
.. doxygenfunction:: ncclBroadcast
|
||||
|
||||
.. doxygenfunction:: ncclAllReduce
|
||||
|
||||
.. doxygenfunction:: ncclReduceScatter
|
||||
|
||||
.. doxygenfunction:: ncclAllGather
|
||||
|
||||
|
||||
Group Semantics
|
||||
---------------
|
||||
When managing multiple GPUs from a single thread, and since NCCL collective
|
||||
calls may perform inter-CPU synchronization, we need to "group" calls for
|
||||
different ranks/devices into a single call.
|
||||
|
||||
Grouping NCCL calls as being part of the same collective operation is done
|
||||
using ncclGroupStart and ncclGroupEnd. ncclGroupStart will enqueue all
|
||||
collective calls until the ncclGroupEnd call, which will wait for all calls
|
||||
to be complete. Note that for collective communication, ncclGroupEnd only
|
||||
guarantees that the operations are enqueued on the streams, not that
|
||||
the operation is effectively done.
|
||||
|
||||
Both collective communication and ncclCommInitRank can be used in conjunction
|
||||
with ncclGroupStart/ncclGroupEnd.
|
||||
|
||||
.. doxygenfunction:: ncclGroupStart
|
||||
|
||||
.. doxygenfunction:: ncclGroupEnd
|
||||
|
||||
Library Functions
|
||||
-----------------
|
||||
|
||||
.. doxygenfunction:: ncclGetVersion
|
||||
|
||||
.. doxygenfunction:: ncclGetErrorString
|
||||
|
||||
Types
|
||||
-----
|
||||
|
||||
There are a few data structures that are internal to the library. The pointer types to these
|
||||
structures are given below. The user would need to use these types to create handles and pass them
|
||||
between different library functions.
|
||||
|
||||
.. doxygentypedef:: ncclComm_t
|
||||
|
||||
.. doxygenstruct:: ncclUniqueId
|
||||
|
||||
|
||||
|
||||
Enumerations
|
||||
------------
|
||||
|
||||
This section provides all the enumerations used.
|
||||
|
||||
.. doxygenenum:: ncclResult_t
|
||||
|
||||
.. doxygenenum:: ncclRedOp_t
|
||||
|
||||
.. doxygenenum:: ncclDataType_t
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# RCCL documentation build configuration file, created by
|
||||
# sphinx-quickstart on Mon Jan 8 16:34:42 2018.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its
|
||||
# containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
# import os
|
||||
# import sys
|
||||
# sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True'
|
||||
|
||||
if read_the_docs_build:
|
||||
subprocess.call('cd ..; ./run_doxygen.sh; cd source', shell=True)
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
#
|
||||
# needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = ['sphinx.ext.mathjax', 'breathe']
|
||||
breathe_projects = { "RCCL": "../docBin/xml" }
|
||||
breathe_default_project = "RCCL"
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The suffix(es) of source filenames.
|
||||
# You can specify multiple suffix as a list of string:
|
||||
#
|
||||
# source_suffix = ['.rst', '.md']
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'RCCL'
|
||||
copyright = u'2019, Advanced Micro Devices'
|
||||
author = u'Advanced Micro Devices'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = u'0.8'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = u'0.8'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#
|
||||
# This is also used if you do content translation via gettext catalogs.
|
||||
# Usually you set "language" from the command line for these cases.
|
||||
language = None
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# These patterns also affect html_static_path and html_extra_path
|
||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
|
||||
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
||||
todo_include_todos = False
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
# html_theme = 'alabaster'
|
||||
|
||||
if read_the_docs_build:
|
||||
html_theme = 'default'
|
||||
else:
|
||||
import sphinx_rtd_theme
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#
|
||||
# html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
# html_static_path = ['_static']
|
||||
|
||||
# Custom sidebar templates, must be a dictionary that maps document names
|
||||
# to template names.
|
||||
#
|
||||
# This is required for the alabaster theme
|
||||
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
|
||||
# html_sidebars = {
|
||||
# '**': [
|
||||
# 'relations.html', # needs 'show_related': True theme option to display
|
||||
# 'searchbox.html',
|
||||
# ]
|
||||
# }
|
||||
|
||||
|
||||
# -- Options for HTMLHelp output ------------------------------------------
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'RCCLdoc'
|
||||
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#
|
||||
# 'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#
|
||||
# 'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#
|
||||
# 'preamble': '',
|
||||
|
||||
# Latex figure (float) alignment
|
||||
#
|
||||
# 'figure_align': 'htbp',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
(master_doc, 'RCCL.tex', u'RCCL Documentation',
|
||||
u'Advanced Micro Devices', 'manual'),
|
||||
]
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
(master_doc, 'RCCL', u'RCCL Documentation',
|
||||
[author], 1)
|
||||
]
|
||||
|
||||
|
||||
# -- Options for Texinfo output -------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
(master_doc, 'RCCL', u'RCCL Documentation',
|
||||
author, 'RCCL', 'One line description of project.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
.. RCCL documentation master file, created by
|
||||
sphinx-quickstart on Mon Jan 8 09:51:41 2018.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Welcome to RCCL's documentation!
|
||||
==================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
:caption: Contents:
|
||||
|
||||
library
|
||||
api
|
||||
allapi
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`search`
|
||||
@@ -0,0 +1,13 @@
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
:caption: Contents:
|
||||
|
||||
======
|
||||
RCCL
|
||||
======
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The RCCL is an AMD port of NCCL.
|
||||
@@ -0,0 +1,3 @@
|
||||
|
||||
breathe
|
||||
|
||||
+68
-51
@@ -23,14 +23,16 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Opaque handle to communicator */
|
||||
/*! @brief Opaque handle to communicator
|
||||
*/
|
||||
typedef struct ncclComm* ncclComm_t;
|
||||
|
||||
#define NCCL_UNIQUE_ID_BYTES 128
|
||||
/*! @brief struct to store ncclUniqueId */
|
||||
typedef struct { char internal[NCCL_UNIQUE_ID_BYTES]; } ncclUniqueId;
|
||||
|
||||
/* Error type */
|
||||
typedef enum { ncclSuccess = 0,
|
||||
/*! @brief Error type */
|
||||
typedef enum { ncclSuccess = 0, /**< Successfully ran */
|
||||
ncclUnhandledCudaError = 1,
|
||||
ncclSystemError = 2,
|
||||
ncclInternalError = 3,
|
||||
@@ -38,65 +40,86 @@ typedef enum { ncclSuccess = 0,
|
||||
ncclInvalidUsage = 5,
|
||||
ncclNumResults = 6 } ncclResult_t;
|
||||
|
||||
/* Return the NCCL_VERSION_CODE of the NCCL library in the supplied integer.
|
||||
* This integer is coded with the MAJOR, MINOR and PATCH level of the
|
||||
|
||||
|
||||
/*! @brief Return the NCCL_VERSION_CODE of the NCCL library in the supplied integer.
|
||||
*
|
||||
* @details This integer is coded with the MAJOR, MINOR and PATCH level of the
|
||||
* NCCL library
|
||||
*/
|
||||
ncclResult_t ncclGetVersion(int *version);
|
||||
ncclResult_t pncclGetVersion(int *version);
|
||||
|
||||
/* Generates an Id to be used in ncclCommInitRank. ncclGetUniqueId should be
|
||||
* called once and the Id should be distributed to all ranks in the
|
||||
* communicator before calling ncclCommInitRank. */
|
||||
/*! @brief Generates an ID for ncclCommInitRank
|
||||
|
||||
@details
|
||||
Generates an ID to be used in ncclCommInitRank. ncclGetUniqueId should be
|
||||
called once and the Id should be distributed to all ranks in the
|
||||
communicator before calling ncclCommInitRank.
|
||||
|
||||
@param[out]
|
||||
uniqueId ncclUniqueId*
|
||||
pointer to uniqueId
|
||||
|
||||
*/
|
||||
ncclResult_t ncclGetUniqueId(ncclUniqueId* uniqueId);
|
||||
ncclResult_t pncclGetUniqueId(ncclUniqueId* uniqueId);
|
||||
|
||||
/* Creates a new communicator (multi thread/process version).
|
||||
* rank must be between 0 and nranks-1 and unique within a communicator clique.
|
||||
* Each rank is associated to a CUDA device, which has to be set before calling
|
||||
* ncclCommInitRank.
|
||||
* ncclCommInitRank implicitly syncronizes with other ranks, so it must be
|
||||
* called by different threads/processes or use ncclGroupStart/ncclGroupEnd. */
|
||||
/*! @brief Creates a new communicator (multi thread/process version).
|
||||
|
||||
@details
|
||||
rank must be between 0 and nranks-1 and unique within a communicator clique.
|
||||
Each rank is associated to a CUDA device, which has to be set before calling
|
||||
ncclCommInitRank.
|
||||
ncclCommInitRank implicitly synchronizes with other ranks, so it must be
|
||||
called by different threads/processes or use ncclGroupStart/ncclGroupEnd.
|
||||
|
||||
@param[out]
|
||||
comm ncclComm_t*
|
||||
communicator struct pointer
|
||||
*/
|
||||
ncclResult_t ncclCommInitRank(ncclComm_t* comm, int nranks, ncclUniqueId commId, int rank);
|
||||
ncclResult_t pncclCommInitRank(ncclComm_t* comm, int nranks, ncclUniqueId commId, int rank);
|
||||
|
||||
/* Creates a clique of communicators (single process version).
|
||||
* This is a convenience function to create a single-process communicator clique.
|
||||
/*! @brief Creates a clique of communicators (single process version).
|
||||
*
|
||||
* @details This is a convenience function to create a single-process communicator clique.
|
||||
* Returns an array of ndev newly initialized communicators in comm.
|
||||
* comm should be pre-allocated with size at least ndev*sizeof(ncclComm_t).
|
||||
* If devlist is NULL, the first ndev CUDA devices are used.
|
||||
* Order of devlist defines user-order of processors within the communicator. */
|
||||
* If devlist is NULL, the first ndev HIP devices are used.
|
||||
* Order of devlist defines user-order of processors within the communicator.
|
||||
* */
|
||||
ncclResult_t ncclCommInitAll(ncclComm_t* comm, int ndev, const int* devlist);
|
||||
ncclResult_t pncclCommInitAll(ncclComm_t* comm, int ndev, const int* devlist);
|
||||
|
||||
/* Frees resources associated with communicator object. */
|
||||
/*! @brief Frees resources associated with communicator object. */
|
||||
ncclResult_t ncclCommDestroy(ncclComm_t comm);
|
||||
ncclResult_t pncclCommDestroy(ncclComm_t comm);
|
||||
|
||||
/* Returns a human-readable error message. */
|
||||
/*! @brief Returns a human-readable error message. */
|
||||
const char* ncclGetErrorString(ncclResult_t result);
|
||||
const char* pncclGetErrorString(ncclResult_t result);
|
||||
|
||||
/* Gets the number of ranks in the communicator clique. */
|
||||
/*! @brief Gets the number of ranks in the communicator clique. */
|
||||
ncclResult_t ncclCommCount(const ncclComm_t comm, int* count);
|
||||
ncclResult_t pncclCommCount(const ncclComm_t comm, int* count);
|
||||
|
||||
/* Returns the cuda device number associated with the communicator. */
|
||||
/*! @brief Returns the rocm device number associated with the communicator. */
|
||||
ncclResult_t ncclCommCuDevice(const ncclComm_t comm, int* device);
|
||||
ncclResult_t pncclCommCuDevice(const ncclComm_t comm, int* device);
|
||||
|
||||
/* Returns the user-ordered "rank" associated with the communicator. */
|
||||
/*! @brief Returns the user-ordered "rank" associated with the communicator. */
|
||||
ncclResult_t ncclCommUserRank(const ncclComm_t comm, int* rank);
|
||||
ncclResult_t pncclCommUserRank(const ncclComm_t comm, int* rank);
|
||||
|
||||
/* Reduction operation selector */
|
||||
/*! @brief Reduction operation selector */
|
||||
typedef enum { ncclSum = 0,
|
||||
ncclProd = 1,
|
||||
ncclMax = 2,
|
||||
ncclMin = 3,
|
||||
ncclNumOps = 4 } ncclRedOp_t;
|
||||
|
||||
/* Data types */
|
||||
/*! @brief Data types */
|
||||
typedef enum { ncclInt8 = 0, ncclChar = 0,
|
||||
ncclUint8 = 1,
|
||||
ncclInt32 = 2, ncclInt = 2,
|
||||
@@ -114,17 +137,17 @@ typedef enum { ncclInt8 = 0, ncclChar = 0,
|
||||
* Collective communication operations must be called separately for each
|
||||
* communicator in a communicator clique.
|
||||
*
|
||||
* They return when operations have been enqueued on the CUDA stream.
|
||||
* They return when operations have been enqueued on the hipstream.
|
||||
*
|
||||
* Since they may perform inter-CPU synchronization, each call has to be done
|
||||
* from a different thread or process, or need to use Group Semantics (see
|
||||
* below).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Reduce
|
||||
/*!
|
||||
* @brief Reduce collective communication
|
||||
*
|
||||
* Reduces data arrays of length count in sendbuff into recvbuff using op
|
||||
* @details Reduces data arrays of length count in sendbuff into recvbuff using op
|
||||
* operation.
|
||||
* recvbuff may be NULL on all calls except for root device.
|
||||
* root is the rank (not the CUDA device) where data will reside after the
|
||||
@@ -137,10 +160,9 @@ ncclResult_t ncclReduce(const void* sendbuff, void* recvbuff, size_t count, ncc
|
||||
ncclResult_t pncclReduce(const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype,
|
||||
ncclRedOp_t op, int root, ncclComm_t comm, hipStream_t stream);
|
||||
|
||||
/*
|
||||
* (deprecated) Broadcast (in-place)
|
||||
/*! @brief (deprecated) Broadcast (in-place)
|
||||
*
|
||||
* Copies count values from root to all other devices.
|
||||
* @details Copies count values from root to all other devices.
|
||||
* root is the rank (not the CUDA device) where data resides before the
|
||||
* operation is started.
|
||||
*
|
||||
@@ -151,11 +173,10 @@ ncclResult_t ncclBcast(void* buff, size_t count, ncclDataType_t datatype, int r
|
||||
ncclResult_t pncclBcast(void* buff, size_t count, ncclDataType_t datatype, int root,
|
||||
ncclComm_t comm, hipStream_t stream);
|
||||
|
||||
/*
|
||||
* Broadcast
|
||||
/*! @brief Broadcast
|
||||
*
|
||||
* Copies count values from root to all other devices.
|
||||
* root is the rank (not the CUDA device) where data resides before the
|
||||
* @details Copies count values from root to all other devices.
|
||||
* root is the rank (not the HIP device) where data resides before the
|
||||
* operation is started.
|
||||
*
|
||||
* In-place operation will happen if sendbuff == recvbuff.
|
||||
@@ -165,10 +186,9 @@ ncclResult_t ncclBroadcast(const void* sendbuff, void* recvbuff, size_t count,
|
||||
ncclResult_t pncclBroadcast(const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, int root,
|
||||
ncclComm_t comm, hipStream_t stream);
|
||||
|
||||
/*
|
||||
* All-Reduce
|
||||
/*! @brief All-Reduce
|
||||
*
|
||||
* Reduces data arrays of length count in sendbuff using op operation, and
|
||||
* @details Reduces data arrays of length count in sendbuff using op operation, and
|
||||
* leaves identical copies of result on each recvbuff.
|
||||
*
|
||||
* In-place operation will happen if sendbuff == recvbuff.
|
||||
@@ -178,10 +198,10 @@ ncclResult_t ncclAllReduce(const void* sendbuff, void* recvbuff, size_t count,
|
||||
ncclResult_t pncclAllReduce(const void* sendbuff, void* recvbuff, size_t count,
|
||||
ncclDataType_t datatype, ncclRedOp_t op, ncclComm_t comm, hipStream_t stream);
|
||||
|
||||
/*
|
||||
* Reduce-Scatter
|
||||
/*!
|
||||
* @brief Reduce-Scatter
|
||||
*
|
||||
* Reduces data in sendbuff using op operation and leaves reduced result
|
||||
* @details Reduces data in sendbuff using op operation and leaves reduced result
|
||||
* scattered over the devices so that recvbuff on rank i will contain the i-th
|
||||
* block of the result.
|
||||
* Assumes sendcount is equal to nranks*recvcount, which means that sendbuff
|
||||
@@ -196,10 +216,9 @@ ncclResult_t pncclReduceScatter(const void* sendbuff, void* recvbuff,
|
||||
size_t recvcount, ncclDataType_t datatype, ncclRedOp_t op, ncclComm_t comm,
|
||||
hipStream_t stream);
|
||||
|
||||
/*
|
||||
* All-Gather
|
||||
/*! @brief All-Gather
|
||||
*
|
||||
* Each device gathers sendcount values from other GPUs into recvbuff,
|
||||
* @details Each device gathers sendcount values from other GPUs into recvbuff,
|
||||
* receiving data from rank i at offset i*sendcount.
|
||||
* Assumes recvcount is equal to nranks*sendcount, which means that recvbuff
|
||||
* should have a size of at least nranks*sendcount elements.
|
||||
@@ -229,18 +248,16 @@ ncclResult_t pncclAllGather(const void* sendbuff, void* recvbuff, size_t sendcou
|
||||
* of ncclGroupStart/ncclGroupEnd.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Group Start
|
||||
/*! @brief Group Start
|
||||
*
|
||||
* Start a group call. All subsequent calls to NCCL may not block due to
|
||||
* @details Start a group call. All subsequent calls to NCCL may not block due to
|
||||
* inter-CPU synchronization.
|
||||
*/
|
||||
ncclResult_t ncclGroupStart();
|
||||
|
||||
/*
|
||||
* Group End
|
||||
/*! @brief Group End
|
||||
*
|
||||
* End a group call. Wait for all calls since ncclGroupStart to complete
|
||||
* @details End a group call. Wait for all calls since ncclGroupStart to complete
|
||||
* before returning.
|
||||
*/
|
||||
ncclResult_t ncclGroupEnd();
|
||||
|
||||
مرجع در شماره جدید
Block a user