diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9cdf2d670c..1c3fa17b15 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -6,7 +6,12 @@ version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values - directory: "/docs/.sphinx" # Location of package manifests + directory: "/docs/sphinx" # Location of package manifests open-pull-requests-limit: 10 schedule: interval: "daily" + labels: + - "dependencies" + - "noCI" + reviewers: + - "samjwu" diff --git a/.gitignore b/.gitignore index 402fdcc8e9..919c9dffb8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ _static/ _templates/ _toc.yml docBin/ +_doxygen/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 43a0890c96..5f50df2525 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,12 +3,16 @@ version: 2 +build: + os: ubuntu-22.04 + tools: + python: "3.8" + sphinx: configuration: docs/conf.py -formats: [htmlzip] +formats: [htmlzip, pdf, epub] python: - version: "3.8" install: - - requirements: docs/.sphinx/requirements.txt + - requirements: docs/sphinx/requirements.txt diff --git a/README.md b/README.md index 0f0f48db64..ce9dc6d4bc 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ The root of this repository has a helper script 'install.sh' to build and instal * `./install.sh --npkit-enable` -- enable compilation of npkit profiler framework with all options ## Manual build + ### To build the library : ```shell @@ -60,18 +61,8 @@ $ sudo dpkg -i *.deb RCCL package install requires sudo/root access because it creates a directory called "rccl" under /opt/rocm/. This is an optional step and RCCL can be used directly by including the path containing librccl.so. -### How to build documentation -Please follow the instructions below to build the documentation. -```bash -cd docs - -pip3 install -r .sphinx/requirements.txt - -python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . 
_build/html -``` - - ## Enabling peer-to-peer transport + In order to enable peer-to-peer access on machines with PCIe-connected GPUs, the HSA environment variable HSA_FORCE_FINE_GRAIN_PCIE=1 is required to be set, on top of requiring GPUs that support peer-to-peer access and proper large BAR addressing support. ## Tests @@ -108,7 +99,7 @@ To manually analyze NPKit dump results, please leverage [npkit_trace_generator.p ## Library and API Documentation -Please refer to the [Library documentation](https://rccl.readthedocs.io/) for current documentation. +Please refer to the [RCCL Documentation Site](https://rocm.docs.amd.com/projects/rccl/en/latest/) for current documentation. ### How to build documentation @@ -117,7 +108,7 @@ Run the steps below to build documentation locally. ``` cd docs -pip3 install -r .sphinx/requirements.txt +pip3 install -r sphinx/requirements.txt python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html ``` diff --git a/docs/.sphinx/requirements.in b/docs/.sphinx/requirements.in deleted file mode 100644 index 0be4351f8d..0000000000 --- a/docs/.sphinx/requirements.in +++ /dev/null @@ -1 +0,0 @@ -rocm-docs-core==0.13.3 diff --git a/docs/attributions.rst b/docs/attributions.rst index 9eb3e41208..36beccd6cf 100644 --- a/docs/attributions.rst +++ b/docs/attributions.rst @@ -40,8 +40,4 @@ under subcontract 7078610 with Lawrence Berkeley National Laboratory. This code also includes files from the NVIDIA Tools Extension SDK project. 
-For more information and license details, see - - https://github.com/NVIDIA/NVTX - - +For more information and license details, see https://github.com/NVIDIA/NVTX diff --git a/docs/conf.py b/docs/conf.py index bb4550a2c8..74773b4785 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,10 +4,23 @@ # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html +import subprocess + from rocm_docs import ROCmDocs -docs_core = ROCmDocs("RCCL Documentation") -docs_core.run_doxygen() + +name = "RCCL" +get_major = r'sed -n -e "s/^NCCL_MAJOR.*\([0-9]\+\).*/\1/p" ../makefiles/version.mk' +get_minor = r'sed -n -e "s/^NCCL_MINOR.*\([0-9]\{2,\}\).*/\1/p" ../makefiles/version.mk' +get_patch = r'sed -n -e "s/^NCCL_PATCH.*\([0-9]\+\).*/\1/p" ../makefiles/version.mk' +major = subprocess.getoutput(get_major) +minor = subprocess.getoutput(get_minor) +patch = subprocess.getoutput(get_patch) + +external_toc_path = "./sphinx/_toc.yml" + +docs_core = ROCmDocs(f"{name} {major}.{minor}.{patch} Documentation") +docs_core.run_doxygen(doxygen_root="doxygen", doxygen_path="doxygen/docBin/xml") docs_core.setup() for sphinx_var in ROCmDocs.SPHINX_VARS: diff --git a/docs/.doxygen/Doxyfile b/docs/doxygen/Doxyfile similarity index 100% rename from docs/.doxygen/Doxyfile rename to docs/doxygen/Doxyfile diff --git a/docs/index.rst b/docs/index.rst index 8da1bc8997..aacc95593b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,9 +1,11 @@ -****************** -RCCL documentation -****************** +**** +RCCL +**** + +The ROCm Collective Communication Library (RCCL) is a stand-alone library which provides multi-GPU and multi-node collective communication primitives optimized for AMD GPUs. + +RCCL (pronounced “Rickel”) implements routines such as all-reduce, all-gather, reduce, broadcast, reduce-scatter, gather, scatter, all-to-allv, and all-to-all as well as direct point-to-point (GPU-to-GPU) send and receive operations. 
+ +The provided collective communication routines are implemented using Ring and Tree algorithms. They are optimized to achieve high bandwidth and low latency by leveraging topology awareness, high-speed interconnects, and RDMA-based collectives. RCCL utilizes PCIe and xGMI high-speed interconnects for intra-node communication as well as InfiniBand, RoCE, and TCP/IP for inter-node communication. -The ROCm Collective Communication Library (RCCL) is a stand-alone library which provides multi-GPU and multi-node collective communication primitives optimized for AMD GPUs. RCCL (pronounced “Rickel”) implements routines such as all-reduce, all-gather, reduce, broadcast, reduce-scatter, gather, scatter, all-to-allv, and all-to-all as well as direct point-to-point (GPU-to-GPU) send and receive operations. The provided collective communication routines are implemented using Ring and Tree algorithms. They are optimized to achieve high bandwidth and low latency by leveraging topology awareness, high-speed interconnects, RDMA based collectives. RCCL utilizes PCIe and xGMI high-speed interconnects for intra-node communication as well as InfiniBand, RoCE, and TCP/IP for inter-node communication.  RCCL supports an arbitrary number of GPUs installed in a single-node or multi-node platform. It can be easily integrated into either single- or multi-process (e.g., MPI) applications. 
- - - diff --git a/docs/.sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in similarity index 100% rename from docs/.sphinx/_toc.yml.in rename to docs/sphinx/_toc.yml.in diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in new file mode 100644 index 0000000000..7f2c40a8e0 --- /dev/null +++ b/docs/sphinx/requirements.in @@ -0,0 +1 @@ +rocm-docs-core==0.13.4 diff --git a/docs/.sphinx/requirements.txt b/docs/sphinx/requirements.txt similarity index 92% rename from docs/.sphinx/requirements.txt rename to docs/sphinx/requirements.txt index 04fd196ba5..f49dc9eab1 100644 --- a/docs/.sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -46,6 +46,10 @@ idna==3.4 # via requests imagesize==1.4.1 # via sphinx +importlib-metadata==6.6.0 + # via sphinx +importlib-resources==5.12.0 + # via rocm-docs-core jinja2==3.1.2 # via # myst-parser @@ -85,6 +89,8 @@ pyjwt[crypto]==2.6.0 # via pygithub pynacl==1.5.0 # via pygithub +pytz==2023.3 + # via babel pyyaml==6.0 # via # myst-parser @@ -94,7 +100,7 @@ requests==2.31.0 # via # pygithub # sphinx -rocm-docs-core==0.13.3 +rocm-docs-core==0.13.4 # via -r requirements.in smmap==5.0.0 # via gitdb @@ -143,3 +149,7 @@ urllib3==1.26.15 # via requests wrapt==1.15.0 # via deprecated +zipp==3.15.0 + # via + # importlib-metadata + # importlib-resources