From 7456dc7d17fd67b2043f918bfdc28ca820952b6c Mon Sep 17 00:00:00 2001 From: Matt Williams Date: Thu, 20 Nov 2025 09:35:15 -0500 Subject: [PATCH] Fix ToC in API Library page (#2053) * Add intro and remove ToC [ROCm/rccl commit: 3495baa6b2b7d636008855f19752767b2505c6ef] --- .../rccl/docs/api-reference/api-library.rst | 11 ++++++-- projects/rccl/docs/doxygen/Doxyfile | 6 ++--- projects/rccl/docs/doxygen/mainpage.txt | 27 ------------------- projects/rccl/src/nccl.h.in | 1 + 4 files changed, 13 insertions(+), 32 deletions(-) delete mode 100644 projects/rccl/docs/doxygen/mainpage.txt diff --git a/projects/rccl/docs/api-reference/api-library.rst b/projects/rccl/docs/api-reference/api-library.rst index b9458a6772..fe2d123d1a 100644 --- a/projects/rccl/docs/api-reference/api-library.rst +++ b/projects/rccl/docs/api-reference/api-library.rst @@ -4,8 +4,15 @@ .. _api-library: -============= +*********** API library -============= +*********** + +RCCL (pronounced "Rickle") is a stand-alone library of standard collective communication routines for GPUs, implementing all-reduce, all-gather, reduce, broadcast, reduce-scatter, gather, scatter, and all-to-all. There is also initial support for direct GPU-to-GPU send and receive operations. It has been optimized to achieve high bandwidth on platforms using PCIe and xGMI, as well as networking using InfiniBand Verbs or TCP/IP sockets. RCCL supports an arbitrary number of GPUs installed in a single node or multiple nodes, and can be used in either single- or multi-process (e.g., MPI) applications. + +The collective operations are implemented using ring and tree algorithms and have been optimized for throughput and latency. For best performance, small operations can be either batched into larger operations or aggregated through the API. + +Operations +========== .. 
doxygenindex:: diff --git a/projects/rccl/docs/doxygen/Doxyfile b/projects/rccl/docs/doxygen/Doxyfile index 792b88fc74..958a618757 100644 --- a/projects/rccl/docs/doxygen/Doxyfile +++ b/projects/rccl/docs/doxygen/Doxyfile @@ -576,7 +576,7 @@ HIDE_SCOPE_NAMES = NO # YES the compound reference will be hidden. # The default value is: NO. -HIDE_COMPOUND_REFERENCE= NO +HIDE_COMPOUND_REFERENCE = NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. @@ -680,7 +680,7 @@ GENERATE_BUGLIST = YES # the documentation. # The default value is: YES. -GENERATE_DEPRECATEDLIST= YES +GENERATE_DEPRECATEDLIST = YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond @@ -829,7 +829,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = mainpage.txt ../../src/nccl.h.in +INPUT = ../../src/nccl.h.in # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/projects/rccl/docs/doxygen/mainpage.txt b/projects/rccl/docs/doxygen/mainpage.txt deleted file mode 100644 index 34c35a33c8..0000000000 --- a/projects/rccl/docs/doxygen/mainpage.txt +++ /dev/null @@ -1,27 +0,0 @@ -/*! \mainpage RCCL Documentation - -\tableofcontents - -\section intro_sec Introduction - -RCCL (pronounced "Rickle") is a stand-alone library of standard collective communication routines for GPUs, implementing all-reduce, all-gather, reduce, broadcast, reduce-scatter, gather, scatter, and all-to-all. There is also initial support for direct GPU-to-GPU send and receive operations. It has been optimized to achieve high bandwidth on platforms using PCIe, xGMI as well as networking using InfiniBand Verbs or TCP/IP sockets. 
RCCL supports an arbitrary number of GPUs installed in a single node or multiple nodes, and can be used in either single- or multi-process (e.g., MPI) applications. - -The collective operations are implemented using ring and tree algorithms and have been optimized for throughput and latency. For best performance, small operations can be either batched into larger operations or aggregated through the API. - -\section API RCCL API Contents -- @ref rccl_api_version -- @ref rccl_result_code -- @ref rccl_config_type -- @ref rccl_api_communicator -- @ref rccl_api_errcheck -- @ref rccl_api_comminfo -- @ref rccl_api_enumerations -- @ref rccl_api_custom_redop -- @ref rccl_collective_api -- @ref rccl_group_api -- @ref msccl_api - -\section Full RCCL API File -- nccl.h.in - -*/ diff --git a/projects/rccl/src/nccl.h.in b/projects/rccl/src/nccl.h.in index 88c4c24344..523b6d9079 100644 --- a/projects/rccl/src/nccl.h.in +++ b/projects/rccl/src/nccl.h.in @@ -5,6 +5,7 @@ * * See LICENSE.txt for license information ************************************************************************/ + #ifndef NCCL_H_ #define NCCL_H_