# Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved.
# Modifications Copyright (c) Microsoft Corporation. Licensed under the MIT License.

# CMake version minimum requirements
#==================================================================================================
# Kept ahead of project() so that policy defaults are established before languages are enabled.
cmake_minimum_required(VERSION 3.16)

# CMake Toolchain file to define compilers and path to ROCm
#==================================================================================================
# Fall back to the bundled Linux toolchain only when the caller did not supply one.
# This has to happen before project(), which is where the toolchain file is consumed.
if(NOT CMAKE_TOOLCHAIN_FILE)
  set(CMAKE_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/toolchain-linux.cmake")
  message(STATUS "CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}")
endif()

# RCCL project
#==================================================================================================
project(rccl CXX)

# Build options
#==================================================================================================
# All entries are boolean cache options; override with -D<NAME>=ON|OFF at configure time.
option(BUILD_ADDRESS_SANITIZER      "Enable address sanitizer"                      OFF)
option(BUILD_BFD                    "Enable custom backtrace (if bfd.h exists)"     OFF)
option(BUILD_LOCAL_GPU_TARGET_ONLY  "Build only for GPUs detected on this machine"  OFF)
option(BUILD_SHARED_LIBS            "Build as shared library"                       ON)
option(BUILD_TESTS                  "Build unit test programs"                      OFF)
option(COLLTRACE                    "Collective Trace Option"                       ON)
option(DUMP_ASM                     "Disassemble and dump"                          OFF)
option(ENABLE_CODE_COVERAGE         "Enable code coverage"                          OFF)
option(ENABLE_MSCCL_KERNEL          "Enable MSCCL while compiling"                  OFF)
option(ENABLE_MSCCLPP               "Enable MSCCL++"                                OFF)
option(ENABLE_MSCCLPP_CLIP          "Enable MSCCL++ CLIP"                           OFF)
option(ENABLE_MSCCLPP_EXECUTOR      "Enable MSCCL++ Executor"                       OFF)
option(ENABLE_MSCCLPP_FORMAT_CHECKS "Enable formatting checks in MSCCL++"           OFF)
option(MSCCLPP_APPLY_PATCHES        "Apply source code patches to MSCCL++"          ON)
option(ENABLE_NPKIT                 "Enable NPKit"                                  OFF)
option(ENABLE_IFC                   "Enable indirect function call"                 OFF)
option(GENERATE_SYM_KERNELS         "Generate symmetric memory kernels"             OFF)
option(INSTALL_DEPENDENCIES         "Force install dependencies"                    OFF)
option(REPORT_KERNEL_RESOURCE_USE   "Append -Rpass-analysis=kernel to CXX flags"    OFF)
option(ROCTX                        "Enable ROCTX"                                  ON)
option(PROFILE                      "Enable profiling"                              OFF)
option(TIMETRACE                    "Enable time-trace during compilation"          OFF)
option(TRACE                        "Enable additional tracing"                     OFF)
option(FAULT_INJECTION              "Enable fault injection"                        ON)
option(QUIET_WARNINGS               "Suppress compiler warnings"                    OFF)
option(ENABLE_ROCSHMEM              "Enable rocSHMEM support in RCCL"               OFF)

# Default GPU architectures to build
#==================================================================================================
# Used as the default value of the GPU_TARGETS cache entry, and may be replaced by the locally
# detected targets when BUILD_LOCAL_GPU_TARGET_ONLY is enabled.
set(DEFAULT_GPUS
  gfx906
  gfx908
  gfx90a
  gfx942
  gfx950
  gfx1030
  gfx1100
  gfx1101
  gfx1102
  gfx1200
  gfx1201)

# Load CMake modules
#==================================================================================================
# Standard CMake check modules. The CXX-specific ones are listed explicitly because this file
# calls check_include_file_cxx, check_cxx_compiler_flag and check_cxx_source_compiles further down.
include(CheckIncludeFiles)
include(CheckIncludeFileCXX)
include(CheckSymbolExists)
include(CheckCXXCompilerFlag)
include(CheckCXXSourceCompiles)

# Project-local modules
include(cmake/Dependencies.cmake)            # GTest, rocm-cmake, rocm_local_targets
include(cmake/CheckSymbolExistsNoWarn.cmake)
include(cmake/MSCCLPP.cmake)

# Include rocSHMEM build module only if enabled
if(ENABLE_ROCSHMEM)
  include(cmake/ROCSHMEM.cmake)
endif()

# Make the project's cmake/ directory searchable for later include()/find_package() calls
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# Build only for local GPU architecture
# When requested, DEFAULT_GPUS is passed to rocm_local_targets() as its output variable; if that
# command is unavailable the stock DEFAULT_GPUS list is kept and a warning is printed.
if(BUILD_LOCAL_GPU_TARGET_ONLY)
  message(STATUS "Building only for local GPU target")
  if(COMMAND rocm_local_targets)
    rocm_local_targets(DEFAULT_GPUS)
  else()
    message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs.")
  endif()
endif()

# Determine which GPU architectures to build for
# (a user-supplied -DGPU_TARGETS=... wins because the cache entry is not FORCE-set)
set(GPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if GPU_TARGETS is not defined.")

# ROCM NetIB patch
include(cmake/rocmIb.cmake)

# Modify GPU architectures for Address Sanitizer builds by appending "xnack+"
# Empty list entries are dropped; entries that already contain ":xnack+" are kept unchanged.
# NOTE(review): an entry such as "gfx90a:xnack-" would become "gfx90a:xnack-:xnack+" - confirm
# whether such input needs to be handled.
if(BUILD_ADDRESS_SANITIZER)
  set(amdgpu_targets "")
  foreach(amdgpu_target IN LISTS GPU_TARGETS)
    if(NOT amdgpu_target STREQUAL "")
      string(FIND "${amdgpu_target}" ":xnack+" HAS_XNACK_SUFFIX)
      if(HAS_XNACK_SUFFIX EQUAL -1)
        list(APPEND amdgpu_targets "${amdgpu_target}:xnack+")
      else()
        list(APPEND amdgpu_targets "${amdgpu_target}")
      endif()
    endif()
  endforeach()
  # Normal (non-cache) variable: shadows the GPU_TARGETS cache entry for the rest of this scope
  set(GPU_TARGETS "${amdgpu_targets}")
endif()

# Check if clang compiler can offload to GPU_TARGETS
# rocm_check_target_ids() receives the requested targets and stores its result in SUPPORTED_GPUS.
# Without that command the requested targets cannot be validated, so DEFAULT_GPUS is used as-is.
if(COMMAND rocm_check_target_ids)
  message(STATUS "Checking for ROCm support for GPU targets: " "${GPU_TARGETS}")
  rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${GPU_TARGETS})
else()
  message(WARNING "Unable to check for supported GPU targets. Falling back to default GPUs.")
  set(SUPPORTED_GPUS ${DEFAULT_GPUS})
endif()

set(GPU_TARGETS "${SUPPORTED_GPUS}")
message(STATUS "Compiling for ${GPU_TARGETS}")

## NOTE: Reload rocm-cmake in order to update GPU_TARGETS
include(cmake/Dependencies.cmake)  # Reloading to use desired GPU_TARGETS instead of defaults

# Try to establish ROCM_PATH (for find_package)
#==================================================================================================
if(NOT DEFINED ROCM_PATH)
  # Guess default location
  set(ROCM_PATH "/opt/rocm")
  message(WARNING "Unable to find ROCM_PATH: Falling back to ${ROCM_PATH}")
else()
  message(STATUS "ROCM_PATH found: ${ROCM_PATH}")
endif()
# Export to the configure-time environment so child processes and find modules see the same path
set(ENV{ROCM_PATH} "${ROCM_PATH}")

# Identify the compiler from its path and record how to extract its version later:
#   COMPILER_EXE_NAME    - executable name passed to find_program()
#   COMPILER_GREP_STRING - line of `--version` output that carries the version
#   COMPILER_AWK_CMD     - awk pipeline that picks the version field out of that line
# The amdclang++ test must come first because the clang++ pattern also matches it.
if("${CMAKE_CXX_COMPILER}" MATCHES ".*amdclang\\+\\+")
  message(STATUS "Compiling with amdclang++")
  set(COMPILER_EXE_NAME amdclang++)
  set(COMPILER_GREP_STRING "AMD clang version")
  set(COMPILER_AWK_CMD "awk -F\" \" '{ printf $4}'")
elseif("${CMAKE_CXX_COMPILER}" MATCHES ".*clang\\+\\+")
  message(STATUS "Compiling with clang++")
  set(COMPILER_EXE_NAME clang++)
  set(COMPILER_GREP_STRING "AMD clang version")
  set(COMPILER_AWK_CMD "awk -F\" \" '{ printf $4}'")
elseif("${CMAKE_CXX_COMPILER}" MATCHES ".*hipcc$")
  message(STATUS "Compiling with hipcc")
  set(COMPILER_EXE_NAME hipcc)
  set(COMPILER_GREP_STRING "HIP version")
  set(COMPILER_AWK_CMD "awk -F\" \" '{ printf $3}' | awk -F\"-\" '{ printf $1}'")
else()
  message(FATAL_ERROR "RCCL can be built only with hipcc, amdclang++ or clang++")
endif()

# Set CMAKE flags
#==================================================================================================
# NOTE(review): this is a non-FORCE cache set issued after project(); if CMAKE_INSTALL_PREFIX is
# already cached by then, this line has no effect - confirm the intended install default.
set(CMAKE_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "")
set(CMAKE_CXX_STANDARD 17)     # We use C++17 features, this will add compile option: -std=c++17
set(CMAKE_CXX_EXTENSIONS OFF)  # Without this line, it will add -std=gnu++17 instead, which has some issues.
if(ROCM_PATH)
  # Add ROCM_PATH to CMake search paths (for finding HIP / HSA)
  list(APPEND CMAKE_PREFIX_PATH
    ${ROCM_PATH}
    ${ROCM_PATH}/hip
    ${ROCM_PATH}/llvm)
endif()

# Check for required dependencies
#==================================================================================================
## Check for Threads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

## Check for HIP
find_package(hip REQUIRED)
message(STATUS "HIP compiler: ${HIP_COMPILER}")
message(STATUS "HIP runtime: ${HIP_RUNTIME}")
if(NOT "${HIP_COMPILER}" MATCHES "clang")
  message(FATAL_ERROR "RCCL requires clang-based compiler (amdclang++ or hipcc)")
endif()

## Check for compiler version
## Runs `<compiler> --version`, keeps the line matching COMPILER_GREP_STRING and lets
## COMPILER_AWK_CMD extract the version token into compiler_version_string.
find_program(compiler_executable ${COMPILER_EXE_NAME})
message(STATUS "${COMPILER_EXE_NAME} executable: ${compiler_executable}")
execute_process(
  COMMAND bash "-c" "${compiler_executable} --version | grep \"${COMPILER_GREP_STRING}\" | ${COMPILER_AWK_CMD}"
  OUTPUT_VARIABLE compiler_version_string)
message(STATUS "${COMPILER_EXE_NAME} version: ${compiler_version_string}")

## Check for HIP version
## `hipconfig -v` output is cut at the first '-' so only the leading version part is kept.
find_program(hipconfig_executable hipconfig)
message(STATUS "hipconfig executable: ${hipconfig_executable}")
execute_process(
  COMMAND bash "-c" "${hipconfig_executable} -v | awk -F\"-\" '{ printf $1 }'"
  OUTPUT_VARIABLE hip_version_string)
message(STATUS "${COMPILER_EXE_NAME} HIP version: ${hip_version_string}")

## Check for ROCm version
## Source of the version string, in priority order:
##   1. EXPLICIT_ROCM_VERSION, when non-empty
##   2. the contents of ${ROCMCORE_PATH}/.info/version (ROCMCORE_PATH defaults to ROCM_PATH)
set(EXPLICIT_ROCM_VERSION "" CACHE STRING "Explicit ROCM version to compile to (auto detect if empty)")
if(NOT DEFINED ROCMCORE_PATH)
  set(ROCMCORE_PATH "${ROCM_PATH}" CACHE PATH "Path to ROCm core")
endif()

if(EXPLICIT_ROCM_VERSION)
  set(rocm_version_string "${EXPLICIT_ROCM_VERSION}")
elseif(ROCMCORE_PATH)
  message(STATUS "Reading ROCM version from ${ROCMCORE_PATH}/.info/version")
  file(READ "${ROCMCORE_PATH}/.info/version" rocm_version_string)
else()
  message(FATAL_ERROR "Could not determine ROCM version (set EXPLICIT_ROCM_VERSION or set ROCM_PATH to a valid installation)")
endif()

# The input is quoted so that an empty version string reaches the regex as one (empty) argument
# instead of vanishing from the argument list.
string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" rocm_version_matches "${rocm_version_string}")
if(rocm_version_matches)
  set(ROCM_MAJOR_VERSION ${CMAKE_MATCH_1})
  set(ROCM_MINOR_VERSION ${CMAKE_MATCH_2})
  set(ROCM_PATCH_VERSION ${CMAKE_MATCH_3})
  message(STATUS "ROCm version: ${ROCM_MAJOR_VERSION}.${ROCM_MINOR_VERSION}.${ROCM_PATCH_VERSION}")

  # Convert the version components to int for comparison
  math(EXPR ROCM_VERSION "(10000 * ${ROCM_MAJOR_VERSION}) + (100 * ${ROCM_MINOR_VERSION}) + ${ROCM_PATCH_VERSION}")
  add_definitions("-DROCM_VERSION=${ROCM_VERSION}")
else()
  message(WARNING "Failed to extract ROCm version.")
endif()

### Required for checking HIP device symbols when building with amdclang++
set(CMAKE_REQUIRED_LIBRARIES hip::device)

### Check for hipDeviceMallocUncached support
check_symbol_exists("hipDeviceMallocUncached" "hip/hip_runtime_api.h" HIP_UNCACHED_MEMORY)

### Check for hipHostMallocUncached support
check_symbol_exists("hipHostMallocUncached" "hip/hip_runtime_api.h" HIP_HOST_UNCACHED_MEMORY)

### Check for hipDeviceMallocContiguous support
check_symbol_exists("hipDeviceMallocContiguous" "hip/hip_runtime_api.h" HIP_CONTIGUOUS_MEMORY)

# Do not leak the extra link library into later, unrelated checks
unset(CMAKE_REQUIRED_LIBRARIES)

### Check for indirect function call support
### IFC_ENABLED ends up ON only when ENABLE_IFC is requested and the HIP version is new enough.
if(ENABLE_IFC)
  if("${hip_version_string}" VERSION_GREATER_EQUAL "5.5.30201")
    set(IFC_ENABLED ON)
    message(STATUS "Indirect function call enabled")
  else()
    set(IFC_ENABLED OFF)
    message(WARNING "Indirect function call disabled - requires HIP version >= 5.5.30201")
  endif()
else()
  set(IFC_ENABLED OFF)
endif()

## Check for LL128 support
## LL128_ENABLED is only set (to ON) when the HIP version qualifies; otherwise it is left untouched.
if("${hip_version_string}" VERSION_GREATER_EQUAL "6.1.33591")
  set(LL128_ENABLED ON)
  message(STATUS "RCCL LL128 protocol enabled")
else()
  message(STATUS "RCCL LL128 protocol disabled - requires HIP version >= 6.1.33591")
endif()
## Check for hsa-runtime64
find_package(hsa-runtime64 REQUIRED)
get_target_property(HSA_INCLUDE_PATH hsa-runtime64::hsa-runtime64 INTERFACE_INCLUDE_DIRECTORIES)
message(STATUS "HSA runtime: ${HSA_INCLUDE_PATH}")

## Check for amd-smi if ROCm 7.11.0 or newer
## On success this publishes SMI_INCLUDE_DIR / SMI_LIB_DIR / SMI_LIB_NAME / SMI_LIBRARIES and
## sets USE_AMDSMI so that the rocm-smi fallback below is skipped.
if(ROCM_VERSION VERSION_GREATER_EQUAL "71100")
  find_package(amd_smi PATHS ${ROCM_PATH}/lib/cmake/amd_smi)
  if(amd_smi_FOUND)
    message(STATUS "amd_smi_INCLUDE_DIR: ${amd_smi_INCLUDE_DIR}")
    message(STATUS "amd_smi_LIB_DIR: ${amd_smi_LIB_DIR}")
    set(SMI_INCLUDE_DIR "${amd_smi_INCLUDE_DIR}" CACHE INTERNAL "amd-smi include directory")
    set(SMI_LIB_DIR "${amd_smi_LIB_DIR}" CACHE INTERNAL "amd-smi library directory")
    set(SMI_LIB_NAME "amd-smi-lib" CACHE INTERNAL "amd-smi-lib for packaging")
    if(NOT EXISTS "${SMI_INCLUDE_DIR}" OR NOT EXISTS "${SMI_LIB_DIR}")
      message(FATAL_ERROR "amd_smi not found in ${SMI_INCLUDE_DIR}")
    endif()
    message(STATUS "Found amd_smi at ${SMI_INCLUDE_DIR}")
    set(SMI_LIBRARIES amd_smi)
    set(USE_AMDSMI ON CACHE INTERNAL "Use amd-smi instead of rocm-smi")
  endif()
endif()

if(NOT USE_AMDSMI)
  ## Fallback to rocm-smi if amd-smi not found or ROCm < 7.11.0
  message(WARNING "Could not find amd_smi. Falling back to rocm_smi.")
  find_package(rocm_smi PATHS ${ROCM_PATH}/lib/cmake/rocm_smi)
  if(rocm_smi_FOUND)
    set(SMI_INCLUDE_DIR "${rocm_smi_INCLUDE_DIR}" CACHE INTERNAL "rocm-smi include directory")
    set(SMI_LIB_DIR "${rocm_smi_LIB_DIR}" CACHE INTERNAL "rocm-smi library directory")
  else()
    message(WARNING "CMake could not find rocm-smi. Checking old include directory structure for rocm_smi")
    set(SMI_INCLUDE_DIR "${ROCM_PATH}/rocm_smi/include")
    set(SMI_LIB_DIR "${ROCM_PATH}/rocm_smi/lib")
  endif()
  if(NOT EXISTS "${SMI_INCLUDE_DIR}" OR NOT EXISTS "${SMI_LIB_DIR}")
    message(FATAL_ERROR "rocm_smi not found in ${SMI_INCLUDE_DIR}")
  endif()
  message(STATUS "Found rocm_smi at ${SMI_INCLUDE_DIR}")
  set(SMI_LIB_NAME "rocm-smi-lib" CACHE INTERNAL "rocm-smi-lib for packaging")
  set(SMI_LIBRARIES rocm_smi64)
  check_include_file_cxx("${SMI_INCLUDE_DIR}/rocm_smi/rocm_smi64Config.h" HAVE_ROCM_SMI64CONFIG)

  ### Check for RSMI_INIT_FLAG_THRAD_ONLY_MUTEX support
  ### (detected by a plain text search of rocm_smi.h for the identifier, spelled as upstream)
  file(READ "${SMI_INCLUDE_DIR}/rocm_smi/rocm_smi.h" rocm_smi_incl)
  string(FIND "${rocm_smi_incl}" "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX" matchres)
  if(matchres EQUAL -1)
    message(STATUS "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX not supported")
  else()
    message(STATUS "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX supported")
    set(HAVE_ROCM_SMI_THREAD_ONLY_MUTEX True)
  endif()
endif()

## Check for BFD library if custom backtrace is requested
## Results are reported through HAVE_BFD, HAVE_DECL_BFD_GET_SECTION_FLAGS,
## HAVE_DECL_BFD_GET_SECTION_VMA, HAVE_TWO_ARG_BFD_SECTION_SIZE, HAVE_IBERTY and DEMANGLE_DIR.
if(BUILD_BFD)
  enable_language(C)  # check_include_files() below needs a C compiler
  check_include_files(bfd.h HAVE_BFD)
  if(HAVE_BFD)
    message(STATUS "-- Found BFD support")

    ### Required for checking HIP device symbols when building with amdclang++
    set(CMAKE_REQUIRED_LIBRARIES hip::device)

    # Check for specific BFD feature support
    check_symbol_exists(bfd_get_section_flags "bfd.h" HAVE_DECL_BFD_GET_SECTION_FLAGS)
    check_symbol_exists(bfd_get_section_vma "bfd.h" HAVE_DECL_BFD_GET_SECTION_VMA)
    check_cxx_source_compiles(
      "#include <bfd.h>
      int main (int argc, char **argv){
        bfd_size_type size;
        bfd abfd;
        asection sec;
        size = bfd_section_size(&abfd, &sec);
        return (int)(size);
      }"
      HAVE_TWO_ARG_BFD_SECTION_SIZE)

    unset(CMAKE_REQUIRED_LIBRARIES)

    # Check for iberty support
    find_library(HAVE_IBERTY iberty PATHS /usr/lib64 /usr/lib/ PATH_SUFFIXES x86_64-linux-gnu)
    if(HAVE_IBERTY)
      message(STATUS "iberty found @ ${HAVE_IBERTY}")
    endif()

    # Check for demangle support
    find_path(DEMANGLE_DIR demangle.h PATHS /usr/include PATH_SUFFIXES libiberty)
    if(NOT DEMANGLE_DIR)
      message(WARNING "Could not find demangle.h ${DEMANGLE_DIR}")
    else()
      message(STATUS "Found demangle.h in ${DEMANGLE_DIR}")
    endif()
  else()
    message(WARNING "bfd.h header not found - Disabling custom backtrace")
  endif()
endif()

# Check for --amdgpu-kernarg-preload-count
check_cxx_compiler_flag("-mllvm --amdgpu-kernarg-preload-count=16" HAVE_KERNARG_PRELOAD)
if(HAVE_KERNARG_PRELOAD)
  message(STATUS "Kernarg preloading to SGPR enabled")
endif()

# Check whether the compiler accepts -parallel-jobs
check_cxx_compiler_flag("-parallel-jobs=12" HAVE_PARALLEL_JOBS)
if(HAVE_PARALLEL_JOBS)
  message(STATUS "Parallel jobs enabled")
endif()

## Disable building MSCCL++ if the build environment is invalid
## Currently MSCCL++ is supported only on gfx942 and gfx950, and only on Ubuntu and CentOS
set(MSCCLPP_SUPPORTED_ARCHS "gfx942" "gfx942:xnack-" "gfx942:xnack+" "gfx950" "gfx950:xnack-" "gfx950:xnack+")

# Check if any of the supported architectures are in GPU_TARGETS
# MSCCLPP_GPU_TARGETS collects the subset of GPU_TARGETS that MSCCL++ can be built for.
set(ARCH_MATCH_FOUND OFF)
set(MSCCLPP_GPU_TARGETS "")
foreach(ARCH IN LISTS GPU_TARGETS)
  if(ARCH IN_LIST MSCCLPP_SUPPORTED_ARCHS)
    set(ARCH_MATCH_FOUND ON)
    list(APPEND MSCCLPP_GPU_TARGETS "${ARCH}")
  endif()
endforeach()
# FORCE is intentional: the value is derived from GPU_TARGETS on every configure run
set(MSCCLPP_GPU_TARGETS "${MSCCLPP_GPU_TARGETS}" CACHE STRING "GPU Targets supported by MSCCL++" FORCE)

if(ENABLE_MSCCLPP AND NOT ARCH_MATCH_FOUND)
  set(ENABLE_MSCCLPP OFF)
  message(WARNING "Can only build MSCCL++ for supported GPU_TARGETS: ${MSCCLPP_SUPPORTED_ARCHS}; current GPU_TARGETS: ${GPU_TARGETS}; so disabling MSCCL++ build")
endif()

# MSCCL++ is only supported on ROCm 6.2.0 or newer
if(ENABLE_MSCCLPP AND ROCM_VERSION VERSION_LESS "60200")
  set(ENABLE_MSCCLPP OFF)
  message(WARNING "MSCCL++ integration only supported on ROCm 6.2.0 or greater; disabling MSCCL++ build")
endif()

## Disable WARP_SPEED if the build environment is invalid
## NOTE(review): unlike the MSCCL++ check, this warns and forces ENABLE_WARP_SPEED OFF whenever no
## supported target is present, regardless of whether WARP_SPEED was requested - confirm intended.
set(WARP_SPEED_SUPPORTED_ARCHS "gfx942" "gfx942:xnack-" "gfx942:xnack+" "gfx950" "gfx950:xnack-" "gfx950:xnack+")
set(ARCH_MATCH_FOUND OFF)
foreach(ARCH IN LISTS GPU_TARGETS)
  if(ARCH IN_LIST WARP_SPEED_SUPPORTED_ARCHS)
    set(ARCH_MATCH_FOUND ON)
  endif()
endforeach()
if(NOT ARCH_MATCH_FOUND)
  set(ENABLE_WARP_SPEED OFF)
  message(WARNING "Can only build WARP_SPEED for supported GPU_TARGETS: ${WARP_SPEED_SUPPORTED_ARCHS}; current GPU_TARGETS: ${GPU_TARGETS}; so disabling WARP_SPEED build")
endif()

# Detect the host distribution from /etc/os-release.
# The second `cut` strips optional double quotes around the value (ID="centos" -> centos).
# cmake_host_system_information(RESULT HOST_OS_ID QUERY DISTRIB_ID) ## Requires cmake 3.22
execute_process(
  COMMAND bash -c "grep '^ID=' /etc/os-release | cut -d'=' -f2 | cut -d'\"' -f2"
  OUTPUT_VARIABLE HOST_OS_ID
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
# HOST_OS_FAMILY (ID_LIKE) is captured here but not consumed in this part of the file
execute_process(
  COMMAND bash -c "grep '^ID_LIKE=' /etc/os-release | cut -d'=' -f2 | cut -d'\"' -f2"
  OUTPUT_VARIABLE HOST_OS_FAMILY
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# HOST_OS_ID is quoted so that an empty result (e.g. no /etc/os-release) still forms a valid
# comparison instead of collapsing the if() expression.
if(ENABLE_MSCCLPP AND NOT ("${HOST_OS_ID}" STREQUAL "ubuntu" OR "${HOST_OS_ID}" STREQUAL "centos"))
  set(ENABLE_MSCCLPP OFF)
  message(WARNING "MSCCL++ integration not supported on this OS (${HOST_OS_ID}); disabling MSCCL++ build")
endif()

# Check for ROCTX
# ROCTX_ENABLE is set only when both the roctx64 library and roctracer/roctx.h are located.
if(ROCTX)
  find_library(ROCTX_LIB NAMES roctx64)
  find_path(ROCTRACER_INCLUDE_DIR "roctracer/roctx.h")
  if(ROCTX_LIB AND ROCTRACER_INCLUDE_DIR)
    set(ROCTX_ENABLE ON)
    message(STATUS "ROCTX include directory found: ${ROCTRACER_INCLUDE_DIR}")
    message(STATUS "ROCTX library found: ${ROCTX_LIB}")
  else()
    message(WARNING "ROCTX library not found. Skipping ROCTX linking.")
  endif()
endif()

# Determine version from makefiles/version.mk and fill in templates
#==================================================================================================
## parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist
## NCCL_SUFFIX is optional
## NCCL_VERSION formatting is ((X) * 1000 + (Y) * 100 + (Z)) so we must first detect one or two digits first
## Required fields use [0-9]+ so that a missing number is reported instead of yielding an empty value.
file(READ makefiles/version.mk version_mk_text)

if("${version_mk_text}" MATCHES "NCCL_MAJOR *:= *([0-9]+)")
  set(NCCL_MAJOR ${CMAKE_MATCH_1})
else()
  message(FATAL_ERROR "Failed to parse NCCL_MAJOR")
endif()

if("${version_mk_text}" MATCHES "NCCL_MINOR *:= *([0-9]+)")
  set(NCCL_MINOR ${CMAKE_MATCH_1})
else()
  message(FATAL_ERROR "Failed to parse NCCL_MINOR")
endif()

if("${version_mk_text}" MATCHES "NCCL_PATCH *:= *([0-9]+)")
  set(NCCL_PATCH ${CMAKE_MATCH_1})
else()
  message(FATAL_ERROR "Failed to parse NCCL_PATCH")
endif()

# Optional: an absent or empty suffix leaves NCCL_SUFFIX unset
if("${version_mk_text}" MATCHES "NCCL_SUFFIX *:= *([0-9]*)")
  set(NCCL_SUFFIX ${CMAKE_MATCH_1})
else()
  set(NCCL_SUFFIX)
endif()

if("${version_mk_text}" MATCHES "PKG_REVISION *:= *([0-9]+)")
  set(PKG_REVISION ${CMAKE_MATCH_1})
else()
  message(FATAL_ERROR "Failed to parse PKG_REVISION")
endif()

# Zero-pad a single-digit patch level so the patch always occupies two digits
if("${NCCL_PATCH}" MATCHES "[0-9][0-9]")
  set(NCCL_VERSION "${NCCL_MAJOR}${NCCL_MINOR}${NCCL_PATCH}")
else()
  set(NCCL_VERSION "${NCCL_MAJOR}${NCCL_MINOR}0${NCCL_PATCH}")
endif()

## Setup VERSION
set(VERSION_STRING "${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}")
rocm_setup_version(VERSION ${VERSION_STRING})

## Fill in version information for main header file
configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/rccl/rccl.h)  # For external linking
configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h)       # Used by some internal files
# Collect list of all source files
#==================================================================================================
# E.g: find src -type f \( -name "*.cc" -o -name "*.h" -o -name "*.hpp" \) | sort
set ( SRC_FILES
2025-08-28 15:45:42 -05:00
src/allocator.cc
2023-05-25 16:08:54 -06:00
src/bootstrap.cc
2026-01-20 13:01:49 -06:00
src/ce_coll.cc
2023-05-25 16:08:54 -06:00
src/channel.cc
2024-01-24 15:25:33 -08:00
src/collectives.cc
2025-12-11 17:02:35 -05:00
src/commDump.cc
2024-01-24 15:25:33 -08:00
src/debug.cc
2026-01-20 13:01:49 -06:00
src/dev_runtime.cc
2024-01-24 15:25:33 -08:00
src/enqueue.cc
src/group.cc
src/init.cc
2024-04-23 13:33:19 -07:00
src/init_nvtx.cc
2025-04-30 13:30:11 -05:00
src/mnnvl.cc
2024-01-24 15:25:33 -08:00
src/msccl.cc
src/proxy.cc
2025-04-10 11:43:54 -04:00
src/rccl_wrap.cc
2026-01-20 13:01:49 -06:00
src/sym_kernels.cc
2024-01-24 15:25:33 -08:00
src/transport.cc
src/device/all_gather.h
src/device/all_reduce.h
src/device/alltoall_pivot.h
2026-01-09 14:04:54 -06:00
src/device/alltoall_gda.h
2024-01-24 15:25:33 -08:00
src/device/broadcast.h
src/device/common.h
src/device/common_kernel.h
src/device/op128.h
src/device/primitives.h
src/device/prims_ll128.h
src/device/prims_ll.h
src/device/prims_simple.h
src/device/reduce.h
src/device/reduce_kernel.h
src/device/reduce_scatter.h
2025-07-22 07:15:15 -07:00
src/device/rccl_metadata.h
2025-10-28 10:34:48 -07:00
src/device/rccl_ptr.h
2024-01-24 15:25:33 -08:00
src/device/sendrecv.h
src/device/common.cu
src/device/onerank.cu
src/device/network/unpack/unpack_defs.h
src/device/network/unpack/unpack.h
2025-08-28 15:45:42 -05:00
src/device/symmetric/all_gather.cuh
src/device/symmetric/all_reduce.cuh
src/device/symmetric/kernel.cuh
src/device/symmetric/primitives.cuh
src/device/symmetric/reduce_scatter.cuh
2023-05-25 16:08:54 -06:00
src/graph/connect.cc
src/graph/paths.cc
src/graph/rings.cc
src/graph/rings.h
src/graph/rome_models.cc
src/graph/rome_models.h
src/graph/search.cc
src/graph/topo.cc
src/graph/topo.h
src/graph/trees.cc
src/graph/tuning.cc
src/graph/xml.cc
src/graph/xml.h
2022-11-07 14:09:26 -08:00
src/include/alloc.h
2025-08-28 15:45:42 -05:00
src/include/allocator.h
2025-01-23 11:48:18 -06:00
src/include/alt_rsmi.h
2024-01-24 15:25:33 -08:00
src/include/archinfo.h
2024-08-22 12:36:07 -05:00
src/include/api_trace.h
2023-05-25 16:08:54 -06:00
src/include/argcheck.h
src/include/BfdBacktrace.hpp
2025-01-23 11:48:18 -06:00
src/include/bitops.h
2023-05-25 16:08:54 -06:00
src/include/bootstrap.h
2026-01-20 13:01:49 -06:00
src/include/ce_coll.h
2023-05-25 16:08:54 -06:00
src/include/channel.h
2022-11-07 14:09:26 -08:00
src/include/checks.h
2023-05-25 16:08:54 -06:00
src/include/collectives.h
2022-11-07 14:09:26 -08:00
src/include/coll_net.h
2023-05-25 16:08:54 -06:00
src/include/comm.h
src/include/core.h
src/include/cpuset.h
# src/include/cudawrap.h
src/include/debug.h
2026-01-20 13:01:49 -06:00
src/include/dev_runtime.h
2024-01-24 15:25:33 -08:00
src/include/device.h
2022-11-07 14:09:26 -08:00
src/include/enqueue.h
2023-05-25 16:08:54 -06:00
src/include/gdrwrap.h
src/include/git_version.h
2022-11-07 14:09:26 -08:00
src/include/graph.h
2023-05-25 16:08:54 -06:00
src/include/group.h
2024-07-12 08:14:29 -05:00
src/include/hip_rocm_version_info.h
2023-06-21 20:54:24 -07:00
src/include/ibvcore.h
src/include/ibvsymbols.h
2023-05-25 16:08:54 -06:00
src/include/ibvwrap.h
2022-11-07 14:09:26 -08:00
src/include/info.h
2023-04-25 15:38:04 -07:00
src/include/ipcsocket.h
2025-04-30 13:30:11 -05:00
src/include/mnnvl.h
2024-01-24 15:25:33 -08:00
src/include/nccl_common.h
2026-01-20 13:01:49 -06:00
src/include/nccl_device.h
2024-01-24 15:25:33 -08:00
src/include/net_device.h
src/include/net.h
src/include/nvmlwrap.h
src/include/nvtx.h
2025-04-30 13:30:11 -05:00
src/include/nvtx_payload_schemas.h
2024-01-24 15:25:33 -08:00
src/include/nvtx_stub.h
src/include/p2p.h
src/include/param.h
src/include/profiler.h
src/include/proxy.h
2025-04-23 20:46:36 -07:00
src/include/ras.h
2025-04-10 11:43:54 -04:00
src/include/rccl_common.h
2024-01-24 15:25:33 -08:00
src/include/rccl_vars.h
2024-04-23 13:33:19 -07:00
src/include/register.h
2025-08-28 15:45:42 -05:00
src/include/register_inline.h
2024-03-09 07:17:53 +08:00
src/include/rccl_float8.h
2024-01-24 15:25:33 -08:00
src/include/rocmwrap.h
2024-02-27 15:46:15 -07:00
src/include/roctx.h
2025-04-19 00:21:27 -04:00
src/include/recorder.h
2026-01-20 13:01:49 -06:00
src/include/scheduler.h
2024-01-24 15:25:33 -08:00
src/include/shm.h
2025-03-27 12:51:55 -05:00
src/include/shmutils.h
2024-01-24 15:25:33 -08:00
src/include/signals.h
src/include/socket.h
src/include/strongstream.h
2026-01-20 13:01:49 -06:00
src/include/sym_kernels.h
2024-01-24 15:25:33 -08:00
src/include/timer.h
src/include/transport.h
src/include/trees.h
src/include/tuner.h
src/include/utils.h
2025-08-28 15:45:42 -05:00
src/include/mlx5/mlx5dvcore.h
src/include/mlx5/mlx5dvsymbols.h
src/include/mlx5/mlx5dvwrap.h
2025-12-23 07:33:10 -08:00
src/include/ionic/ionicdvcore.h
src/include/ionic/ionicdvsymbols.h
src/include/ionic/ionicdvwrap.h
2023-05-25 16:08:54 -06:00
src/include/msccl/msccl_lifecycle.h
src/include/msccl/msccl_parser.h
src/include/msccl/msccl_scheduler.h
src/include/msccl/msccl_setup.h
src/include/msccl/msccl_status.h
src/include/msccl/msccl_struct.h
2026-01-20 13:01:49 -06:00
src/include/nccl_device/comm.h
src/include/nccl_device/coop.h
src/include/nccl_device/core.h
src/include/nccl_device/ll_a2a.h
src/include/nccl_device/mem_barrier.h
src/include/nccl_device/ptr.h
src/include/nccl_device/utility.h
src/include/nccl_device/impl/comm__funcs.h
src/include/nccl_device/impl/comm__types.h
src/include/nccl_device/impl/core__funcs.h
src/include/nccl_device/impl/core__types.h
src/include/nccl_device/impl/ll_a2a__funcs.h
src/include/nccl_device/impl/ll_a2a__types.h
src/include/nccl_device/impl/mem_barrier__funcs.h
src/include/nccl_device/impl/mem_barrier__types.h
src/include/nccl_device/impl/ptr__funcs.h
src/include/nccl_device/impl/ptr__types.h
2022-11-07 14:09:26 -08:00
src/include/npkit/npkit.h
2024-01-24 15:25:33 -08:00
src/include/npkit/npkit_event.h
2022-11-07 14:09:26 -08:00
src/include/npkit/npkit_struct.h
2025-01-23 11:48:18 -06:00
src/include/nvtx3/nvToolsExt.h
src/include/nvtx3/nvToolsExtCounters.h
2023-05-25 16:08:54 -06:00
src/include/nvtx3/nvToolsExtCuda.h
src/include/nvtx3/nvToolsExtCudaRt.h
2025-01-23 11:48:18 -06:00
src/include/nvtx3/nvToolsExtMem.h
src/include/nvtx3/nvToolsExtMemCudaRt.h
2023-05-25 16:08:54 -06:00
src/include/nvtx3/nvToolsExtOpenCL.h
src/include/nvtx3/nvToolsExtPayload.h
2025-01-23 11:48:18 -06:00
src/include/nvtx3/nvToolsExtPayloadHelper.h
src/include/nvtx3/nvToolsExtSemanticsCounters.h
src/include/nvtx3/nvToolsExtSemanticsScope.h
2023-05-25 16:08:54 -06:00
src/include/nvtx3/nvToolsExtSync.h
src/include/nvtx3/nvtx3.hpp
2025-01-23 11:48:18 -06:00
src/include/nvtx3/nvtxDetail/nvtxExtHelperMacros.h
src/include/nvtx3/nvtxDetail/nvtxExtImpl.h
src/include/nvtx3/nvtxDetail/nvtxExtImplCounters_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtImplMem_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtImplMemCudaRt_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtImplPayload_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtInit.h
src/include/nvtx3/nvtxDetail/nvtxExtPayloadHelperInternal.h
src/include/nvtx3/nvtxDetail/nvtxExtPayloadTypeInfo.h
src/include/nvtx3/nvtxDetail/nvtxExtTypes.h
src/include/nvtx3/nvtxDetail/nvtxImpl.h
2023-05-25 16:08:54 -06:00
src/include/nvtx3/nvtxDetail/nvtxImplCore.h
src/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h
2025-01-23 11:48:18 -06:00
src/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h
2023-05-25 16:08:54 -06:00
src/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h
2022-11-07 14:09:26 -08:00
src/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h
2025-01-23 11:48:18 -06:00
src/include/nvtx3/nvtxDetail/nvtxInit.h
2022-11-07 14:09:26 -08:00
src/include/nvtx3/nvtxDetail/nvtxInitDecls.h
src/include/nvtx3/nvtxDetail/nvtxInitDefs.h
2023-05-25 16:08:54 -06:00
src/include/nvtx3/nvtxDetail/nvtxLinkOnce.h
src/include/nvtx3/nvtxDetail/nvtxTypes.h
2025-06-25 21:01:34 -07:00
src/include/proxy_trace/proxy_trace.h
2025-06-20 07:53:59 -05:00
src/include/plugin/nccl_net.h
src/include/plugin/nccl_profiler.h
src/include/plugin/nccl_tuner.h
src/include/plugin/plugin.h
src/include/plugin/net/net_v6.h
src/include/plugin/net/net_v7.h
src/include/plugin/net/net_v8.h
src/include/plugin/net/net_v9.h
src/include/plugin/net/net_v10.h
2026-01-20 13:01:49 -06:00
src/include/plugin/net/net_v11.h
2025-06-20 07:53:59 -05:00
src/include/plugin/profiler/net_ib_v1.h
src/include/plugin/profiler/net_ib.h
src/include/plugin/profiler/net_socket_v1.h
src/include/plugin/profiler/net_socket.h
src/include/plugin/profiler/profiler_v1.h
src/include/plugin/profiler/profiler_v2.h
src/include/plugin/profiler/profiler_v3.h
2025-08-28 15:45:42 -05:00
src/include/plugin/profiler/profiler_v4.h
2026-01-20 13:01:49 -06:00
src/include/plugin/profiler/profiler_v5.h
2025-06-20 07:53:59 -05:00
src/include/plugin/tuner/tuner_v2.h
src/include/plugin/tuner/tuner_v3.h
src/include/plugin/tuner/tuner_v4.h
2026-01-20 13:01:49 -06:00
src/include/plugin/tuner/tuner_v5.h
2024-05-10 09:08:36 -07:00
src/misc/alt_rsmi.cc
2023-09-12 15:34:40 -04:00
src/misc/archinfo.cc
2023-05-25 16:08:54 -06:00
src/misc/argcheck.cc
2024-08-22 12:36:07 -05:00
src/misc/api_trace.c
src/misc/api_trace.cc
2023-05-25 16:08:54 -06:00
# src/misc/cudawrap.cc
# src/misc/gdrwrap.cc
2023-06-21 20:54:24 -07:00
src/misc/ibvsymbols.cc
2023-05-25 16:08:54 -06:00
src/misc/ibvwrap.cc
src/misc/ipcsocket.cc
2025-08-28 15:45:42 -05:00
src/misc/mlx5dvsymbols.cc
src/misc/mlx5dvwrap.cc
2025-12-23 07:33:10 -08:00
src/misc/ionicdvsymbols.cc
src/misc/ionicdvwrap.cc
2023-05-25 16:08:54 -06:00
src/misc/npkit.cc
# src/misc/nvmlwrap.cc
src/misc/nvmlwrap_stub.cc
src/misc/param.cc
src/misc/rocmwrap.cc
2024-02-27 15:46:15 -07:00
src/misc/roctx.cc
2025-04-19 00:21:27 -04:00
src/misc/recorder.cc
2023-05-25 16:08:54 -06:00
src/misc/shmutils.cc
src/misc/signals.cc
src/misc/socket.cc
src/misc/strongstream.cc
src/misc/utils.cc
2024-01-24 15:25:33 -08:00
src/misc/msccl/msccl_lifecycle.cc
src/misc/msccl/msccl_parser.cc
src/misc/msccl/msccl_setup.cc
src/misc/msccl/msccl_status.cc
2025-06-25 21:01:34 -07:00
src/misc/proxy_trace/proxy_trace.cc
2026-01-20 13:01:49 -06:00
src/nccl_device/core.cc
src/nccl_device/ll_a2a.cc
src/nccl_device/mem_barrier.cc
2025-06-20 07:53:59 -05:00
src/plugin/net.cc
src/plugin/plugin_open.cc
src/plugin/profiler.cc
src/plugin/tuner.cc
src/plugin/net/net_v6.cc
src/plugin/net/net_v7.cc
src/plugin/net/net_v8.cc
src/plugin/net/net_v9.cc
src/plugin/net/net_v10.cc
2026-01-20 13:01:49 -06:00
src/plugin/net/net_v11.cc
2025-06-20 07:53:59 -05:00
src/plugin/profiler/profiler_v1.cc
src/plugin/profiler/profiler_v2.cc
src/plugin/profiler/profiler_v3.cc
2025-08-28 15:45:42 -05:00
src/plugin/profiler/profiler_v4.cc
2026-01-20 13:01:49 -06:00
src/plugin/profiler/profiler_v5.cc
2025-06-20 07:53:59 -05:00
src/plugin/tuner/tuner_v2.cc
src/plugin/tuner/tuner_v3.cc
src/plugin/tuner/tuner_v4.cc
2026-01-20 13:01:49 -06:00
src/plugin/tuner/tuner_v5.cc
2025-04-23 20:46:36 -07:00
src/ras/client.cc
src/ras/client_support.cc
src/ras/collectives.cc
src/ras/peers.cc
src/ras/ras.cc
src/ras/ras_internal.h
src/ras/rasnet.cc
src/register/coll_reg.cc
src/register/register.cc
src/register/sendrecv_reg.cc
2026-01-20 13:01:49 -06:00
src/scheduler/symmetric_sched.cc
2023-05-25 16:08:54 -06:00
src/transport/coll_net.cc
2025-01-23 11:48:18 -06:00
src/transport/generic.cc
2023-05-25 16:08:54 -06:00
src/transport/net.cc
src/transport/net_ib.cc
2025-12-23 07:33:10 -08:00
src/transport/net_ib_rocm.cc
2023-05-25 16:08:54 -06:00
src/transport/net_socket.cc
src/transport/nvls.cc
src/transport/p2p.cc
2025-06-20 07:53:59 -05:00
src/transport/profiler.cc
2023-05-25 16:08:54 -06:00
src/transport/shm.cc
2025-07-30 14:59:28 -07:00
src/include/latency_profiler/CollTrace.h
src/include/latency_profiler/CollTraceEvent.h
src/include/latency_profiler/CollTraceFunc.h
src/include/latency_profiler/CollTraceUtils.h
src/include/latency_profiler/EventQueue.h
src/misc/latency_profiler/CollTrace.cc
src/misc/latency_profiler/CollTraceEvent.cc
src/misc/latency_profiler/CollTraceFunc.cc
src/misc/latency_profiler/CollTraceUtils.cc
2023-05-25 16:08:54 -06:00
)
2026-01-21 09:05:47 -06:00
# SMI wrapper: one header plus one implementation file, taken from either the
# AMD SMI or the ROCm SMI variant depending on USE_AMDSMI.
if(USE_AMDSMI)
  set(rccl_smi_wrap_stem amdsmi_wrap)
else()
  set(rccl_smi_wrap_stem rocm_smi_wrap)
endif()
set(SMI_SOURCES
  src/include/${rccl_smi_wrap_stem}.h
  src/misc/${rccl_smi_wrap_stem}.cc
)
list(APPEND SRC_FILES ${SMI_SOURCES})

# MSCCL kernel headers are only added to the hipify list when the kernel is enabled.
if(ENABLE_MSCCL_KERNEL)
  set(MSCCL_KERNEL_SOURCES
    src/device/msccl_kernel_impl.h
    src/include/msccl/msccl_kernel.h
  )
  list(APPEND SRC_FILES ${MSCCL_KERNEL_SOURCES})
endif()

# MSCCL++ glue (header + implementation), only when MSCCL++ is enabled.
if(ENABLE_MSCCLPP)
  set(MSCCLPP_SOURCES
    src/include/mscclpp/mscclpp_nccl.h
    src/misc/mscclpp/mscclpp_nccl.cc
  )
  list(APPEND SRC_FILES ${MSCCLPP_SOURCES})
endif()
2023-05-25 16:08:54 -06:00
# Hipify source files (copy of source generated into hipify directory)
#==================================================================================================
find_program(hipify-perl_executable hipify-perl)
if(NOT hipify-perl_executable)
  message(FATAL_ERROR "hipify-perl not found")
endif()

set(HIPIFY_DIR "${CMAKE_CURRENT_BINARY_DIR}/hipify")

# Post-processing scripts that are run on every hipified file. They are also
# listed as dependencies of each hipify rule so that editing a script
# regenerates the hipified sources.
set(rccl_add_unroll_script "${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/add_unroll.sh")
set(rccl_add_faults_script "${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/add_faults.sh")

## Loop over each source file to hipify
foreach(SRC_FILE ${SRC_FILES})
  # All paths are resolved against this directory (not CMAKE_SOURCE_DIR) so the
  # rules keep working when this project is built via add_subdirectory().
  set(rccl_src_path "${CMAKE_CURRENT_SOURCE_DIR}/${SRC_FILE}")

  # Check that file exists
  if(NOT EXISTS "${rccl_src_path}")
    message(FATAL_ERROR "Unable to find file listed in CMakeLists.txt: ${rccl_src_path}")
  endif()

  # Establish hipified copy of the source file
  set(HIP_FILE "${HIPIFY_DIR}/${SRC_FILE}")
  get_filename_component(HIP_FILE_DIR "${HIP_FILE}" DIRECTORY)

  # Make sure the file name is unique and there is no duplicate
  add_file_unique(HIP_SOURCES ${HIP_FILE})

  # Rename .cuh -> .h and .cu -> .cu.cpp so that the files get processed properly.
  # string(REPLACE) matches literal text, so the dots need no escaping.
  string(REPLACE ".cuh" ".h" HIP_FILE "${HIP_FILE}")
  string(REPLACE ".cu" ".cu.cpp" HIP_FILE "${HIP_FILE}")
  list(APPEND HIP_SOURCES ${HIP_FILE})

  # Steps executed after hipify-perl; the fault-injection pass is optional.
  set(rccl_hipify_post_steps
    COMMAND ${CMAKE_COMMAND} -E env bash "${rccl_add_unroll_script}" "${HIP_FILE}"
  )
  set(rccl_hipify_script_deps "${rccl_add_unroll_script}")
  if(FAULT_INJECTION)
    list(APPEND rccl_hipify_post_steps
      COMMAND ${CMAKE_COMMAND} -E env bash "${rccl_add_faults_script}" "${HIP_FILE}"
    )
    list(APPEND rccl_hipify_script_deps "${rccl_add_faults_script}")
  endif()

  # Create a custom command to create hipified source code.
  # Each step is its own COMMAND (run in order, stopping at the first failure)
  # so that VERBATIM can be used instead of a hand-built "a && b && c" shell line.
  add_custom_command(
    OUTPUT "${HIP_FILE}"
    COMMAND ${CMAKE_COMMAND} -E make_directory "${HIP_FILE_DIR}"
    COMMAND "${hipify-perl_executable}" -quiet-warnings "${rccl_src_path}" -o "${HIP_FILE}"
    ${rccl_hipify_post_steps}
    MAIN_DEPENDENCY ${SRC_FILE}
    DEPENDS ${rccl_hipify_script_deps}
    COMMENT "Hipifying ${SRC_FILE} -> ${HIP_FILE}"
    VERBATIM
  )
endforeach()

# Adding custom target to hipify all the source files
# This is required to make sure that all the hipified source files are
# available before compiling the unit tests executable(s)
add_custom_target(hipify_all DEPENDS ${HIP_SOURCES})
2024-02-08 14:08:24 -07:00
# Generate device/host tables and all the collective functions that are going to be in librccl.so
#==================================================================================================
# RCCL requires Python3 for generating host/device tables. REQUIRED makes
# find_package() abort the configure step itself when no interpreter is found,
# so no separate Python3_FOUND check is needed afterwards.
find_package(Python3 COMPONENTS Interpreter REQUIRED)

set(GEN_DIR "${HIPIFY_DIR}/gensrc")
set(GEN_SYM_DIR "${GEN_DIR}/symmetric")

if(ONLY_FUNCS)
  message(WARNING "Using ONLY_FUNCS = ${ONLY_FUNCS}. Not meant for release builds.")
endif()

# Execute the python script to generate required collective functions.
# The script is located relative to this directory (not CMAKE_SOURCE_DIR) so
# that it is also found when this project is built as a subproject.
# NOTE(review): the options are forwarded as unquoted positional arguments, so
# an empty/unset value would drop out and shift the following ones - confirm
# that every option except the trailing ONLY_FUNCS is always defined here.
set(rccl_generate_py "${CMAKE_CURRENT_SOURCE_DIR}/src/device/generate.py")
execute_process(
  COMMAND ${Python3_EXECUTABLE} "${rccl_generate_py}" ${GEN_DIR} ${IFC_ENABLED} ${COLLTRACE} ${ENABLE_MSCCL_KERNEL} ${BUILD_LOCAL_GPU_TARGET_ONLY} ${ONLY_FUNCS}
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  RESULT_VARIABLE gen_py_result
  ERROR_VARIABLE gen_py_error
)
# A non-zero exit status (or an error string) from the generator is fatal.
if(gen_py_result)
  message(SEND_ERROR "Error: ${gen_py_error}")
  message(FATAL_ERROR "${rccl_generate_py} failed")
endif()
2024-10-03 10:21:19 -04:00
2025-09-15 12:19:35 -04:00
if(GENERATE_SYM_KERNELS)
  # Execute the python script to generate required symmetric memory kernels.
  # The script is located relative to this directory (not CMAKE_SOURCE_DIR) so
  # that it is also found when this project is built as a subproject.
  set(rccl_sym_generate_py "${CMAKE_CURRENT_SOURCE_DIR}/src/device/symmetric/generate.py")
  execute_process(
    COMMAND ${Python3_EXECUTABLE} "${rccl_sym_generate_py}" ${GEN_SYM_DIR}
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    RESULT_VARIABLE gen_sym_py_result
    ERROR_VARIABLE gen_sym_py_error
  )
  # A non-zero exit status (or an error string) from the generator is fatal.
  if(gen_sym_py_result)
    message(SEND_ERROR "Error: ${gen_sym_py_error}")
    message(FATAL_ERROR "${rccl_sym_generate_py} failed")
  endif()
endif()
2024-10-03 10:21:19 -04:00
# Pick up every file found (recursively) below the generation directory and
# add all of them to the library sources in one step.
file(GLOB_RECURSE GENERATED_FILES "${GEN_DIR}/*")
list(APPEND HIP_SOURCES ${GENERATED_FILES})
2023-06-14 09:17:51 -06:00
2023-05-25 16:08:54 -06:00
# Create an initial git_version.cpp file (that will be updated with latest git version)
#==================================================================================================
set(rccl_git_version_cpp "${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp")

# Create an empty placeholder at configure time, but only if the file does not
# exist yet: re-running CMake must not wipe the version information written by
# the previous build (which would also force git_version.cpp to be recompiled).
if(NOT EXISTS "${rccl_git_version_cpp}")
  file(WRITE "${rccl_git_version_cpp}" "")
endif()

# Add a custom target that always runs at build time to update git version.
# The file is declared as a byproduct so the build tool knows this target
# produces it.
add_custom_target(update_git_version
  ALL
  COMMAND ${CMAKE_COMMAND}
          -DRCCL_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}
          -DRCCL_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
          -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/git_version.cmake
  BYPRODUCTS "${rccl_git_version_cpp}"
  COMMENT "Updating git version information"
  VERBATIM
)
list(APPEND HIP_SOURCES "${rccl_git_version_cpp}")
2025-06-25 21:01:34 -07:00
2023-05-25 16:08:54 -06:00
# Set up RCCL library
#==================================================================================================
## The library is built from everything collected in HIP_SOURCES
add_library(rccl ${HIP_SOURCES})

## Build-order dependency: refresh the git version before rccl is built
add_dependencies(rccl update_git_version)

## Private include search path, in lookup order
target_include_directories(rccl PRIVATE
  ${PROJECT_BINARY_DIR}/include             # generated rccl.h header
  ${HIPIFY_DIR}/src                         # hipified headers
  ${HIPIFY_DIR}/src/device
  ${HIPIFY_DIR}/src/device/network/unpack
  ${HIPIFY_DIR}/src/include
  ${HIPIFY_DIR}/src/include/mlx5
  ${HIPIFY_DIR}/src/include/nccl_device
  ${HIPIFY_DIR}/src/include/ionic
  ${HIPIFY_DIR}/src/include/plugin
  ${HIPIFY_DIR}/gensrc
  ${HSA_INCLUDE_PATH}
  ${ROCM_SMI_INCLUDE_DIR}
  ${ROCMCORE_PATH}/include
)

## Optional include directories, appended after the fixed ones
if(DEMANGLE_DIR)
  target_include_directories(rccl PRIVATE ${DEMANGLE_DIR})
endif()
if(ROCTX_ENABLE)
  target_include_directories(rccl PRIVATE ${ROCTRACER_INCLUDE_DIR})
endif()
2019-07-05 15:43:00 -07:00
2026-01-09 14:04:54 -06:00
2023-05-25 16:08:54 -06:00
## Set RCCL compile definitions
# The feature macros are gathered into a list in the order the features are
# checked and then attached to the target with a single call.
set(rccl_feature_defs "")

if(COLLTRACE)
  list(APPEND rccl_feature_defs ENABLE_COLLTRACE)
endif()

if(ENABLE_MSCCL_KERNEL)
  message(WARNING "MSCCL is deprecated and will be removed in a future version of RCCL.")
  list(APPEND rccl_feature_defs COMPILE_MSCCL_KERNEL)
endif()

if(ENABLE_MSCCLPP)
  list(APPEND rccl_feature_defs ENABLE_MSCCLPP)
endif()

if(USE_AMDSMI)
  list(APPEND rccl_feature_defs USE_AMDSMI)
else()
  # The ROCm SMI capability macros are only considered when AMD SMI is not used
  if(HAVE_ROCM_SMI64CONFIG)
    list(APPEND rccl_feature_defs USE_ROCM_SMI64CONFIG)
  endif()
  if(HAVE_ROCM_SMI_THREAD_ONLY_MUTEX)
    list(APPEND rccl_feature_defs USE_ROCM_SMI_THREAD_ONLY_MUTEX)
  endif()
endif()

if(ENABLE_WARP_SPEED)
  list(APPEND rccl_feature_defs ENABLE_WARP_SPEED)
endif()

if(rccl_feature_defs)
  target_compile_definitions(rccl PRIVATE ${rccl_feature_defs})
endif()
2026-01-09 14:04:54 -06:00
# ==== rocSHMEM integration (optional) ====
if(ENABLE_ROCSHMEM)
  # Feature macro first, then the rocSHMEM targets themselves
  target_compile_definitions(rccl PRIVATE ENABLE_ROCSHMEM)
  add_rocshmem_targets()

  # Ensure rocSHMEM is fully built/installed before compiling rccl
  if(TARGET rocshmem_ext)
    add_dependencies(rccl rocshmem_ext)
  endif()

  if(ROCSHMEM_INCLUDE_DIR)
    target_include_directories(rccl PRIVATE ${ROCSHMEM_INCLUDE_DIR})
  endif()

  # Linking ${ROCSHMEM_LIBRARY} is not done here; per the original note it was
  # moved to where the MSCCL libraries are linked. Only IBVERBS is linked here.
  target_link_libraries(rccl PRIVATE ${IBVERBS})
endif()
2025-06-23 21:51:49 -05:00
# NPKit flags
## May be better to move these to a separate file
if(ENABLE_NPKIT)
  message(WARNING "NPKit is deprecated and will be removed in a future version of RCCL. Please consider using alternative profiling tools.")

  # Every NPKit event is enabled through a pair of macros:
  #   ENABLE_NPKIT_EVENT_<EVENT>_ENTRY and ENABLE_NPKIT_EVENT_<EVENT>_EXIT
  # The list below holds the <EVENT> part; the pairs are generated from it.
  set(rccl_npkit_events
    ALL_REDUCE_RING
    ALL_REDUCE_TREE_UPDOWN
    ALL_REDUCE_TREE_SPLIT
    COPY_SEND
    DIRECT_COPY_SEND
    DIRECT_RECV
    DIRECT_RECV_COPY_SEND
    DIRECT_RECV_REDUCE_COPY_SEND
    DIRECT_SEND
    DIRECT_SEND_FROM_OUTPUT
    RECV
    RECV_COPY_SEND
    RECV_REDUCE_COPY
    RECV_REDUCE_COPY_SEND
    RECV_REDUCE_SEND
    SEND
    SEND_FROM_OUTPUT
    PRIM_SIMPLE_WAIT_PEER
    PRIM_SIMPLE_REDUCE_OR_COPY_MULTI
    PRIM_LL_WAIT_SEND
    PRIM_LL_DATA_PROCESS
    PRIM_LL128_WAIT_SEND
    PRIM_LL128_DATA_PROCESS
    NET_SEND
    NET_TEST
    NET_RECV
    ALL_REDUCE_RING_SEND
    ALL_REDUCE_RING_RECV_REDUCE_SEND
    ALL_REDUCE_RING_DIRECT_RECV_REDUCE_COPY_SEND
    ALL_REDUCE_RING_DIRECT_RECV_COPY_SEND
    ALL_REDUCE_RING_DIRECT_RECV
    ALL_REDUCE_TREE_UPDOWN_REDUCE
    ALL_REDUCE_TREE_UPDOWN_BROADCAST
    ALL_REDUCE_TREE_SPLIT_REDUCE_BROADCAST
    ALL_REDUCE_TREE_SPLIT_REDUCE
    ALL_REDUCE_TREE_SPLIT_BROADCAST
    SEND_RECV_LOCAL_COPY
    SEND_RECV_SEND
    SEND_RECV_RECV
    ALL_GATHER_RING
    ALL_GATHER_RING_SEND
    ALL_GATHER_RING_RECV_COPY_SEND
    ALL_GATHER_RING_DIRECT_RECV
    MSCCL_GENERIC_OP
    MSCCL_REDUCE
    MSCCL_SEND
    MSCCL_RECV
    MSCCL_RUN
    MSCCL_RECV_REDUCE_COPY
    MSCCL_INIT
    BROADCAST_RING
    REDUCE_SCATTER_RING
    REDUCE_SCATTER_RING_SEND
    REDUCE_SCATTER_RING_RECV_REDUCE_SEND
    REDUCE_SCATTER_RING_RECV_REDUCE_COPY
  )

  # Master switch and time-sync macros come first
  set(rccl_npkit_defs
    ENABLE_NPKIT
    ENABLE_NPKIT_EVENT_TIME_SYNC_GPU
    ENABLE_NPKIT_EVENT_TIME_SYNC_CPU
  )
  # ... followed by the ENTRY/EXIT pair of each event, in list order
  foreach(rccl_npkit_event ${rccl_npkit_events})
    list(APPEND rccl_npkit_defs
      ENABLE_NPKIT_EVENT_${rccl_npkit_event}_ENTRY
      ENABLE_NPKIT_EVENT_${rccl_npkit_event}_EXIT
    )
  endforeach()
  # ... and finally the data-process-time collection switch
  list(APPEND rccl_npkit_defs ENABLE_NPKIT_PRIM_COLLECT_DATA_PROCESS_TIME)

  target_compile_definitions(rccl PRIVATE ${rccl_npkit_defs})
endif()
2025-06-23 21:51:49 -05:00
2023-05-25 16:08:54 -06:00
# Profiling support
if(PROFILE)
  target_compile_definitions(rccl PRIVATE ENABLE_PROFILING)
endif()

# Without ROCTX the two NVTX_* macros are defined instead of ROCTX_ENABLE
if(NOT ROCTX_ENABLE)
  target_compile_definitions(rccl PRIVATE
    NVTX_NO_IMPL
    NVTX_DISABLE
  )
else()
  target_compile_definitions(rccl PRIVATE ROCTX_ENABLE)
endif()

# Trace support
if(TRACE)
  target_compile_definitions(rccl PRIVATE ENABLE_TRACE)
endif()
2024-02-29 10:06:43 -08:00
# Contiguous memory support.
# The variable is tested by name, like every other switch in this file.
# Expanding it first (if(${VAR})) would make if() dereference the expanded
# value a second time if it happened to name another variable.
if(HIP_CONTIGUOUS_MEMORY)
  target_compile_definitions(rccl PRIVATE HIP_CONTIGUOUS_MEMORY)
  message(STATUS "HIP_CONTIGUOUS_MEMORY enabled")
else()
  message(STATUS "HIP_CONTIGUOUS_MEMORY disabled")
endif()

# Uncached memory is only enabled for HIP 5.7.31920 and newer
if("${hip_version_string}" VERSION_GREATER_EQUAL "5.7.31920")
  target_compile_definitions(rccl PRIVATE HIP_UNCACHED_MEMORY)
  message(STATUS "HIP_UNCACHED_MEMORY enabled")
else()
  message(STATUS "HIP_UNCACHED_MEMORY disabled - requires HIP version >= 5.7.31920")
  # keep --hipcc-func-supp on older HIP and compiler
  if(NOT IFC_ENABLED)
    target_compile_options(rccl PRIVATE --hipcc-func-supp)
    message(STATUS "--hipcc-func-supp enabled")
  else()
    message(STATUS "--hipcc-func-supp disabled")
  endif()
endif()

# Host uncached memory support
if(HIP_HOST_UNCACHED_MEMORY)
  target_compile_definitions(rccl PRIVATE HIP_HOST_UNCACHED_MEMORY)
  message(STATUS "HIP_HOST_UNCACHED_MEMORY enabled")
else()
  message(STATUS "HIP_HOST_UNCACHED_MEMORY disabled")
endif()
2023-05-25 16:08:54 -06:00
# BFD backtrace support: each check result that is set is passed on to the
# compiler as a macro of the same name.
if(BUILD_BFD)
  foreach(rccl_bfd_check
      HAVE_BFD
      HAVE_DECL_BFD_GET_SECTION_FLAGS
      HAVE_DECL_BFD_GET_SECTION_VMA
      HAVE_TWO_ARG_BFD_SECTION_SIZE)
    if(${rccl_bfd_check})
      target_compile_definitions(rccl PRIVATE ${rccl_bfd_check})
    endif()
  endforeach()
endif()

# Indirect function call support
if(IFC_ENABLED)
  target_compile_definitions(rccl PRIVATE USE_INDIRECT_FUNCTION_CALL)
endif()

# Demangling support
if(DEMANGLE_DIR)
  target_compile_definitions(rccl PRIVATE
    "HAVE_CPLUS_DEMANGLE=1"
    "HAVE_DECL_BASENAME=1"
  )
endif()

# LL128 support
if(LL128_ENABLED)
  target_compile_definitions(rccl PRIVATE ENABLE_LL128)
endif()
2021-10-28 07:26:28 -07:00
2023-05-25 16:08:54 -06:00
## Set RCCL compile options
if(HAVE_PARALLEL_JOBS)
  target_compile_options(rccl PRIVATE -parallel-jobs=12)
endif()

# GPU code compression is requested both when compiling and when linking,
# starting with ROCm 6.2.0
if(ROCM_VERSION VERSION_GREATER_EQUAL "60200")
  target_compile_options(rccl PRIVATE --offload-compress)
  target_link_libraries(rccl PRIVATE --offload-compress)
  message(STATUS "--offload-compress enabled - ROCm version >= 6.2.0")
else()
  message(STATUS "--offload-compress disabled - ROCm version < 6.2.0")
endif()

# Warning policy and relocatable device code, always applied
target_compile_options(rccl PRIVATE
  -Werror=uninitialized
  -Werror=sometimes-uninitialized
  -Wall
  -Werror=deprecated-copy-with-user-provided-copy
  -Wno-format-nonliteral
  -Wno-unused-function
  -fgpu-rdc
)

# Extra warning suppressions, only on request
if(QUIET_WARNINGS)
  target_compile_options(rccl PRIVATE
    -Wno-invalid-offsetof
    -Wno-unused-result
    -Wno-macro-redefined
    -Wno-unused-label
    -Wno-unused-variable
    -Wno-unused-private-field
    -Wno-null-conversion
    -Wno-missing-braces
  )
endif()
2025-06-10 12:12:36 -05:00
## Set RCCL compile and linker options for unit tests and code coverage
if(ENABLE_CODE_COVERAGE)
  if(NOT CMAKE_BUILD_TYPE MATCHES "Debug")
    message(FATAL_ERROR "Code coverage is enabled, but the build type is '${CMAKE_BUILD_TYPE}'. "
                        "Code coverage requires 'Debug' build types to expose internal symbols. "
                        "Please set CMAKE_BUILD_TYPE to 'Debug' and reconfigure.")
  endif()
  message(STATUS "Code coverage is enabled with build type '${CMAKE_BUILD_TYPE}'.")

  # Each "-Xarch_host <flag>" pair is passed as one SHELL: group. CMake
  # de-duplicates compile options, so a second bare -Xarch_host would be
  # dropped and the flag following it would lose its host-only qualifier.
  target_compile_options(rccl PRIVATE
    -fvisibility=default
    "SHELL:-Xarch_host -fprofile-instr-generate"
    "SHELL:-Xarch_host -fcoverage-mapping"
  )

  # Linker flag sets for coverage builds; they differ only in the rpath
  # ($ORIGIN for the shared-library set, $ORIGIN/../lib for the executable set).
  set(COVERAGE_SHARED_LINKER_FLAGS
    -fprofile-generate
    -Wl,--enable-new-dtags,--build-id=sha1,--rpath,$ORIGIN
  )
  set(COVERAGE_EXE_LINKER_FLAGS
    -fprofile-generate
    -Wl,--enable-new-dtags,--build-id=sha1,--rpath,$ORIGIN/../lib
  )
  # Both flag sets are applied to the library itself.
  target_link_options(rccl PRIVATE ${COVERAGE_SHARED_LINKER_FLAGS})
  target_link_options(rccl PRIVATE ${COVERAGE_EXE_LINKER_FLAGS})
elseif(BUILD_TESTS) # Enable default/hidden visibility based on build type and ROCM_VERSION
  if(ROCM_VERSION VERSION_GREATER_EQUAL "60400" AND CMAKE_BUILD_TYPE MATCHES "Debug")
    target_compile_options(rccl PRIVATE -fvisibility=default)
  else()
    target_compile_options(rccl PRIVATE -fvisibility=hidden)
  endif()
else() # Enable hidden visibility for library without tests/code coverage enabled
  target_compile_options(rccl PRIVATE -fvisibility=hidden)
endif()
2023-10-12 20:17:08 -05:00
# Kernel argument preloading.
# "-mllvm <arg>" is a two-token option; the SHELL: prefix keeps the pair
# together so that CMake's option de-duplication can never separate them
# (e.g. if another "-mllvm ..." option is added to this target later).
if(HAVE_KERNARG_PRELOAD)
  target_compile_options(rccl PRIVATE "SHELL:-mllvm --amdgpu-kernarg-preload-count=16")
endif()

# Optional kernel resource usage remarks, requested through a link option
if(REPORT_KERNEL_RESOURCE_USE)
  target_link_options(rccl PRIVATE -Rpass-analysis=kernel-resource-usage)
endif()
2025-09-23 10:11:32 -07:00
# Optional disassembly of the built library (one assembly listing per GPU target)
if(DUMP_ASM)
  message(STATUS "Disassembling librccl.so to asm")

  # Line tables only: keeps source line information without full debug info
  target_compile_options(rccl PRIVATE -gline-tables-only)

  set(OBJ_DUMP ${ROCM_PATH}/llvm/bin/llvm-objdump)

  # Step 1: run llvm-objdump --offload-fatbin on the library
  # (presumably this produces the per-architecture files read in step 2)
  add_custom_command(
    TARGET rccl POST_BUILD
    COMMAND /bin/bash -c "${OBJ_DUMP} --offload-fatbin librccl.so"
    COMMENT "Disassembling RCCL library"
    VERBATIM
  )

  # Step 2: disassemble the per-architecture file of each GPU target into
  # librccl.<arch>.s
  foreach(rccl_gpu_arch ${GPU_TARGETS})
    add_custom_command(
      TARGET rccl POST_BUILD
      COMMAND /bin/bash -c "${OBJ_DUMP} -d -l --source --symbolize-operands librccl.so.0.hipv4-amdgcn-amd-amdhsa--${rccl_gpu_arch} > librccl.${rccl_gpu_arch}.s"
      COMMENT "Disassembling RCCL library to dump assembly for ${rccl_gpu_arch}"
      VERBATIM
    )
  endforeach()
endif()
2023-05-25 16:08:54 -06:00
## NOTE: This is currently being handled by rocm-cmake, however may need to be re-enabled in the future
2025-02-16 22:46:37 -05:00
#foreach(target ${GPU_TARGETS})
2023-05-25 16:08:54 -06:00
# target_compile_options(rccl PRIVATE --offload-arch=${target})
#endforeach()
2020-08-31 16:10:09 +00:00
2023-05-25 16:08:54 -06:00
# Address sanitizer: instrument rccl and use the shared ASan runtime.
if(BUILD_ADDRESS_SANITIZER)
  target_compile_options(rccl
    PRIVATE
      -fsanitize=address
      -shared-libasan
  )
endif()

# Compile-time tracing: pass -ftime-trace to every rccl compile.
if(TIMETRACE)
  target_compile_options(rccl
    PRIVATE
      -ftime-trace
  )
endif()

# Fault injection: announce it at configure time and define ENABLE_FAULT_INJECTION
# for the rccl sources.
if(FAULT_INJECTION)
  message(STATUS "Fault injection enabled")
  target_compile_definitions(rccl
    PRIVATE
      ENABLE_FAULT_INJECTION
  )
endif()
2019-07-05 15:43:00 -07:00
2023-05-25 16:08:54 -06:00
## Library search directories for RCCL
# SMI_LIB_DIR is added as a private link directory of the rccl target.
target_link_directories(rccl
  PRIVATE
    ${SMI_LIB_DIR}
)
2019-07-05 15:43:00 -07:00
2024-08-22 12:36:07 -05:00
## rocprofiler-register support
# The RCCL_ROCPROFILER_REGISTER option is only offered (default ON) when
# ROCM_VERSION >= 60100. On older ROCm it is forced OFF in the cache so that the
# check below never tries to find the package; a request to enable it is reported.
if(ROCM_VERSION VERSION_GREATER_EQUAL "60100")
  option(RCCL_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON)
else()
  if(RCCL_ROCPROFILER_REGISTER)
    # The version quoted here must match the guard above (60100, presumably ROCm 6.1).
    message(AUTHOR_WARNING "RCCL_ROCPROFILER_REGISTER is not a valid option for ROCm < 6.1. Current ROCm version: ${ROCM_VERSION}")
  endif()
  set(RCCL_ROCPROFILER_REGISTER OFF CACHE BOOL "" FORCE)
endif()

# When enabled, the package is mandatory: define RCCL_ROCPROFILER_REGISTER=1 for the
# rccl sources and link the imported rocprofiler-register target.
if(RCCL_ROCPROFILER_REGISTER)
  find_package(rocprofiler-register REQUIRED)
  target_compile_definitions(rccl PRIVATE RCCL_ROCPROFILER_REGISTER=1)
  target_link_libraries(rccl PRIVATE rocprofiler-register::rocprofiler-register)
endif()
2023-05-25 16:08:54 -06:00
## Libraries linked into RCCL
# libbfd when HAVE_BFD is set; libiberty and zlib are added as well when HAVE_IBERTY is set.
if(HAVE_BFD)
  target_link_libraries(rccl PRIVATE bfd)
  if(HAVE_IBERTY)
    target_link_libraries(rccl PRIVATE iberty z)
  endif()
endif()

# ROCTX library, only when ROCTX_ENABLE is set.
if(ROCTX_ENABLE)
  target_link_libraries(rccl PRIVATE ${ROCTX_LIB})
endif()

# Always-on link items, listed in the order they must appear on the link line.
target_link_libraries(rccl
  PRIVATE
    -fgpu-rdc             # Required when linking relocatable device code
    Threads::Threads
  INTERFACE
    hip::host
  PRIVATE
    hip::device
    dl
    ${SMI_LIBRARIES}
    fmt::fmt-header-only
)

# MSCCL++ library.
if(ENABLE_MSCCLPP)
  target_link_libraries(rccl PRIVATE mscclpp_nccl)
endif()

# rocSHMEM followed by ibverbs.
if(ENABLE_ROCSHMEM)
  target_link_libraries(rccl PRIVATE ${ROCSHMEM_LIBRARY} ${IBVERBS})
endif()
2022-05-20 09:56:38 -06:00
2023-05-25 16:08:54 -06:00
## Set RCCL link options
## Find out available memory (result: memory_in_gb)
# Three sources are tried in order; the first that yields a plain number wins:
#   1. /sys/fs/cgroup/memory.max, in bytes (skipped when absent or not numeric, e.g. "max")
#   2. the first number printed by `free`, in KB
#   3. CMake's AVAILABLE_PHYSICAL_MEMORY host query (divided by 1024 below)
# The probe output is always quoted in if(): an empty result would otherwise expand
# to `if( MATCHES ...)`, which CMake rejects as a malformed condition.
execute_process(
  COMMAND bash "-c" "cat /sys/fs/cgroup/memory.max"
  OUTPUT_VARIABLE memory_max_string
  OUTPUT_STRIP_TRAILING_WHITESPACE
  ERROR_QUIET)
if("${memory_max_string}" MATCHES "^[0-9]+$")
  math(EXPR memory_in_gb "${memory_max_string} / (1024 * 1024 * 1024)") ## bytes to GB conversion
else()
  execute_process(
    COMMAND bash "-c" "free | grep -o '[[:digit:]]*' | head -1"
    OUTPUT_VARIABLE memory_max_string
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET)
  ## memory_max_string holds the first number printed by `free`, in KB
  ## NOTE(review): that is normally the total-memory column rather than free memory - confirm
  if("${memory_max_string}" MATCHES "^[0-9]+$")
    math(EXPR memory_in_gb "${memory_max_string} / (1024 * 1024)") ## KB to GB conversion
  else()
    cmake_host_system_information(RESULT memory_max_string QUERY AVAILABLE_PHYSICAL_MEMORY)
    math(EXPR memory_in_gb "${memory_max_string} / 1024")
  endif()
endif()

## Reserve 16GB for each linker job. Limit the number of linker jobs to the range 1..16
if(HAVE_PARALLEL_JOBS)
  math(EXPR num_linker_jobs "(${memory_in_gb} + 15) / 16")
  if(num_linker_jobs GREATER_EQUAL 16)
    set(num_linker_jobs "16")
  elseif(num_linker_jobs LESS 1)
    # Less than 1 GB detected: still request one job rather than -parallel-jobs=0.
    set(num_linker_jobs "1")
  endif()
  message(STATUS "Use ${num_linker_jobs} jobs for linking")
  target_link_options(rccl PRIVATE -parallel-jobs=${num_linker_jobs}) # Use multiple threads to link
endif()
2023-05-25 16:08:54 -06:00
# Address-sanitizer builds link with lld.
if(BUILD_ADDRESS_SANITIZER)
  target_link_options(rccl
    PRIVATE
      -fuse-ld=lld
  )
endif()

# Time tracing is requested for the link step as well.
if(TIMETRACE)
  target_link_options(rccl
    PRIVATE
      -ftime-trace
  )
endif()

# Report which flavour of the library this configuration produces.
if(BUILD_SHARED_LIBS)
  message(STATUS "Building shared RCCL library")
else()
  message(STATUS "Building static RCCL library")
endif()

# Forward the kernarg preload count to the offload linker. The SHELL: prefix keeps
# the two words together as one option group.
if(HAVE_KERNARG_PRELOAD)
  target_link_options(rccl
    PRIVATE
      "SHELL:-Xoffload-linker -mllvm=-amdgpu-kernarg-preload-count=16"
  )
endif()
2023-05-25 16:08:54 -06:00
2024-09-11 09:55:16 -06:00
# Register the MSCCL++ targets when that integration is enabled.
if(ENABLE_MSCCLPP)
  add_mscclpp_targets()
endif()

## Track linking time
# The rccl link command is launched through `cmake -E time`.
set_property(
  TARGET rccl
  PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time"
)

## Setup librccl.so version
rocm_set_soversion(rccl "1.0")
2024-10-16 09:58:50 -04:00
# Static builds: replace the archive-creation rule with a compiler invocation.
if(NOT BUILD_SHARED_LIBS)
  # A static library built with `-fgpu-rdc` needs `--hip-link` and `--emit-static-lib`,
  # and amdclang++ must be run once more so that GPU code generation takes place.
  set(static_link_flags ${CXXFLAGS} --hip-link -fgpu-rdc --emit-static-lib)

  # Every entry of rccl's LINK_LIBRARIES that is a CMake target with a known
  # location is passed to that compiler invocation as -l<location>.
  get_target_property(rccl_libs rccl LINK_LIBRARIES)
  foreach(link_item ${rccl_libs})
    if(NOT TARGET ${link_item})
      continue()
    endif()
    get_target_property(location ${link_item} LOCATION)
    if(location)
      list(APPEND static_link_flags -l${location})
    endif()
  endforeach()

  # One --offload-arch flag per requested GPU architecture.
  foreach(gpu_arch ${GPU_TARGETS})
    list(APPEND static_link_flags --offload-arch=${gpu_arch})
  endforeach()

  # Invoking amdclang++ this way already yields a static archive, so the flags are
  # flattened into one string and used as the ARCHIVE_CREATE rule.
  list(JOIN static_link_flags " " flags_str)
  set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_CXX_COMPILER> ${flags_str} -o <TARGET> <OBJECTS>")
endif()
2023-05-25 16:08:54 -06:00
# Install settings
#==================================================================================================
## Install targets and public headers
rocm_install_targets(TARGETS rccl)

# rccl.h is taken from the build tree; nccl_net.h comes from the source tree.
rocm_install(
  FILES
    ${PROJECT_BINARY_DIR}/include/rccl/rccl.h
    src/include/plugin/nccl_net.h
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rccl
)
rocm_install(
  FILES
    src/include/api_trace.h
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rccl/amd_detail
)

# Copy both MSCCL algorithm directories into the build tree at configure time.
file(COPY
  tools/msccl-algorithms
  tools/msccl-unit-test-algorithms
  DESTINATION ${PROJECT_BINARY_DIR}
)

## Install Algorithm files under share folder
rocm_install(
  DIRECTORY ${PROJECT_BINARY_DIR}/msccl-algorithms
  DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl
)
rocm_install(
  DIRECTORY ${PROJECT_BINARY_DIR}/msccl-unit-test-algorithms
  DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl
)
2019-07-05 15:43:00 -07:00
2023-05-25 16:08:54 -06:00
# Export rccl under the roc:: namespace; the exported package depends on hip.
rocm_export_targets(
  TARGETS rccl
  NAMESPACE roc::
  DEPENDS hip
)

## Set package dependencies
# Address-sanitizer builds depend on the ASan flavour of the HIP runtime package.
set(DEPENDS_HIP_RUNTIME "hip-runtime-amd")
if(BUILD_ADDRESS_SANITIZER)
  set(DEPENDS_HIP_RUNTIME "hip-runtime-amd-asan")
endif()
rocm_package_add_dependencies(
  DEPENDS
    "${DEPENDS_HIP_RUNTIME} >= 4.5.0"
    "${SMI_LIB_NAME}"
)

# CPack: component-based DEB and RPM packages, automatic shared-library dependency
# detection for DEB, and /opt plus ROCM_PATH excluded from the RPM file list.
set(CPACK_DEB_COMPONENT_INSTALL ON)
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt" "${ROCM_PATH}")
2019-07-05 15:43:00 -07:00
2020-10-21 16:20:53 -07:00
# Debian packaging extras: when a Debian marker file is found, generate the copyright
# and compressed changelog files in the build tree, install them under
# <datadir>/rccl, and set the Debian package description.
# The probe looks for debian_version or debconf.conf with /etc as a search path; the
# result is stored in DEBIAN.
find_file ( DEBIAN debian_version debconf.conf PATHS /etc )
if ( DEBIAN )
# Write copyright file
file ( WRITE "${CMAKE_BINARY_DIR}/copyright"
"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: rccl
2024-03-25 16:29:13 -06:00
Source: https://github.com/ROCm/rccl
2020-10-21 16:20:53 -07:00
Files: *
Copyright: (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
2023-02-04 01:43:38 +00:00
Modifications Copyright (c) 2020-2023 Advanced Micro Devices, Inc. All rights reserved.
2023-03-15 05:34:25 +08:00
Modifications Copyright (c) Microsoft Corporation. Licensed under the MIT License.
2020-10-21 16:20:53 -07:00
License: See LICENSE.txt for license information\n" )
2024-03-26 11:11:09 -04:00
rocm_install ( FILES "${CMAKE_BINARY_DIR}/copyright" DESTINATION ${ CMAKE_INSTALL_DATADIR } /rccl )
2020-10-21 16:20:53 -07:00
# Write changelog file
# The entry's timestamp is the output of `date -R`, captured at configure time.
# NOTE(review): neither date_executable nor gzip_executable is checked for NOTFOUND.
find_program ( date_executable date )
execute_process ( COMMAND ${ date_executable } -R OUTPUT_VARIABLE TIMESTAMP )
file ( WRITE "${CMAKE_BINARY_DIR}/changelog"
"rccl (${VERSION_STRING}-1) unstable; urgency=medium
* Initial release.
-- RCCL Maintainer <rccl-maintainer@amd.com> ${TIMESTAMP}\n" )
# Compress the changelog to changelog.Debian.gz: gzip writes to stdout (-c) and
# OUTPUT_FILE captures it.
find_program ( gzip_executable gzip )
2025-08-27 13:29:07 -04:00
execute_process ( COMMAND bash "-c" "${gzip_executable} -9 -c -n ${CMAKE_BINARY_DIR}/changelog"
2020-10-21 16:20:53 -07:00
WORKING_DIRECTORY ${ CMAKE_BINARY_DIR } OUTPUT_FILE "${CMAKE_BINARY_DIR}/changelog.Debian.gz" )
2024-03-26 11:11:09 -04:00
rocm_install ( FILES "${CMAKE_BINARY_DIR}/changelog.Debian.gz" DESTINATION ${ CMAKE_INSTALL_DATADIR } /rccl )
2020-10-21 16:20:53 -07:00
# Two-line package description: a summary line followed by a longer description.
set ( CPACK_DEBIAN_PACKAGE_DESCRIPTION "ROCm Communication Collectives Library
Optimized primitives for collective multi-GPU communication" )
endif ( )
2025-06-29 18:53:16 -05:00
## Building RCCL RAS
include(cmake/rcclRAS.cmake)

## Unit tests (BUILD_TESTS)
# Sets up the `clients` package component and its `tests` client component (package
# name `unittests`), then adds the test/ subdirectory.
if(BUILD_TESTS)
  rocm_package_setup_component(clients)
  rocm_package_setup_client_component(tests PACKAGE_NAME unittests)
  add_subdirectory(test)

  # Shared builds additionally run the metadata-extraction script after rccl is linked.
  if(BUILD_SHARED_LIBS)
    add_custom_command(TARGET rccl POST_BUILD
      COMMENT "Extracting metadata from librccl.so"
      COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/extract_metadata.cmake
      VERBATIM
    )
  endif()
endif()
2019-07-05 15:43:00 -07:00
# Create the rccl package with the maintainer and description shown below; LDCONFIG
# is passed through to rocm-cmake's package creation.
rocm_create_package(
  NAME        rccl
  MAINTAINER  "RCCL Maintainer <rccl-maintainer@amd.com>"
  DESCRIPTION "ROCm Communication Collectives Library"
  LDCONFIG
)