From 777d8747a510f321c55b1186d7bdcedb36af43fc Mon Sep 17 00:00:00 2001 From: gilbertlee-amd <44450918+gilbertlee-amd@users.noreply.github.com> Date: Thu, 25 May 2023 16:08:54 -0600 Subject: [PATCH] Refactoring CMakeFiles (#755) --- CMakeLists.txt | 1014 ++++++++++++++++++++------------------ README.md | 3 +- cmake/Dependencies.cmake | 30 +- install.sh | 193 ++++---- test/CMakeLists.txt | 2 + 5 files changed, 669 insertions(+), 573 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c00c04641..645fb78897 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,63 +1,198 @@ -# Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. # Modifications Copyright (c) Microsoft Corporation. Licensed under the MIT License. cmake_minimum_required(VERSION 3.5) -INCLUDE(CheckIncludeFiles) -INCLUDE(CheckSymbolExists) -# We use C++14 features, this will add compile option: -std=c++14 -set( CMAKE_CXX_STANDARD 14 ) -# Without this line, it will add -std=gnu++14 instead, which has some issues. 
-set( CMAKE_CXX_EXTENSIONS OFF ) -#Adding pthread flag for linking -set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") +# RCCL project +#================================================================================================== project(rccl CXX) -include(cmake/Dependencies.cmake) +# Build options +#================================================================================================== +option(BUILD_ADDRESS_SANITIZER "Enable address sanitizer" OFF) +option(BUILD_ALLREDUCE_ONLY "AllReduce(sum,float) kernel only" OFF) +option(BUILD_BFD "Enable custom backtrace (if bfd.h exists)" ON) +option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "File/folder reorg with backward compatibility" ON) +option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on this machine" OFF) +option(BUILD_SHARED_LIBS "Build as shared library" ON) +option(BUILD_TESTS "Build unit test programs" OFF) +option(COLLTRACE "Collective Trace Option" ON) +option(ENABLE_IFC "Enable indirect function call" ON) +option(INSTALL_DEPENDENCIES "Force install dependencies" OFF) +option(PROFILE "Enable profiling" OFF) +option(TIMETRACE "Enable time-trace during compilation" OFF) +option(TRACE "Enable additional tracing" OFF) -# Detect compiler support for target ID -# This section is deprecated. Please use rocm_check_target_ids for future use. 
-if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" - OUTPUT_VARIABLE CXX_OUTPUT - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_STRIP_TRAILING_WHITESPACE) - string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) +# Default GPU architectures to build +#================================================================================================== +set(DEFAULT_GPUS + gfx803 + gfx900:xnack- + gfx906:xnack- + gfx908:xnack- + gfx90a:xnack- + gfx90a:xnack+ + gfx1030 + gfx1100 + gfx1101 + gfx1102) + +# Load CMake modules +#================================================================================================== +include(CheckIncludeFiles) +include(CheckSymbolExists) +include(cmake/Dependencies.cmake) # GTest, rocm-cmake, rocm_local_targets + +# Determine which GPU architectures to build for +if (BUILD_LOCAL_GPU_TARGET_ONLY) + message(STATUS "Building only for local GPU target") + if (COMMAND rocm_local_targets) + rocm_local_targets(DEFAULT_GPUS) + else() + message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs") + endif() endif() - -if(NOT DEFINED ROCM_PATH) - get_filename_component(_real_path ${CMAKE_CXX_COMPILER} REALPATH) - get_filename_component(_new_path "${_real_path}" DIRECTORY) - get_filename_component(ROCM_PATH "${_new_path}/../.." 
REALPATH) -endif() - -set(CMAKE_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "") - -#Set the AMDGPU_TARGETS with backward compatiblity if(COMMAND rocm_check_target_ids) - rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS - TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx1030;gfx1100;gfx1101;gfx1102" - ) + message(STATUS "Checking for ROCm support for GPU targets:") + rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${DEFAULT_GPUS}) else() - # Use target ID syntax if supported for AMDGPU_TARGETS - if(TARGET_ID_SUPPORT) - set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030;gfx1100;gfx1101;gfx1102") - else() - set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908") - endif() + message(WARNING "Unable to check for supported GPU targets. Falling back to default GPUs") + set(SUPPORTED_GPUS ${DEFAULT_GPUS}) endif() -set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") +set(AMDGPU_TARGETS "${SUPPORTED_GPUS}" CACHE STRING "AMD GPU targets to compile for" FORCE) +set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for" FORCE) +message(STATUS "Compiling for ${GPU_TARGETS}") -option(BUILD_TESTS "Build test programs" OFF) -option(INSTALL_DEPENDENCIES "Force install dependencies" OFF) -option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) -option(BUILD_ALLREDUCE_ONLY "Build AllReduce + sum + float kernel only" OFF) -#Set the header wrapper ON by default. 
-option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" ON) +## NOTE: Reload rocm-cmake in order to update GPU_TARGETS +include(cmake/Dependencies.cmake) # Reloading to use desired GPU_TARGETS instead of defaults -# parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist -# NCCL_SUFFIX is optional NCCL_VERSION formatting is ((X) * 1000 + (Y) * 100 + -# (Z)) so we must first detect one or two digits first +# Try to establish ROCM_PATH (for find_package) +#================================================================================================== +if(NOT DEFINED ROCM_PATH) + if("${CMAKE_CXX_COMPILER}" MATCHES ".*hipcc$") + # Guess based on provided compiler location + get_filename_component(_real_path ${CMAKE_CXX_COMPILER} REALPATH) + get_filename_component(_new_path "${_real_path}" DIRECTORY) + get_filename_component(ROCM_PATH "${_new_path}/.." REALPATH) + message(STATUS "Setting ROCM_PATH based on hipcc location to ${ROCM_PATH}") + else() + # Guess default location + set(ROCM_PATH "/opt/rocm") + message(WARNING "Unable to find ROCM_PATH: Falling back to ${ROCM_PATH}") + endif() +else() + message(STATUS "ROCM_PATH found: ${ROCM_PATH}") +endif() + +# Set CMAKE flags +#================================================================================================== +set(CMAKE_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "") +set(CMAKE_CXX_STANDARD 14) # We use C++14 features, this will add compile option: -std=c++14 +set(CMAKE_CXX_EXTENSIONS OFF) # Without this line, it will add -std=gnu++14 instead, which has some issues. 
+list(APPEND CMAKE_PREFIX_PATH # Add ROCM_PATH to CMake search paths (for finding HIP / HSA
+  ${ROCM_PATH}
+  ${ROCM_PATH}/hip
+  ${ROCM_PATH}/llvm
+  ${ROCM_PATH}/hcc)
+
+# Check for required dependencies
+#==================================================================================================
+## Check for Threads
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+find_package(Threads REQUIRED)
+
+## Check for HIP
+find_package(hip REQUIRED)
+message(STATUS "HIP compiler: ${HIP_COMPILER}")
+message(STATUS "HIP runtime: ${HIP_RUNTIME}")
+if(NOT "${HIP_COMPILER}" MATCHES "clang")
+  message(FATAL_ERROR "RCCL requires clang-based compiler (hipcc)")
+endif()
+find_program(hipcc_executable hipcc)
+message(STATUS "hipcc executable: ${hipcc_executable}")
+execute_process( # Extracts the third field of the 'HIP version' line and drops anything after the first '-'
+  COMMAND bash "-c" "${hipcc_executable} --version | grep 'HIP version' | awk -F\" \" '{ printf $3}' | awk -F\"-\" '{ printf $1}'"
+  OUTPUT_VARIABLE hipcc_version_string)
+message(STATUS "hipcc version: ${hipcc_version_string}")
+
+### Check for hipEventDisableSystemFence support
+check_symbol_exists("hipEventDisableSystemFence" "hip/hip_runtime_api.h" HIP_EVENT_DISABLE_FENCE)
+
+### Check for indirect function call support
+if(ENABLE_IFC)
+  if("${hipcc_version_string}" VERSION_GREATER_EQUAL "5.5.30201") # Quoted: an empty probe result compares as false instead of producing a malformed if()
+    set(IFC_ENABLED ON)
+    message(STATUS "Indirect function call enabled")
+  else()
+    set(IFC_ENABLED OFF)
+    message(WARNING "Indirect function call disabled - requires hipcc version >= 5.5.30201")
+  endif()
+endif()
+
+## Check for hsa-runtime64
+find_package(hsa-runtime64 REQUIRED)
+get_target_property(HSA_INCLUDE_PATH hsa-runtime64::hsa-runtime64 INTERFACE_INCLUDE_DIRECTORIES)
+message(STATUS "HSA runtime: ${HSA_INCLUDE_PATH}")
+check_symbol_exists("HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED" "hsa/hsa.h" HAS_HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED)
+message(STATUS "HSA DMABUF support: ${HAS_HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED}")
+
+## Check for ROCM-smi
+find_package(rocm_smi PATHS ${ROCM_PATH}/lib/cmake/rocm_smi)
+if (rocm_smi_FOUND)
+  message(STATUS "Found rocm_smi at ${ROCM_SMI_INCLUDE_DIR}")
+else()
+  message(STATUS "Checking old include directory structure for rocm_smi")
+  set(ROCM_SMI_INCLUDE_DIR "${ROCM_PATH}/rocm_smi/include")
+  set(ROCM_SMI_LIB_DIR "${ROCM_PATH}/rocm_smi/lib")
+  set(ROCM_SMI_LIBRARIES rocm_smi64)
+endif()
+check_include_file_cxx("${ROCM_SMI_INCLUDE_DIR}/rocm_smi/rocm_smi64Config.h" HAVE_ROCM_SMI64CONFIG)
+
+## Check for BFD library if custom backtrace is requested
+if(BUILD_BFD)
+  enable_language(C)
+  check_include_files(bfd.h HAVE_BFD)
+  if (HAVE_BFD)
+    message(STATUS "-- Found BFD support")
+
+    # Check for specific BFD feature support
+    CHECK_SYMBOL_EXISTS(bfd_get_section_flags "bfd.h" HAVE_DECL_BFD_GET_SECTION_FLAGS)
+    CHECK_SYMBOL_EXISTS(bfd_get_section_vma "bfd.h" HAVE_DECL_BFD_GET_SECTION_VMA)
+    CHECK_CXX_SOURCE_COMPILES( # Probe: does bfd_section_size() compile when called with (bfd*, asection*)?
+      "#include <bfd.h>
+
+      int main (int argc, char **argv){
+        bfd_size_type size;
+        bfd abfd;
+        asection sec;
+        size = bfd_section_size(&abfd, &sec);
+        return (int)(size);
+      }"
+      HAVE_TWO_ARG_BFD_SECTION_SIZE)
+
+    # Check for iberty support
+    find_library(HAVE_IBERTY iberty PATHS /usr/lib64 /usr/lib/ PATH_SUFFIXES x86_64-linux-gnu)
+    if(HAVE_IBERTY)
+      message(STATUS "iberty found @ ${HAVE_IBERTY}")
+    endif()
+
+    # Check for demangle support
+    find_path(DEMANGLE_DIR demangle.h PATHS /usr/include PATH_SUFFIXES libiberty)
+    if(NOT DEMANGLE_DIR)
+      message(WARNING "Could not find demangle.h ${DEMANGLE_DIR}")
+    else()
+      message(STATUS "Found demangle.h in ${DEMANGLE_DIR}")
+    endif()
+  else()
+    message(WARNING "bfd.h header not found - Disabling custom backtrace")
+  endif()
+endif()
+
+# Determine version from makefiles/version.mk and fill in templates
+#==================================================================================================
+## parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist
+## NCCL_SUFFIX is optional
+## NCCL_VERSION formatting is ((X) * 1000 + (Y) * 100 + (Z)) so we must first detect
one or two digits first file(READ makefiles/version.mk version_mk_text) if("${version_mk_text}" MATCHES "NCCL_MAJOR *:= *([0-9]*)") set(NCCL_MAJOR ${CMAKE_MATCH_1}) @@ -90,55 +225,193 @@ else() set(NCCL_VERSION "${NCCL_MAJOR}${NCCL_MINOR}0${NCCL_PATCH}") endif() -# Setup VERSION +## Setup VERSION set(VERSION_STRING "${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}") rocm_setup_version(VERSION ${VERSION_STRING}) -list(APPEND CMAKE_PREFIX_PATH - ${ROCM_PATH} - ${ROCM_PATH}/hip - ${ROCM_PATH}/llvm - ${ROCM_PATH}/hcc) +## Fill in version information for main header file +configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/rccl/rccl.h) # For external linking +configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h) # Used by some internal files -find_package(hip REQUIRED) -message(STATUS "HIP compiler: ${HIP_COMPILER}") -message(STATUS "HIP runtime: ${HIP_RUNTIME}") -check_symbol_exists("hipEventDisableSystemFence" "hip/hip_runtime_api.h" HIP_EVENT_DISABLE_FENCE) -if(${HIP_EVENT_DISABLE_FENCE}) - add_definitions(-DHIP_EVENT_DISABLE_FENCE) -endif() - -find_package(hsa-runtime64 REQUIRED) -get_target_property(HSA_INCLUDE_PATH hsa-runtime64::hsa-runtime64 INTERFACE_INCLUDE_DIRECTORIES) -message(STATUS "HSA runtime: ${HSA_INCLUDE_PATH}") -set(CMAKE_REQUIRED_INCLUDES ${HSA_INCLUDE_PATH}) -check_symbol_exists("HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED" "hsa.h" HAS_HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED) -if(${HAS_HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED}) - add_definitions(-DQUERY_HSA_DMABUF) -endif() - -if(BUILD_STATIC) - option(BUILD_SHARED_LIBS "Build as a shared library" OFF) -else() - option(BUILD_SHARED_LIBS "Build as a shared library" ON) -endif() - -if(BUILD_ADDRESS_SANITIZER) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") - add_link_options(-fuse-ld=lld) -endif() - -configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/rccl/rccl.h) 
-configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/rccl/nccl.h) - -include_directories(${PROJECT_BINARY_DIR}/include) # for generated rccl.h header -include_directories(${PROJECT_BINARY_DIR}/include/rccl) # for generated rccl.h header -include_directories(${PROJECT_BINARY_DIR}/src/include) # for hipified header files -include_directories(src) -include_directories(src/collectives) -include_directories(src/collectives/device) +# Collect list of all source files +#================================================================================================== +# E.g: find src -type f \( -name "*.cc" -o -name "*.h" -o -name "*.hpp" \) | sort +set(SRC_FILES + src/bootstrap.cc + src/channel.cc +# src/clique/AllReduceCliqueKernel.h +# src/clique/CliqueCommon.h +# src/clique/CliqueManager.cc +# src/clique/CliqueManager.h +# src/clique/CliqueShmNames.h +# src/clique/HandleCache.cc +# src/clique/HandleCache.h +# src/clique/HandleShm.cc +# src/clique/HandleShm.h +# src/clique/Hash.cc +# src/clique/Hash.h +# src/clique/MsgQueue.cc +# src/clique/MsgQueue.h +# src/clique/SharedMemHelper.h +# src/clique/ShmObject.cc +# src/clique/ShmObject.h + src/collectives/all_gather.cc + src/collectives/all_reduce.cc + src/collectives/all_to_all.cc + src/collectives/all_to_allv.cc + src/collectives/broadcast.cc + src/collectives/device/all_gather.h + src/collectives/device/all_reduce.h + src/collectives/device/alltoall_pivot.h + src/collectives/device/broadcast.h + src/collectives/device/common.h + src/collectives/device/common_kernel.h + src/collectives/device/op128.h + src/collectives/device/primitives.h + src/collectives/device/prims_ll128.h + src/collectives/device/prims_ll.h + src/collectives/device/prims_simple.h + src/collectives/device/reduce.h + src/collectives/device/reduce_kernel.h + src/collectives/device/reduce_scatter.h + src/collectives/device/sendrecv.h + src/collectives/gather.cc + src/collectives/msccl.cc + src/collectives/reduce.cc + 
src/collectives/reduce_scatter.cc + src/collectives/scatter.cc + src/collectives/sendrecv.cc + src/debug.cc + src/enhcompat.cc + src/enqueue.cc + src/graph/connect.cc + src/graph/paths.cc + src/graph/rings.cc + src/graph/rings.h + src/graph/rome_models.cc + src/graph/rome_models.h + src/graph/search.cc + src/graph/topo.cc + src/graph/topo.h + src/graph/trees.cc + src/graph/tuning.cc + src/graph/xml.cc + src/graph/xml.h + src/group.cc + src/include/align.h + src/include/alloc.h + src/include/argcheck.h + src/include/BfdBacktrace.hpp + src/include/bootstrap.h + src/include/channel.h + src/include/checks.h + src/include/collectives.h + src/include/coll_net.h + src/include/comm.h + src/include/core.h + src/include/cpuset.h +# src/include/cudawrap.h + src/include/debug.h + src/include/devcomm.h + src/include/enqueue.h + src/include/gdrwrap.h + src/include/git_version.h + src/include/graph.h + src/include/group.h + src/include/ibvwrap.h + src/include/info.h + src/include/ipcsocket.h + src/include/msccl/msccl_kernel.h + src/include/msccl/msccl_lifecycle.h + src/include/msccl/msccl_parser.h + src/include/msccl/msccl_scheduler.h + src/include/msccl/msccl_setup.h + src/include/msccl/msccl_status.h + src/include/msccl/msccl_struct.h + src/include/nccl_net.h + src/include/net.h + src/include/npkit/npkit_event.h + src/include/npkit/npkit.h + src/include/npkit/npkit_struct.h + src/include/nvmlwrap.h + src/include/nvtx3/nvToolsExtCuda.h + src/include/nvtx3/nvToolsExtCudaRt.h + src/include/nvtx3/nvToolsExt.h + src/include/nvtx3/nvToolsExtOpenCL.h + src/include/nvtx3/nvToolsExtPayload.h + src/include/nvtx3/nvToolsExtSync.h + src/include/nvtx3/nvtx3.hpp + src/include/nvtx3/nvtxDetail/nvtxImplCore.h + src/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h + src/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h + src/include/nvtx3/nvtxDetail/nvtxImpl.h + src/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h + src/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h + src/include/nvtx3/nvtxDetail/nvtxInitDecls.h 
+ src/include/nvtx3/nvtxDetail/nvtxInitDefs.h + src/include/nvtx3/nvtxDetail/nvtxInit.h + src/include/nvtx3/nvtxDetail/nvtxLinkOnce.h + src/include/nvtx3/nvtxDetail/nvtxTypes.h + src/include/nvtx3/nvtxExtDetail/nvtxExtImpl.h + src/include/nvtx3/nvtxExtDetail/nvtxExtImplPayload_v1.h + src/include/nvtx3/nvtxExtDetail/nvtxExtInit.h + src/include/nvtx3/nvtxExtDetail/nvtxExtPayloadTypeInfo.h + src/include/nvtx3/nvtxExtDetail/nvtxExtTypes.h + src/include/nvtx.h + src/include/nvtx_stub.h + src/include/p2p.h + src/include/param.h + src/include/profiler.h + src/include/proxy.h + src/include/rccl_bfloat16.h + src/include/rccl_vars.h + src/include/rocm_smi_wrap.h + src/include/rocmwrap.h + src/include/shm.h + src/include/signals.h + src/include/socket.h + src/include/strongstream.h + src/include/timer.h + src/include/transport.h + src/include/trees.h + src/include/utils.h + src/init.cc + src/init_nvtx.cc + src/misc/argcheck.cc +# src/misc/cudawrap.cc +# src/misc/gdrwrap.cc + src/misc/ibvwrap.cc + src/misc/ipcsocket.cc + src/misc/msccl/msccl_lifecycle.cc + src/misc/msccl/msccl_parser.cc + src/misc/msccl/msccl_setup.cc + src/misc/msccl/msccl_status.cc + src/misc/npkit.cc +# src/misc/nvmlwrap.cc + src/misc/nvmlwrap_stub.cc + src/misc/param.cc + src/misc/profiler.cc + src/misc/rocm_smi_wrap.cc + src/misc/rocmwrap.cc + src/misc/shmutils.cc + src/misc/signals.cc + src/misc/socket.cc + src/misc/strongstream.cc + src/misc/utils.cc + src/net.cc + src/proxy.cc + src/transport.cc + src/transport/coll_net.cc + src/transport/net.cc + src/transport/net_ib.cc + src/transport/net_socket.cc + src/transport/nvls.cc + src/transport/p2p.cc + src/transport/shm.cc +) +## Add kernel files +## E.g: find src -type f \( -name "*.u" \) | sort if (BUILD_ALLREDUCE_ONLY) add_definitions(-DBUILD_ALLREDUCE_ONLY) set(CU_SOURCES @@ -148,411 +421,223 @@ if (BUILD_ALLREDUCE_ONLY) src/collectives/device/msccl_kernel.cu) else() set(CU_SOURCES - src/collectives/device/all_reduce.cu 
src/collectives/device/all_gather.cu + src/collectives/device/all_reduce.cu src/collectives/device/alltoall_pivot.cu - src/collectives/device/reduce.cu src/collectives/device/broadcast.cu - src/collectives/device/reduce_scatter.cu - src/collectives/device/sendrecv.cu - src/collectives/device/onerank_reduce.cu src/collectives/device/functions.cu - src/collectives/device/msccl_kernel.cu) + src/collectives/device/msccl_kernel.cu + src/collectives/device/onerank_reduce.cu + src/collectives/device/reduce.cu + src/collectives/device/reduce_scatter.cu + src/collectives/device/sendrecv.cu) endif() +list(APPEND SRC_FILES ${CU_SOURCES}) -set(CPP_SOURCES) -foreach(filename ${CU_SOURCES}) - string(REPLACE ".cu" - ".cpp" - cpp_filename - ${filename}) - configure_file(${filename} ${cpp_filename} COPYONLY) - list(APPEND CPP_SOURCES ${cpp_filename}) -endforeach(filename) +# Hipify source files (copy of source generated into hipify directory) +#================================================================================================== +find_program(hipify-perl_executable hipify-perl) +set(HIPIFY_DIR "${CMAKE_CURRENT_BINARY_DIR}/hipify") -set(HEADER_SOURCES - src/include/collectives.h - src/include/align.h - src/include/profiler.h - src/include/alloc.h - src/include/ibvwrap.h - src/include/gdrwrap.h - src/include/utils.h - src/include/strongstream.h - src/include/comm.h - src/include/trees.h - src/include/rccl_vars.h - src/include/checks.h - src/include/p2p.h - src/include/timer.h - src/include/coll_net.h - src/include/signals.h - src/include/proxy.h - src/include/net.h - src/include/devcomm.h - src/include/enqueue.h - src/include/debug.h - src/include/argcheck.h - src/include/rocm_smi_wrap.h - src/include/bootstrap.h - src/include/BfdBacktrace.hpp - src/include/nccl_net.h - src/include/cudawrap.h - src/include/rccl_bfloat16.h - src/include/shm.h - src/include/transport.h - src/include/group.h - src/include/socket.h - src/include/cpuset.h - src/include/rocmwrap.h - 
src/include/graph.h - src/include/nvmlwrap.h - src/include/param.h - src/include/channel.h - src/include/nvtx_stub.h - src/include/core.h - src/include/info.h - src/include/ipcsocket.h - src/include/git_version.h - src/include/npkit/npkit_event.h - src/include/npkit/npkit.h - src/include/npkit/npkit_struct.h - src/include/nvtx3/nvToolsExtPayload.h - src/include/nvtx3/nvToolsExt.h - src/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h - src/include/nvtx3/nvtxDetail/nvtxTypes.h - src/include/nvtx3/nvtxDetail/nvtxImpl.h - src/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h - src/include/nvtx3/nvtxDetail/nvtxInitDecls.h - src/include/nvtx3/nvtxDetail/nvtxLinkOnce.h - src/include/nvtx3/nvtxDetail/nvtxImplCore.h - src/include/nvtx3/nvtxDetail/nvtxInitDefs.h - src/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h - src/include/nvtx3/nvtxDetail/nvtxInit.h - src/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h - src/include/nvtx3/nvToolsExtSync.h - src/include/nvtx3/nvtxExtDetail/nvtxExtPayloadTypeInfo.h - src/include/nvtx3/nvtxExtDetail/nvtxExtImplPayload_v1.h - src/include/nvtx3/nvtxExtDetail/nvtxExtTypes.h - src/include/nvtx3/nvtxExtDetail/nvtxExtInit.h - src/include/nvtx3/nvtxExtDetail/nvtxExtImpl.h - src/include/nvtx3/nvToolsExtCudaRt.h - src/include/nvtx3/nvToolsExtCuda.h - src/include/nvtx3/nvToolsExtOpenCL.h - src/include/msccl/msccl_kernel.h - src/include/msccl/msccl_lifecycle.h - src/include/msccl/msccl_parser.h - src/include/msccl/msccl_setup.h - src/include/msccl/msccl_scheduler.h - src/include/msccl/msccl_status.h - src/include/msccl/msccl_struct.h - src/graph/rings.h - src/graph/rome_models.h - src/graph/topo.h - src/graph/xml.h) -foreach(filename ${HEADER_SOURCES}) - configure_file(${PROJECT_SOURCE_DIR}/${filename} ${filename} COPYONLY) -endforeach(filename) +## Loop over each source file to hipify +foreach(SRC_FILE ${SRC_FILES}) + # Check that file exists + if (NOT EXISTS ${CMAKE_SOURCE_DIR}/${SRC_FILE}) + message(FATAL_ERROR "Unable to find file listed in CMakeLists.txt: 
${CMAKE_SOURCE_DIR}/${SRC_FILE}") + endif() -set(API_SOURCES - src/collectives/all_reduce.cc - src/collectives/all_gather.cc - src/collectives/all_to_all.cc - src/collectives/all_to_allv.cc - src/collectives/reduce.cc - src/collectives/broadcast.cc - src/collectives/reduce_scatter.cc - src/collectives/scatter.cc - src/collectives/gather.cc - src/collectives/sendrecv.cc - src/collectives/msccl.cc - src/net.cc) -foreach(filename ${API_SOURCES}) - string(REPLACE ".cc" - "_api.cpp" - cpp_filename - ${filename}) - configure_file(${filename} ${cpp_filename} COPYONLY) - list(APPEND CPP_SOURCES ${cpp_filename}) -endforeach(filename) + # Establish hipified copy of the source file + set(HIP_FILE "${HIPIFY_DIR}/${SRC_FILE}") + get_filename_component(HIP_FILE_DIR ${HIP_FILE} DIRECTORY) -set(CC_SOURCES - src/init.cc - src/graph/trees.cc - src/graph/rings.cc - src/graph/paths.cc - src/graph/search.cc - src/graph/connect.cc - src/graph/tuning.cc - src/graph/topo.cc - src/graph/xml.cc - src/graph/rome_models.cc - src/channel.cc - src/misc/argcheck.cc - src/misc/nvmlwrap_stub.cc - src/misc/utils.cc - src/misc/ibvwrap.cc - src/misc/nvmlwrap_stub.cc - src/misc/rocm_smi_wrap.cc - src/misc/profiler.cc - src/misc/npkit.cc - src/misc/shmutils.cc - src/misc/signals.cc # RCCL - src/misc/socket.cc - src/misc/param.cc - src/misc/rocmwrap.cc - src/misc/ipcsocket.cc - src/misc/strongstream.cc - src/misc/msccl/msccl_lifecycle.cc - src/misc/msccl/msccl_parser.cc - src/misc/msccl/msccl_setup.cc - src/misc/msccl/msccl_status.cc - src/transport/coll_net.cc - src/transport/net.cc - src/transport/net_ib.cc - src/transport/net_socket.cc - src/transport/p2p.cc - src/transport/shm.cc - src/transport.cc - src/debug.cc - src/group.cc - src/bootstrap.cc - src/proxy.cc - src/enqueue.cc) -foreach(filename ${CC_SOURCES}) - string(REPLACE ".cc" - ".cpp" - cpp_filename - ${filename}) - configure_file(${filename} ${cpp_filename} COPYONLY) - list(APPEND CPP_SOURCES ${cpp_filename}) -endforeach(filename) + # 
Convert .cu files to .cpp so that they get processed properly + string(REPLACE "\.cu" "\.cu.cpp" HIP_FILE ${HIP_FILE}) + list(APPEND HIP_SOURCES ${HIP_FILE}) -list(APPEND CPP_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp) -add_library(rccl ${CPP_SOURCES}) + # Create a custom command to create hipified source code + add_custom_command( + OUTPUT ${HIP_FILE} + COMMAND mkdir -p ${HIP_FILE_DIR} && $ ${hipify-perl_executable} -quiet-warnings ${CMAKE_SOURCE_DIR}/${SRC_FILE} -o ${HIP_FILE} + MAIN_DEPENDENCY ${SRC_FILE} + COMMENT "Hipifying ${SRC_FILE} -> ${HIP_FILE}" + ) +endforeach() -message ("-- Hipifying source") -set(HIPIFY_SOURCES - src/collectives/all_gather_api.cpp - src/collectives/all_reduce_api.cpp - src/collectives/all_to_all_api.cpp - src/collectives/all_to_allv_api.cpp - src/collectives/broadcast_api.cpp - src/collectives/gather_api.cpp - src/collectives/reduce_api.cpp - src/collectives/reduce_scatter_api.cpp - src/collectives/scatter_api.cpp - src/collectives/sendrecv_api.cpp - src/collectives/msccl_api.cpp - src/debug.cpp - src/enqueue.cpp - src/graph/xml.cpp - src/group.cpp - src/include/alloc.h - src/include/checks.h - src/include/info.h - src/include/proxy.h - src/include/strongstream.h - src/init.cpp - src/misc/argcheck.cpp - src/misc/shmutils.cpp - src/misc/strongstream.cpp - src/misc/utils.cpp - src/net_api.cpp - src/proxy.cpp - src/transport.cpp - src/transport/coll_net.cpp - src/transport/net.cpp - src/transport/net_socket.cpp - src/transport/p2p.cpp - src/transport/shm.cpp) -find_program( hipify-perl_executable hipify-perl ) -foreach(filename ${HIPIFY_SOURCES}) - message (" ${filename}") - execute_process(COMMAND bash "-c" "${hipify-perl_executable} -inplace -quiet-warnings ${PROJECT_BINARY_DIR}/${filename}" OUTPUT_VARIABLE HIPIFY_OUTPUT ERROR_VARIABLE HIPIFY_OUTPUT) -endforeach(filename) -message ("-- Hipifying source - done") +# Create an initial git_version.cpp file (that will be updated with latest git version) 
+#================================================================================================== +file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp "") +list(APPEND HIP_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp) -# Create a custom target that creates/updates git_version.cpp -# that executes whenever rccl is built +# Create a custom target that updates git_version.cpp and executes whenever rccl is built add_custom_target(git_version_check COMMENT "Updating git_version.cpp if necessary" COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/git_version.cmake VERBATIM ) -# Create a dummy git_version.cpp file in case it doesn't exist -configure_file(src/nccl.h.in ${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp) +# Set up RCCL library +#================================================================================================== +## Set RCCL source files +add_library(rccl ${HIP_SOURCES}) -# Execute git_version_check whenever rccl library is built -add_dependencies(rccl git_version_check) - -add_definitions(-DNVTX_NO_IMPL) - -if(TRACE) - add_definitions(-DENABLE_TRACE) -endif() - -if(PROFILE) - add_definitions(-DENABLE_PROFILING) -endif() - -if(NPKIT_FLAGS) - add_definitions(${NPKIT_FLAGS}) -endif() - -set(COLLTRACE 1 CACHE BOOL "Collective Trace Option") -if(COLLTRACE) - add_definitions(-DENABLE_COLLTRACE) -endif() - -enable_language(C) -CHECK_INCLUDE_FILES(bfd.h HAVE_BFD) -if (HAVE_BFD) - add_definitions(-DHAVE_BFD) - message ("-- Found BFD") - CHECK_SYMBOL_EXISTS(bfd_get_section_flags "bfd.h" HAVE_DECL_BFD_GET_SECTION_FLAGS) - if (HAVE_DECL_BFD_GET_SECTION_FLAGS) - add_definitions(-DHAVE_DECL_BFD_GET_SECTION_FLAGS) - endif() - CHECK_SYMBOL_EXISTS(bfd_get_section_vma "bfd.h" HAVE_DECL_BFD_GET_SECTION_VMA) - if (HAVE_DECL_BFD_GET_SECTION_VMA) - add_definitions(-DHAVE_DECL_BFD_GET_SECTION_VMA) - endif() - CHECK_CXX_SOURCE_COMPILES( - "#include - - int main (int argc, char **argv) { - bfd_size_type size; - bfd abfd; - asection sec; - size = 
bfd_section_size(&abfd, &sec); - return (int)(size); - }" - HAVE_TWO_ARG_BFD_SECTION_SIZE) - if (HAVE_TWO_ARG_BFD_SECTION_SIZE) - add_definitions(-DHAVE_TWO_ARG_BFD_SECTION_SIZE) - endif() - find_path(DEMANGLE_HEADER demangle.h PATHS /usr/include PATH_SUFFIXES libiberty) - if(NOT DEMANGLE_HEADER) - message("Could not find demangle.h ${DEMANGLE_HEADER}") - else() - add_definitions(-DHAVE_CPLUS_DEMANGLE) - message("Found demangle.h in ${DEMANGLE_HEADER}") - set (HAVE_CPLUS_DEMANGLE 1) - set (HAVE_DECL_BASENAME "1") - INCLUDE_DIRECTORIES(${DEMANGLE_HEADER}) - endif() -endif() - -find_package(rocm_smi PATHS ${ROCM_PATH}/lib/cmake/rocm_smi) -if (rocm_smi_FOUND) - message ("-- Found rocm_smi at ${ROCM_SMI_INCLUDE_DIR}") - CHECK_INCLUDE_FILE_CXX("${ROCM_SMI_INCLUDE_DIR}/rocm_smi/rocm_smi64Config.h" HAVE_ROCM_SMI64CONFIG) -else() - message ("-- Checking old include directory structure for rocm_smi") - set(ROCM_SMI_INCLUDE_DIR "${ROCM_PATH}/rocm_smi/include") - set(ROCM_SMI_LIB_DIR "${ROCM_PATH}/rocm_smi/lib") - set(ROCM_SMI_LIBRARIES rocm_smi64) - CHECK_INCLUDE_FILE_CXX("${ROCM_SMI_INCLUDE_DIR}/rocm_smi/rocm_smi64Config.h" HAVE_ROCM_SMI64CONFIG) -endif() -IF(HAVE_ROCM_SMI64CONFIG) - add_definitions(-DUSE_ROCM_SMI64CONFIG) -ENDIF() - -foreach(target ${AMDGPU_TARGETS}) - target_link_libraries(rccl PRIVATE --amdgpu-target=${target}) -endforeach() - -set(ENABLE_IFC 1 CACHE BOOL "Enable indirect function call") -if("${HIP_COMPILER}" MATCHES "clang") - find_program( hipcc_executable hipcc ) - execute_process(COMMAND bash "-c" "${hipcc_executable} --version | grep 'HIP version' | awk -F\" \" '{ printf $3}' | awk -F\"-\" '{ printf $1}'" OUTPUT_VARIABLE hipcc_version_string) - message(STATUS "hipcc version: ${hipcc_version_string}") - if(${hipcc_version_string} VERSION_GREATER_EQUAL "5.5.30201" AND ENABLE_IFC) - add_definitions(-DUSE_INDIRECT_FUNCTION_CALL) - target_compile_options(rccl PRIVATE -fvisibility=hidden) - message(STATUS "Indirect function call enabled") - else() - 
target_compile_options(rccl PRIVATE -fvisibility=hidden --hipcc-func-supp) - endif() - foreach(target ${AMDGPU_TARGETS}) - target_compile_options(rccl PRIVATE -fgpu-rdc) - endforeach() - target_link_libraries(rccl PRIVATE -fgpu-rdc) - target_include_directories(rccl PRIVATE ${ROCM_PATH}/include) - execute_process(COMMAND bash "-c" "${hipcc_executable} -help | grep 'parallel-jobs'" OUTPUT_VARIABLE hipcc_parallel_jobs) - if("${hipcc_parallel_jobs}" MATCHES "parallel-jobs") - target_compile_options(rccl PRIVATE -parallel-jobs=12 PRIVATE -Wno-format-nonliteral) - target_link_libraries(rccl PRIVATE -parallel-jobs=12) - endif() - - # RCCL static lib uses -fgpu-rdc which requires hipcc as the linker and archiver - if(BUILD_STATIC) - target_link_libraries(rccl PRIVATE --emit-static-lib) - set(CMAKE_AR "${hipcc_executable}") - get_property(link_libraries TARGET rccl PROPERTY LINK_LIBRARIES) - string (REPLACE ";" " " LINK_PROPS "${link_libraries}") - set(CMAKE_CXX_ARCHIVE_CREATE " -o ${LINK_PROPS} ") - endif() -endif() - -if("${HIP_COMPILER}" MATCHES "hcc") - find_program( hcc_executable hcc ) - execute_process(COMMAND bash "-c" "${hcc_executable} --version | sed -e '1!d' -e 's/.*based on HCC\\s*//'" OUTPUT_VARIABLE hcc_version_string) - execute_process(COMMAND bash "-c" "echo \"${hcc_version_string}\" | awk -F\".\" '{ printf $1}'" OUTPUT_VARIABLE hcc_major_version) - execute_process(COMMAND bash "-c" "echo \"${hcc_version_string}\" | awk -F\".\" '{ printf $2}'" OUTPUT_VARIABLE hcc_minor_version) - if ("${hcc_major_version}.${hcc_minor_version}" VERSION_LESS "4.0") - target_link_libraries(rccl PRIVATE -hc-function-calls) - endif() -endif() +## Set RCCL dependencies +add_dependencies(rccl git_version_check) # Execute git_version_check during build +## Set RCCL include directories +target_include_directories(rccl PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header +target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src) # for hipfied headers 
+target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include) +target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/collectives) +target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/collectives/device) +target_include_directories(rccl PRIVATE ${HSA_INCLUDE_PATH}) target_include_directories(rccl PRIVATE ${ROCM_SMI_INCLUDE_DIR}) -target_link_libraries(rccl PRIVATE hip::device dl -l${ROCM_SMI_LIBRARIES} -L${ROCM_SMI_LIB_DIR}) -target_link_libraries(rccl INTERFACE hip::host) +if(DEMANGLE_DIR) + target_include_directories(rccl PRIVATE ${DEMANGLE_DIR}) +endif() -if(HAVE_BFD) - target_link_libraries(rccl PRIVATE bfd dl z) - find_library(HAVE_IBERTY iberty PATHS /usr/lib64 /usr/lib/ - PATH_SUFFIXES x86_64-linux-gnu) +## Set RCCL compile definitions +target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL) # NVTX is not supported +if(COLLTRACE) + target_compile_definitions(rccl PRIVATE ENABLE_COLLTRACE) +endif() +if(HAVE_ROCM_SMI64CONFIG) + target_compile_definitions(rccl PRIVATE USE_ROCM_SMI64CONFIG) +endif() +if(NPKIT_FLAGS) + target_compile_definitions(rccl PRIVATE ${NPKIT_FLAGS}) +endif() +if(PROFILE) + target_compile_definitions(rccl PRIVATE ENABLE_PROFILING) +endif() +if(TRACE) + target_compile_definitions(rccl PRIVATE ENABLE_TRACE) +endif() +if(${HAS_HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED}) + target_compile_definitions(rccl PRIVATE QUERY_HSA_DMABUF) +endif() +if(${HIP_EVENT_DISABLE_FENCE}) + target_compile_definitions(rccl PRIVATE HIP_EVENT_DISABLE_FENCE) +endif() +if (BUILD_BFD) + if (HAVE_BFD) + target_compile_definitions(rccl PRIVATE HAVE_BFD) + endif() + if (HAVE_DECL_BFD_GET_SECTION_FLAGS) + target_compile_definitions(rccl PRIVATE HAVE_DECL_BFD_GET_SECTION_FLAGS) + endif() + if (HAVE_DECL_BFD_GET_SECTION_VMA) + target_compile_definitions(rccl PRIVATE HAVE_DECL_BFD_GET_SECTION_VMA) + endif() + if (HAVE_TWO_ARG_BFD_SECTION_SIZE) + target_compile_definitions(rccl PRIVATE HAVE_TWO_ARG_BFD_SECTION_SIZE) + endif() +endif() +if (IFC_ENABLED) + 
target_compile_definitions(rccl PRIVATE USE_INDIRECT_FUNCTION_CALL) +endif() +if(DEMANGLE_DIR) + target_compile_definitions(rccl PRIVATE "HAVE_CPLUS_DEMANGLE=1") + target_compile_definitions(rccl PRIVATE "HAVE_DECL_BASENAME=1") +endif() + +## Set RCCL compile options +target_compile_options(rccl PRIVATE -parallel-jobs=12) +target_compile_options(rccl PRIVATE -Wno-format-nonliteral) +target_compile_options(rccl PRIVATE -fgpu-rdc) # Generate relocatable device code (required for extern __shared__) +target_compile_options(rccl PRIVATE -fvisibility=hidden) # Set symbol visibility to hidden by default + +## NOTE: This is currently being handled by rocm-cmake, however may need to be re-enabled in the future +#foreach(target ${GPU_TARGETS}) +# target_compile_options(rccl PRIVATE --offload-arch=${target}) +#endforeach() + +if(BUILD_ADDRESS_SANITIZER) + target_compile_options(rccl PRIVATE -fsanitize=address -shared-libasan) +endif() +if(TIMETRACE) + target_compile_options(rccl PRIVATE -ftime-trace) +endif() +if(NOT IFC_ENABLED) + target_compile_options(rccl PRIVATE --hipcc-func-supp) +endif() + +## Set RCCL linked library directories +target_link_directories(rccl PRIVATE ${ROCM_SMI_LIB_DIR}) + +## Set RCCL linked libraries +target_link_libraries(rccl PRIVATE Threads::Threads) +target_link_libraries(rccl INTERFACE hip::host) +target_link_libraries(rccl PRIVATE hip::device) +target_link_libraries(rccl PRIVATE dl) +target_link_libraries(rccl PRIVATE ${ROCM_SMI_LIBRARIES}) +if (HAVE_BFD) + target_link_libraries(rccl PRIVATE bfd) if(HAVE_IBERTY) - message("iberty found @ ${HAVE_IBERTY} ") - target_link_libraries(rccl PRIVATE iberty dl z) + target_link_libraries(rccl PRIVATE iberty z) endif() endif() -#Setup librccl.so version +## Set RCCL link options +target_link_options(rccl PRIVATE -fgpu-rdc) # Required when linking relocatable device code +target_link_options(rccl PRIVATE -parallel-jobs=12) # Use multiple threads to link +if(BUILD_ADDRESS_SANITIZER) +
target_link_options(rccl PRIVATE -fuse-ld=lld) +endif() +if(TIMETRACE) + target_link_options(rccl PRIVATE -ftime-trace) +endif() +if(NOT BUILD_SHARED_LIBS) + message(STATUS "Building static RCCL library") + target_link_options(rccl PRIVATE --emit-static-lib) + set(CMAKE_AR "${hipcc_executable}") + get_property(link_libraries TARGET rccl PROPERTY LINK_LIBRARIES) + string (REPLACE ";" " " LINK_PROPS "${link_libraries}") + set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> -o <TARGET> ${LINK_PROPS} <LINK_FLAGS> <OBJECTS>") +else() + message(STATUS "Building shared RCCL library") +endif() + +## Track linking time +set_property(TARGET rccl PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time") + +## Setup librccl.so version rocm_set_soversion(rccl "1.0") -rocm_install_targets(TARGETS - rccl - ) -rocm_install(FILES ${PROJECT_BINARY_DIR}/include/rccl/rccl.h src/include/nccl_net.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rccl) +# Install settings +#================================================================================================== +## Specify install targets +rocm_install_targets(TARGETS rccl) +rocm_install(FILES ${PROJECT_BINARY_DIR}/include/rccl/rccl.h src/include/nccl_net.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rccl) file(COPY tools/msccl-algorithms DESTINATION ${PROJECT_BINARY_DIR}) file(COPY tools/msccl-unit-test-algorithms DESTINATION ${PROJECT_BINARY_DIR}) install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-algorithms DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-unit-test-algorithms DESTINATION ${CMAKE_INSTALL_LIBDIR}) -rocm_export_targets(NAMESPACE - roc:: - TARGETS - rccl - DEPENDS - hip) +rocm_export_targets( + NAMESPACE roc:: + TARGETS rccl + DEPENDS hip) + +## Build with backwards compatibility if requested if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY) - #Create wrapper files - rocm_wrap_header_dir( "${PROJECT_BINARY_DIR}/include/rccl" - PATTERNS "rccl.h" - GUARDS SYMLINK WRAPPER - WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR} rccl/${CMAKE_INSTALL_INCLUDEDIR}) -
#install the wrapper header file to package - rocm_install( FILES ${PROJECT_BINARY_DIR}/rccl/include/rccl.h src/include/nccl_net.h - DESTINATION "./rccl/${CMAKE_INSTALL_INCLUDEDIR}/" ) - rocm_install( FILES ${PROJECT_BINARY_DIR}/include/rccl.h src/include/nccl_net.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/" ) + ### Create wrapper files + rocm_wrap_header_dir( + "${PROJECT_BINARY_DIR}/include/rccl" + PATTERNS "rccl.h" + GUARDS SYMLINK WRAPPER + WRAPPER_LOCATIONS ${CMAKE_INSTALL_INCLUDEDIR} rccl/${CMAKE_INSTALL_INCLUDEDIR}) + + ### install the wrapper header file to package + rocm_install( + FILES ${PROJECT_BINARY_DIR}/rccl/include/rccl.h src/include/nccl_net.h + DESTINATION "./rccl/${CMAKE_INSTALL_INCLUDEDIR}/" ) + rocm_install( + FILES ${PROJECT_BINARY_DIR}/include/rccl.h src/include/nccl_net.h + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/" ) endif() +## Set package dependencies rocm_package_add_dependencies(DEPENDS "hip-rocclr >= 3.5.0" "rocm-smi-lib >= 4.0.0") set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt" "${ROCM_PATH}") @@ -595,10 +680,7 @@ if(BUILD_TESTS) endif() rocm_create_package( - NAME - rccl - DESCRIPTION - "ROCm Communication Collectives Library" - MAINTAINER - "RCCL Maintainer " + NAME rccl + DESCRIPTION "ROCm Communication Collectives Library" + MAINTAINER "RCCL Maintainer " LDCONFIG) diff --git a/README.md b/README.md index 379df2d277..0f0f48db64 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,7 @@ The root of this repository has a helper script 'install.sh' to build and instal * `./install.sh -t` -- builds library including rccl unit tests * `./install.sh -r` -- runs rccl unit tests (must be already built) * `./install.sh -p` -- builds RCCL package -* `./install.sh -s` -- builds RCCL as a static library (default: shared) -* `./install.sh -hcc` -- builds RCCL with hcc compiler; note that hcc is now deprecated. 
(default:hip-clang) +* `./install.sh --static` -- builds RCCL as a static library (default: shared) * `./install.sh --prefix` -- specify custom path to install RCCL to (default:/opt/rocm) * `./install.sh --npkit-enable` -- enable compilation of npkit profiler framework with all options diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 124c268f29..9fed3dea85 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -106,6 +106,34 @@ if(NOT ROCM_FOUND) find_package( ROCM 0.7.3 REQUIRED CONFIG PATHS ${PROJECT_EXTERN_DIR}/rocm-cmake ) endif() +# Find available local ROCM targets +# NOTE: This will eventually be part of ROCm-CMake and should be removed at that time +function(rocm_local_targets VARIABLE) + set(${VARIABLE} "NOTFOUND" PARENT_SCOPE) + find_program(_rocm_agent_enumerator rocm_agent_enumerator HINTS /opt/rocm/bin ENV ROCM_PATH) + if(NOT _rocm_agent_enumerator STREQUAL "_rocm_agent_enumerator-NOTFOUND") + execute_process( + COMMAND "${_rocm_agent_enumerator}" + RESULT_VARIABLE _found_agents + OUTPUT_VARIABLE _rocm_agents + ERROR_QUIET + ) + if (_found_agents EQUAL 0) + string(REPLACE "\n" ";" _rocm_agents "${_rocm_agents}") + unset(result) + foreach (agent IN LISTS _rocm_agents) + if (NOT agent STREQUAL "gfx000") + list(APPEND result "${agent}") + endif() + endforeach() + if(result) + list(REMOVE_DUPLICATES result) + set(${VARIABLE} "${result}" PARENT_SCOPE) + endif() + endif() + endif() +endfunction() + include(ROCMSetupVersion) include(ROCMCreatePackage) include(ROCMInstallTargets) @@ -113,4 +141,4 @@ include(ROCMPackageConfigHelpers) include(ROCMInstallSymlinks) include(ROCMCheckTargetIds) include(ROCMClients) -include( ROCMHeaderWrapper ) +include(ROCMHeaderWrapper) diff --git a/install.sh b/install.sh index ec9766efec..fab11b0037 100755 --- a/install.sh +++ b/install.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2021 Advanced Micro Devices, Inc. All rights reserved. 
+# Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved. # ################################################# # helper functions @@ -8,40 +8,48 @@ function display_help() { echo "RCCL build & installation helper script" echo "./install [-h|--help] " - echo " [-h|--help] prints this help message." - echo " [-i|--install] install RCCL library (see --prefix argument below.)" - echo " [-d|--dependencies] install RCCL depdencencies." - echo " [-p|--package_build] Build RCCL package." - echo " [-t|--tests_build] Build rccl unit tests, but do not run." - echo " [-r|--run_tests_quick] Run small subset of rccl unit tests (must be built already.)" - echo " [-s|--static] Build RCCL as a static library instead of shared library." - echo " [--run_tests_all] Run all rccl unit tests (must be built already.)" - echo " [--hcc] Build library using deprecated hcc compiler (default:hip-clang)." - echo " [--prefix] Specify custom directory to install RCCL to (default: /opt/rocm)." - echo " [--address-sanitizer] Build with address sanitizer enabled" - echo " [--build_allreduce_only] Build only AllReduce + sum + float kernel" - echo " [--rm-legacy-include-dir] Remove legacy include dir Packaging added for file/folder reorg backward compatibility" - echo " [--npkit-enable] Compile with npkit enabled" + echo " --address-sanitizer Build with address sanitizer enabled" + echo " --build_allreduce_only Build only AllReduce + sum + float kernel" + echo " -d|--dependencies Install RCCL depdencencies" + echo " --debug Build debug library" + echo " --disable_backtrace Build without custom backtrace support" + echo " --fast Quick-build RCCL (local gpu arch only, no backtrace support)" + echo " -h|--help Prints this help message" + echo " -i|--install Install RCCL library (see --prefix argument below)" + echo " --local_gpu_only Only compile for local GPU architecture" + echo " --no_clean Don't delete files if they already exist" + echo " --npkit-enable Compile with npkit enabled" 
+ echo " -p|--package_build Build RCCL package" + echo " --prefix Specify custom directory to install RCCL to (default: /opt/rocm)" + echo " --rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility" + echo " --run_tests_all Run all rccl unit tests (must be built already)" + echo " -r|--run_tests_quick Run small subset of rccl unit tests (must be built already)" + echo " --static Build RCCL as a static library instead of shared library" + echo " -t|--tests_build Build rccl unit tests, but do not run" + echo " --verbose Show compile commands" } # ################################################# # global variables # ################################################# -build_package=false ROCM_PATH=${ROCM_PATH:="/opt/rocm"} -build_tests=false + +build_address_sanitizer=false +build_allreduce_only=false +install_dependencies=false +build_release=true +build_bfd=true +install_library=false +build_local_gpu_only=false +clean_build=true +npkit_enabled=false +build_package=false +build_freorg_bkwdcomp=true run_tests=false run_tests_all=false -build_release=true -build_address_sanitizer=false -install_library=false -build_hip_clang=true -clean_build=true -install_dependencies=false build_static=false -build_allreduce_only=false -build_freorg_bkwdcomp=true -npkit_enabled=false +build_tests=false +build_verbose=0 # ################################################# # Parameter parsing @@ -50,7 +58,7 @@ npkit_enabled=false # check if we have a modern version of getopt that can handle whitespace and long parameters getopt -T if [[ $? 
-eq 4 ]]; then - GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,install,dependencies,package_build,tests_build,run_tests_quick,static,run_tests_all,hcc,hip-clang,no_clean,prefix:,address-sanitizer,build_allreduce_only,npkit-enable,rm-legacy-include-dir --options hidptrs -- "$@") + GETOPT_PARSE=$(getopt --name "${0}" --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,fast,help,install,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,verbose --options hidptrs -- "$@") else echo "Need a new version of getopt" exit 1 @@ -65,62 +73,31 @@ eval set -- "${GETOPT_PARSE}" while true; do case "${1}" in - -h|--help) - display_help - exit 0 - ;; - -i|--install) - install_library=true - shift ;; - -d|--dependencies) - install_dependencies=true - shift;; - -p|--package_build) - build_package=true - shift ;; - -t|--tests_build) - build_tests=true - shift ;; - -r|--run_tests_quick) - run_tests=true - shift ;; - -s|--static) - build_static=true - shift ;; - --run_tests_all) - run_tests=true - run_tests_all=true - shift ;; - --hcc) - build_hip_clang=false - shift ;; - --hip-clang) - build_hip_clang=true - shift ;; - --no_clean) - clean_build=false - shift ;; - --address-sanitizer) - build_address_sanitizer=true - shift ;; - --build_allreduce_only) - build_allreduce_only=true - shift ;; - --rm-legacy-include-dir) - build_freorg_bkwdcomp=false - shift ;; - --npkit-enable) - npkit_enabled=true - shift ;; - --prefix) - install_prefix=${2} - shift 2 ;; + --address-sanitizer) build_address_sanitizer=true; shift ;; + --build_allreduce_only) build_allreduce_only=true; shift ;; + -d | --dependencies) install_dependencies=true; shift ;; + --debug) build_release=false; shift ;; + --disable_backtrace) build_bfd=false; shift ;; + --fast) build_bfd=false; build_local_gpu_only=true; shift ;; + -h | --help) display_help; exit 0 ;; + -i | --install) install_library=true; shift ;; +
--local_gpu_only) build_local_gpu_only=true; shift ;; + --no_clean) clean_build=false; shift ;; + --npkit-enable) npkit_enabled=true; shift ;; + -p | --package_build) build_package=true; shift ;; + --prefix) install_prefix=${2}; shift 2 ;; + --rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;; + -r | --run_tests_quick) run_tests=true; shift ;; + --run_tests_all) run_tests=true; run_tests_all=true; shift ;; + --static) build_static=true; shift ;; + -t | --tests_build) build_tests=true; shift ;; + --verbose) build_verbose=1; shift ;; --) shift ; break ;; *) echo "Unexpected command line parameter received; aborting"; exit 1 ;; esac - done +done ROCM_BIN_PATH=$ROCM_PATH/bin @@ -183,42 +160,50 @@ else cmake_common_options="${cmake_common_options} -DCMAKE_BUILD_TYPE=Debug" fi -# shared vs static -if [[ "${build_static}" == true ]]; then - cmake_common_options="${cmake_common_options} -DBUILD_STATIC=ON" -fi - -# sanitizer +# Address sanitizer if [[ "${build_address_sanitizer}" == true ]]; then -cmake_common_options="${cmake_common_options} -DBUILD_ADDRESS_SANITIZER=ON" + cmake_common_options="${cmake_common_options} -DBUILD_ADDRESS_SANITIZER=ON" fi -#Enable backward compatibility wrappers +# AllReduce only +if [[ "${build_allreduce_only}" == true ]]; then + cmake_common_options="${cmake_common_options} -DBUILD_ALLREDUCE_ONLY=ON" +fi + +# Backtrace support +if [[ "${build_bfd}" == false ]]; then + cmake_common_options="${cmake_common_options} -DBUILD_BFD=OFF" +fi + +# Backward compatibility wrappers if [[ "${build_freorg_bkwdcomp}" == true ]]; then cmake_common_options="${cmake_common_options} -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=ON" else cmake_common_options="${cmake_common_options} -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF" fi -compiler=hipcc -if [[ "${build_hip_clang}" == false ]]; then - compiler=hcc +# Build local GPU arch only +if [[ "$build_local_gpu_only" == true ]]; then + cmake_common_options="${cmake_common_options}
-DBUILD_LOCAL_GPU_TARGET_ONLY=ON" +fi + +# shared vs static +if [[ "${build_static}" == true ]]; then + cmake_common_options="${cmake_common_options} -DBUILD_SHARED_LIBS=OFF" +fi + + +# Install dependencies +if ($install_dependencies); then + cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON" fi cmake_executable=cmake case "${OS_ID}" in centos|rhel) - cmake_executable=cmake3 - ;; - esac - -if ($install_dependencies); then - cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON" -fi - -if ($build_allreduce_only); then - cmake_common_options="${cmake_common_options} -DBUILD_ALLREDUCE_ONLY=ON" -fi + cmake_executable=cmake3 + ;; +esac npkit_options="" if ($npkit_enabled); then @@ -307,16 +292,16 @@ fi check_exit_code "$?" if ($build_tests) || (($run_tests) && [[ ! -f ./test/rccl-UnitTests ]]); then - CXX=$ROCM_BIN_PATH/$compiler $cmake_executable $cmake_common_options -DBUILD_TESTS=ON -DNPKIT_FLAGS="${npkit_options}" -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCM_PATH=$ROCM_PATH ../../. + CXX=$ROCM_BIN_PATH/hipcc $cmake_executable $cmake_common_options -DBUILD_TESTS=ON -DNPKIT_FLAGS="${npkit_options}" -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCM_PATH=$ROCM_PATH ../../. else - CXX=$ROCM_BIN_PATH/$compiler $cmake_executable $cmake_common_options -DBUILD_TESTS=OFF -DNPKIT_FLAGS="${npkit_options}" -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCM_PATH=$ROCM_PATH ../../. + CXX=$ROCM_BIN_PATH/hipcc $cmake_executable $cmake_common_options -DBUILD_TESTS=OFF -DNPKIT_FLAGS="${npkit_options}" -DCMAKE_INSTALL_PREFIX=$ROCM_PATH -DROCM_PATH=$ROCM_PATH ../../. fi check_exit_code "$?" if ($install_library); then - make -j$(nproc) install + VERBOSE=${build_verbose} make -j$(nproc) install else - make -j$(nproc) + VERBOSE=${build_verbose} make -j$(nproc) fi check_exit_code "$?" 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0b00f33cea..190839b774 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -65,6 +65,8 @@ if(BUILD_TESTS) add_executable(rccl-UnitTests ${COMMON_SOURCE_FILES} ${TEST_SOURCE_FILES}) target_include_directories(rccl-UnitTests PRIVATE ${ROCM_PATH} ${GTEST_INCLUDE_DIRS}) + target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header + target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/hipify/src/include) # for rccl_bfloat16.h target_link_libraries(rccl-UnitTests PRIVATE ${GTEST_BOTH_LIBRARIES}) target_link_libraries(rccl-UnitTests PRIVATE hip::host hip::device hsa-runtime64::hsa-runtime64)