From 64c32d1c5bdf4c6d5c30a335035830e3b5902511 Mon Sep 17 00:00:00 2001 From: Bertan Dogancay <111835151+BertanDogancay@users.noreply.github.com> Date: Wed, 2 Aug 2023 09:45:18 -0600 Subject: [PATCH] Disable MSCCL kernels at compile time (#834) * Disable MSCCL kernels at compile time --- CMakeLists.txt | 20 +++++++++--- install.sh | 53 +++++++++++++++++-------------- src/misc/msccl/msccl_lifecycle.cc | 4 +++ src/misc/msccl/msccl_setup.cc | 4 +++ 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0cdccaae8d..dcce63ccde 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on option(BUILD_SHARED_LIBS "Build as shared library" ON) option(BUILD_TESTS "Build unit test programs" OFF) option(COLLTRACE "Collective Trace Option" ON) +option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON) option(ENABLE_IFC "Enable indirect function call" OFF) option(INSTALL_DEPENDENCIES "Force install dependencies" OFF) option(PROFILE "Enable profiling" OFF) @@ -279,9 +280,7 @@ set(SRC_FILES src/collectives/device/reduce_kernel.h src/collectives/device/reduce_scatter.h src/collectives/device/sendrecv.h - src/collectives/device/msccl_kernel_impl.h src/collectives/gather.cc - src/collectives/msccl.cc src/collectives/reduce.cc src/collectives/reduce_scatter.cc src/collectives/scatter.cc @@ -328,7 +327,6 @@ set(SRC_FILES src/include/ibvwrap.h src/include/info.h src/include/ipcsocket.h - src/include/msccl/msccl_kernel.h src/include/msccl/msccl_lifecycle.h src/include/msccl/msccl_parser.h src/include/msccl/msccl_scheduler.h @@ -443,6 +441,15 @@ else() endif() list(APPEND SRC_FILES ${CU_SOURCES}) +if (ENABLE_MSCCL_KERNEL) + set(MSCCL_KERNEL_SOURCES + src/collectives/device/msccl_kernel_impl.h + src/include/msccl/msccl_kernel.h + src/collectives/msccl.cc + ) + list(APPEND SRC_FILES ${MSCCL_KERNEL_SOURCES}) +endif() + # Hipify source files (copy of source generated into hipify directory) #================================================================================================== find_program(hipify-perl_executable hipify-perl) @@ -475,7 +482,9 @@ endforeach() expand_collectives("all_reduce" "AllReduce") expand_collectives("reduce" "Reduce") expand_collectives("reduce_scatter" "ReduceScatter") -expand_collectives("msccl_kernel" "MscclKernel") +if(ENABLE_MSCCL_KERNEL) + expand_collectives("msccl_kernel" "MscclKernel") +endif() # Create an initial git_version.cpp file (that will be updated with latest git version) #================================================================================================== @@ -514,6 +523,9 @@ target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL) # if(COLLTRACE) target_compile_definitions(rccl PRIVATE ENABLE_COLLTRACE) endif() +if(ENABLE_MSCCL_KERNEL) + target_compile_definitions(rccl PRIVATE COMPILE_MSCCL_KERNEL) +endif() if(HAVE_ROCM_SMI64CONFIG) target_compile_definitions(rccl PRIVATE USE_ROCM_SMI64CONFIG) endif() diff --git a/install.sh b/install.sh index 532680125e..2c209f5edf 100755 --- a/install.sh +++ b/install.sh @@ -22,6 +22,7 @@ collective_trace=true enable_ninja="" install_dependencies=false install_library=false +msccl_kernel_enabled=true num_parallel_jobs=16 npkit_enabled=false run_tests=false @@ -41,6 +42,7 @@ function display_help() echo " --debug Build debug library" echo " --disable_backtrace Build without custom backtrace support" echo " --disable-colltrace Build without collective trace" + echo " --disable-msccl-kernel Build without MSCCL kernels" echo " -f|--fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)" echo " -h|--help Prints this help message" echo " -i|--install Install RCCL library (see --prefix argument below)" @@ -66,7 +68,7 @@ function display_help() # check if we have a modern version of getopt that can handle whitespace and long parameters getopt -T if [[ $? -eq 4 ]]; then - GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,disable-colltrace,fast,help,install,jobs:,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,tests_build,time-trace,verbose -- "$@") + GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,tests_build,time-trace,verbose -- "$@") else echo "Need a new version of getopt" exit 1 @@ -81,28 +83,29 @@ eval set -- "${GETOPT_PARSE}" while true; do case "${1}" in - --address-sanitizer) build_address_sanitizer=true; shift ;; - --build_allreduce_only) build_allreduce_only=true; shift ;; - -d | --dependencies) install_dependencies=true; shift ;; - --debug) build_release=false; shift ;; - --disable_backtrace) build_bfd=false; shift ;; - --disable-colltrace) collective_trace=false; shift ;; - -f | --fast) build_bfd=false; build_local_gpu_only=true; collective_trace=false; shift ;; - -h | --help) display_help; exit 0 ;; - -i | --install) install_library=true; shift ;; - -j | --jobs) num_parallel_jobs=${2}; shift 2 ;; - -l | --local_gpu_only) build_local_gpu_only=true; shift ;; - --no_clean) clean_build=false; shift ;; - --npkit-enable) npkit_enabled=true; shift ;; - -p | --package_build) build_package=true; shift ;; - --prefix) install_prefix=${2}; shift 2 ;; - --rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;; - -r | --run_tests_quick) run_tests=true; shift ;; - --run_tests_all) run_tests=true; run_tests_all=true; shift ;; - --static) build_static=true; shift ;; - -t | --tests_build) build_tests=true; shift ;; - --time-trace) time_trace=true; shift ;; - --verbose) build_verbose=1; shift ;; + --address-sanitizer) build_address_sanitizer=true; shift ;; + --build_allreduce_only) build_allreduce_only=true; shift ;; + -d | --dependencies) install_dependencies=true; shift ;; + --debug) build_release=false; shift ;; + --disable_backtrace) build_bfd=false; shift ;; + --disable-colltrace) collective_trace=false; shift ;; + --disable-msccl-kernel) msccl_kernel_enabled=false; shift ;; + -f | --fast) build_bfd=false; build_local_gpu_only=true; collective_trace=false; msccl_kernel_enabled=false; shift ;; + -h | --help) display_help; exit 0 ;; + -i | --install) install_library=true; shift ;; + -j | --jobs) num_parallel_jobs=${2}; shift 2 ;; + -l | --local_gpu_only) build_local_gpu_only=true; shift ;; + --no_clean) clean_build=false; shift ;; + --npkit-enable) npkit_enabled=true; shift ;; + -p | --package_build) build_package=true; shift ;; + --prefix) install_prefix=${2}; shift 2 ;; + --rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;; + -r | --run_tests_quick) run_tests=true; shift ;; + --run_tests_all) run_tests=true; run_tests_all=true; shift ;; + --static) build_static=true; shift ;; + -t | --tests_build) build_tests=true; shift ;; + --time-trace) time_trace=true; shift ;; + --verbose) build_verbose=1; shift ;; --) shift ; break ;; *) echo "Unexpected command line parameter received; aborting"; exit 1 @@ -208,6 +211,10 @@ if [[ "${collective_trace}" == false ]]; then cmake_common_options="${cmake_common_options} -DCOLLTRACE=OFF" fi +if [[ "${msccl_kernel_enabled}" == false ]]; then + cmake_common_options="${cmake_common_options} -DENABLE_MSCCL_KERNEL=OFF" +fi + # Install dependencies if ($install_dependencies); then cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON" diff --git a/src/misc/msccl/msccl_lifecycle.cc b/src/misc/msccl/msccl_lifecycle.cc index 885e7d0bd9..d399c5832b 100644 --- a/src/misc/msccl/msccl_lifecycle.cc +++ b/src/misc/msccl/msccl_lifecycle.cc @@ -30,7 +30,11 @@ static bool mscclSchedulerTriedLoadAlgo = false; static std::mutex mscclLifecycleMutex; bool mscclEnabled() { +#ifdef COMPILE_MSCCL_KERNEL return rcclParamMscclEnabled(); +#else + return false; +#endif } void mscclSetIsCallerFlag() { diff --git a/src/misc/msccl/msccl_setup.cc b/src/misc/msccl/msccl_setup.cc index 947b1b7ba3..e4254b631f 100644 --- a/src/misc/msccl/msccl_setup.cc +++ b/src/misc/msccl/msccl_setup.cc @@ -9,7 +9,9 @@ #include "transport.h" #include "msccl/msccl_lifecycle.h" +#ifdef COMPILE_MSCCL_KERNEL #include "msccl/msccl_kernel.h" +#endif #include "msccl/msccl_setup.h" #include "msccl/msccl_status.h" @@ -243,7 +245,9 @@ static ncclResult_t hostToDevRedOp( // Except for ncclDevPreMulSum and ncclDevSumPostDiv required by ncclAvg void* mscclKernelEntries[(ncclNumDevRedOps - 2) * ncclNumTypes * NCCL_NUM_PROTOCOLS] = { +#ifdef COMPILE_MSCCL_KERNEL MSCCL_KERNEL_ENTRY() +#endif }; ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count,