Disable MSCCL kernels at compile time (#834)
* Disable MSCCL kernels at compile time
This commit is contained in:
+16
-4
@@ -17,6 +17,7 @@ option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on
|
||||
option(BUILD_SHARED_LIBS "Build as shared library" ON)
|
||||
option(BUILD_TESTS "Build unit test programs" OFF)
|
||||
option(COLLTRACE "Collective Trace Option" ON)
|
||||
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
|
||||
option(ENABLE_IFC "Enable indirect function call" OFF)
|
||||
option(INSTALL_DEPENDENCIES "Force install dependencies" OFF)
|
||||
option(PROFILE "Enable profiling" OFF)
|
||||
@@ -279,9 +280,7 @@ set(SRC_FILES
|
||||
src/collectives/device/reduce_kernel.h
|
||||
src/collectives/device/reduce_scatter.h
|
||||
src/collectives/device/sendrecv.h
|
||||
src/collectives/device/msccl_kernel_impl.h
|
||||
src/collectives/gather.cc
|
||||
src/collectives/msccl.cc
|
||||
src/collectives/reduce.cc
|
||||
src/collectives/reduce_scatter.cc
|
||||
src/collectives/scatter.cc
|
||||
@@ -328,7 +327,6 @@ set(SRC_FILES
|
||||
src/include/ibvwrap.h
|
||||
src/include/info.h
|
||||
src/include/ipcsocket.h
|
||||
src/include/msccl/msccl_kernel.h
|
||||
src/include/msccl/msccl_lifecycle.h
|
||||
src/include/msccl/msccl_parser.h
|
||||
src/include/msccl/msccl_scheduler.h
|
||||
@@ -443,6 +441,15 @@ else()
|
||||
endif()
|
||||
list(APPEND SRC_FILES ${CU_SOURCES})
|
||||
|
||||
if (ENABLE_MSCCL_KERNEL)
|
||||
set(MSCCL_KERNEL_SOURCES
|
||||
src/collectives/device/msccl_kernel_impl.h
|
||||
src/include/msccl/msccl_kernel.h
|
||||
src/collectives/msccl.cc
|
||||
)
|
||||
list(APPEND SRC_FILES ${MSCCL_KERNEL_SOURCES})
|
||||
endif()
|
||||
|
||||
# Hipify source files (copy of source generated into hipify directory)
|
||||
#==================================================================================================
|
||||
find_program(hipify-perl_executable hipify-perl)
|
||||
@@ -475,7 +482,9 @@ endforeach()
|
||||
expand_collectives("all_reduce" "AllReduce")
|
||||
expand_collectives("reduce" "Reduce")
|
||||
expand_collectives("reduce_scatter" "ReduceScatter")
|
||||
expand_collectives("msccl_kernel" "MscclKernel")
|
||||
if(ENABLE_MSCCL_KERNEL)
|
||||
expand_collectives("msccl_kernel" "MscclKernel")
|
||||
endif()
|
||||
|
||||
# Create an initial git_version.cpp file (that will be updated with latest git version)
|
||||
#==================================================================================================
|
||||
@@ -514,6 +523,9 @@ target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL) #
|
||||
if(COLLTRACE)
|
||||
target_compile_definitions(rccl PRIVATE ENABLE_COLLTRACE)
|
||||
endif()
|
||||
if(ENABLE_MSCCL_KERNEL)
|
||||
target_compile_definitions(rccl PRIVATE COMPILE_MSCCL_KERNEL)
|
||||
endif()
|
||||
if(HAVE_ROCM_SMI64CONFIG)
|
||||
target_compile_definitions(rccl PRIVATE USE_ROCM_SMI64CONFIG)
|
||||
endif()
|
||||
|
||||
+30
-23
@@ -22,6 +22,7 @@ collective_trace=true
|
||||
enable_ninja=""
|
||||
install_dependencies=false
|
||||
install_library=false
|
||||
msccl_kernel_enabled=true
|
||||
num_parallel_jobs=16
|
||||
npkit_enabled=false
|
||||
run_tests=false
|
||||
@@ -41,6 +42,7 @@ function display_help()
|
||||
echo " --debug Build debug library"
|
||||
echo " --disable_backtrace Build without custom backtrace support"
|
||||
echo " --disable-colltrace Build without collective trace"
|
||||
echo " --disable-msccl-kernel Build without MSCCL kernels"
|
||||
echo " -f|--fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)"
|
||||
echo " -h|--help Prints this help message"
|
||||
echo " -i|--install Install RCCL library (see --prefix argument below)"
|
||||
@@ -66,7 +68,7 @@ function display_help()
|
||||
# check if we have a modern version of getopt that can handle whitespace and long parameters
|
||||
getopt -T
|
||||
if [[ $? -eq 4 ]]; then
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,disable-colltrace,fast,help,install,jobs:,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,tests_build,time-trace,verbose -- "$@")
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,tests_build,time-trace,verbose -- "$@")
|
||||
else
|
||||
echo "Need a new version of getopt"
|
||||
exit 1
|
||||
@@ -81,28 +83,29 @@ eval set -- "${GETOPT_PARSE}"
|
||||
|
||||
while true; do
|
||||
case "${1}" in
|
||||
--address-sanitizer) build_address_sanitizer=true; shift ;;
|
||||
--build_allreduce_only) build_allreduce_only=true; shift ;;
|
||||
-d | --dependencies) install_dependencies=true; shift ;;
|
||||
--debug) build_release=false; shift ;;
|
||||
--disable_backtrace) build_bfd=false; shift ;;
|
||||
--disable-colltrace) collective_trace=false; shift ;;
|
||||
-f | --fast) build_bfd=false; build_local_gpu_only=true; collective_trace=false; shift ;;
|
||||
-h | --help) display_help; exit 0 ;;
|
||||
-i | --install) install_library=true; shift ;;
|
||||
-j | --jobs) num_parallel_jobs=${2}; shift 2 ;;
|
||||
-l | --local_gpu_only) build_local_gpu_only=true; shift ;;
|
||||
--no_clean) clean_build=false; shift ;;
|
||||
--npkit-enable) npkit_enabled=true; shift ;;
|
||||
-p | --package_build) build_package=true; shift ;;
|
||||
--prefix) install_prefix=${2}; shift 2 ;;
|
||||
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
|
||||
-r | --run_tests_quick) run_tests=true; shift ;;
|
||||
--run_tests_all) run_tests=true; run_tests_all=true; shift ;;
|
||||
--static) build_static=true; shift ;;
|
||||
-t | --tests_build) build_tests=true; shift ;;
|
||||
--time-trace) time_trace=true; shift ;;
|
||||
--verbose) build_verbose=1; shift ;;
|
||||
--address-sanitizer) build_address_sanitizer=true; shift ;;
|
||||
--build_allreduce_only) build_allreduce_only=true; shift ;;
|
||||
-d | --dependencies) install_dependencies=true; shift ;;
|
||||
--debug) build_release=false; shift ;;
|
||||
--disable_backtrace) build_bfd=false; shift ;;
|
||||
--disable-colltrace) collective_trace=false; shift ;;
|
||||
--disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
|
||||
-f | --fast) build_bfd=false; build_local_gpu_only=true; collective_trace=false; msccl_kernel_enabled=false; shift ;;
|
||||
-h | --help) display_help; exit 0 ;;
|
||||
-i | --install) install_library=true; shift ;;
|
||||
-j | --jobs) num_parallel_jobs=${2}; shift 2 ;;
|
||||
-l | --local_gpu_only) build_local_gpu_only=true; shift ;;
|
||||
--no_clean) clean_build=false; shift ;;
|
||||
--npkit-enable) npkit_enabled=true; shift ;;
|
||||
-p | --package_build) build_package=true; shift ;;
|
||||
--prefix) install_prefix=${2}; shift 2 ;;
|
||||
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
|
||||
-r | --run_tests_quick) run_tests=true; shift ;;
|
||||
--run_tests_all) run_tests=true; run_tests_all=true; shift ;;
|
||||
--static) build_static=true; shift ;;
|
||||
-t | --tests_build) build_tests=true; shift ;;
|
||||
--time-trace) time_trace=true; shift ;;
|
||||
--verbose) build_verbose=1; shift ;;
|
||||
--) shift ; break ;;
|
||||
*) echo "Unexpected command line parameter received; aborting";
|
||||
exit 1
|
||||
@@ -208,6 +211,10 @@ if [[ "${collective_trace}" == false ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DCOLLTRACE=OFF"
|
||||
fi
|
||||
|
||||
if [[ "${msccl_kernel_enabled}" == false ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DENABLE_MSCCL_KERNEL=OFF"
|
||||
fi
|
||||
|
||||
# Install dependencies
|
||||
if ($install_dependencies); then
|
||||
cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON"
|
||||
|
||||
@@ -30,7 +30,11 @@ static bool mscclSchedulerTriedLoadAlgo = false;
|
||||
static std::mutex mscclLifecycleMutex;
|
||||
|
||||
bool mscclEnabled() {
|
||||
#ifdef COMPILE_MSCCL_KERNEL
|
||||
return rcclParamMscclEnabled();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void mscclSetIsCallerFlag() {
|
||||
|
||||
@@ -9,7 +9,9 @@
|
||||
#include "transport.h"
|
||||
|
||||
#include "msccl/msccl_lifecycle.h"
|
||||
#ifdef COMPILE_MSCCL_KERNEL
|
||||
#include "msccl/msccl_kernel.h"
|
||||
#endif
|
||||
#include "msccl/msccl_setup.h"
|
||||
#include "msccl/msccl_status.h"
|
||||
|
||||
@@ -243,7 +245,9 @@ static ncclResult_t hostToDevRedOp(
|
||||
|
||||
// Except for ncclDevPreMulSum and ncclDevSumPostDiv required by ncclAvg
|
||||
void* mscclKernelEntries[(ncclNumDevRedOps - 2) * ncclNumTypes * NCCL_NUM_PROTOCOLS] = {
|
||||
#ifdef COMPILE_MSCCL_KERNEL
|
||||
MSCCL_KERNEL_ENTRY()
|
||||
#endif
|
||||
};
|
||||
|
||||
ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count,
|
||||
|
||||
Reference in New Issue
Block a user