Disable MSCCL kernels at compile time (#834)

* Disable MSCCL kernels at compile time
This commit is contained in:
Bertan Dogancay
2023-08-02 09:45:18 -06:00
committed by GitHub
parent 02ef257d64
commit 64c32d1c5b
4 changed files with 54 additions and 27 deletions
+16 -4
View File
@@ -17,6 +17,7 @@ option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on
# User-configurable build switches (override on the command line with -D<NAME>=ON|OFF).
option(BUILD_SHARED_LIBS "Build as shared library" ON)
option(BUILD_TESTS "Build unit test programs" OFF)
option(COLLTRACE "Collective Trace Option" ON)
# When ON, the MSCCL kernel sources are added to SRC_FILES, the "msccl_kernel" collective
# is expanded, and COMPILE_MSCCL_KERNEL is defined for the rccl target.
# When OFF, none of that happens and mscclEnabled() is compiled to return false.
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
option(ENABLE_IFC "Enable indirect function call" OFF)
option(INSTALL_DEPENDENCIES "Force install dependencies" OFF)
option(PROFILE "Enable profiling" OFF)
@@ -279,9 +280,7 @@ set(SRC_FILES
src/collectives/device/reduce_kernel.h
src/collectives/device/reduce_scatter.h
src/collectives/device/sendrecv.h
src/collectives/device/msccl_kernel_impl.h
src/collectives/gather.cc
src/collectives/msccl.cc
src/collectives/reduce.cc
src/collectives/reduce_scatter.cc
src/collectives/scatter.cc
@@ -328,7 +327,6 @@ set(SRC_FILES
src/include/ibvwrap.h
src/include/info.h
src/include/ipcsocket.h
src/include/msccl/msccl_kernel.h
src/include/msccl/msccl_lifecycle.h
src/include/msccl/msccl_parser.h
src/include/msccl/msccl_scheduler.h
@@ -443,6 +441,15 @@ else()
endif()
list(APPEND SRC_FILES ${CU_SOURCES})
# MSCCL kernel sources are only part of the build when ENABLE_MSCCL_KERNEL is ON.
# They are appended to SRC_FILES here so that they are absent from the list otherwise.
if(ENABLE_MSCCL_KERNEL)
  set(MSCCL_KERNEL_SOURCES
    src/collectives/device/msccl_kernel_impl.h
    src/include/msccl/msccl_kernel.h
    src/collectives/msccl.cc)
  list(APPEND SRC_FILES ${MSCCL_KERNEL_SOURCES})
endif()
# Hipify source files (copy of source generated into hipify directory)
#==================================================================================================
find_program(hipify-perl_executable hipify-perl)
@@ -475,7 +482,9 @@ endforeach()
expand_collectives("all_reduce" "AllReduce")
expand_collectives("reduce" "Reduce")
expand_collectives("reduce_scatter" "ReduceScatter")
expand_collectives("msccl_kernel" "MscclKernel")
# Only expand the MSCCL kernel collective when MSCCL kernels are being compiled.
# NOTE(review): expand_collectives() is defined outside this view; confirm what it generates.
if(ENABLE_MSCCL_KERNEL)
  expand_collectives("msccl_kernel" "MscclKernel")
endif()
# Create an initial git_version.cpp file (that will be updated with latest git version)
#==================================================================================================
@@ -514,6 +523,9 @@ target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL) #
# Feature macros for the rccl target. Each definition is emitted only when its
# controlling variable evaluates to true; a false/empty variable contributes nothing.
target_compile_definitions(rccl PRIVATE
  $<$<BOOL:${COLLTRACE}>:ENABLE_COLLTRACE>
  $<$<BOOL:${ENABLE_MSCCL_KERNEL}>:COMPILE_MSCCL_KERNEL>
  $<$<BOOL:${HAVE_ROCM_SMI64CONFIG}>:USE_ROCM_SMI64CONFIG>
)
+30 -23
View File
@@ -22,6 +22,7 @@ collective_trace=true
enable_ninja=""
install_dependencies=false
install_library=false
msccl_kernel_enabled=true
num_parallel_jobs=16
npkit_enabled=false
run_tests=false
@@ -41,6 +42,7 @@ function display_help()
echo " --debug Build debug library"
echo " --disable_backtrace Build without custom backtrace support"
echo " --disable-colltrace Build without collective trace"
echo " --disable-msccl-kernel Build without MSCCL kernels"
# Keep this description in sync with the -f|--fast handler in the option loop, which
# also turns off collective trace and MSCCL kernels.
echo " -f|--fast Quick-build RCCL (local gpu arch only, no backtrace, no collective trace, and no MSCCL kernels)"
echo " -h|--help Prints this help message"
echo " -i|--install Install RCCL library (see --prefix argument below)"
@@ -66,7 +68,7 @@ function display_help()
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ $? -eq 4 ]]; then
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,disable-colltrace,fast,help,install,jobs:,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,tests_build,time-trace,verbose -- "$@")
# Every long option handled by the option loop must be declared here; getopt reports
# undeclared long options as errors. "static" is listed because the loop handles --static.
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
else
echo "Need a new version of getopt"
exit 1
@@ -81,28 +83,29 @@ eval set -- "${GETOPT_PARSE}"
while true; do
case "${1}" in
--address-sanitizer) build_address_sanitizer=true; shift ;;
--build_allreduce_only) build_allreduce_only=true; shift ;;
-d | --dependencies) install_dependencies=true; shift ;;
--debug) build_release=false; shift ;;
--disable_backtrace) build_bfd=false; shift ;;
--disable-colltrace) collective_trace=false; shift ;;
-f | --fast) build_bfd=false; build_local_gpu_only=true; collective_trace=false; shift ;;
-h | --help) display_help; exit 0 ;;
-i | --install) install_library=true; shift ;;
-j | --jobs) num_parallel_jobs=${2}; shift 2 ;;
-l | --local_gpu_only) build_local_gpu_only=true; shift ;;
--no_clean) clean_build=false; shift ;;
--npkit-enable) npkit_enabled=true; shift ;;
-p | --package_build) build_package=true; shift ;;
--prefix) install_prefix=${2}; shift 2 ;;
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
-r | --run_tests_quick) run_tests=true; shift ;;
--run_tests_all) run_tests=true; run_tests_all=true; shift ;;
--static) build_static=true; shift ;;
-t | --tests_build) build_tests=true; shift ;;
--time-trace) time_trace=true; shift ;;
--verbose) build_verbose=1; shift ;;
--address-sanitizer) build_address_sanitizer=true; shift ;;
--build_allreduce_only) build_allreduce_only=true; shift ;;
-d | --dependencies) install_dependencies=true; shift ;;
--debug) build_release=false; shift ;;
--disable_backtrace) build_bfd=false; shift ;;
--disable-colltrace) collective_trace=false; shift ;;
--disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
-f | --fast) build_bfd=false; build_local_gpu_only=true; collective_trace=false; msccl_kernel_enabled=false; shift ;;
-h | --help) display_help; exit 0 ;;
-i | --install) install_library=true; shift ;;
-j | --jobs) num_parallel_jobs=${2}; shift 2 ;;
-l | --local_gpu_only) build_local_gpu_only=true; shift ;;
--no_clean) clean_build=false; shift ;;
--npkit-enable) npkit_enabled=true; shift ;;
-p | --package_build) build_package=true; shift ;;
--prefix) install_prefix=${2}; shift 2 ;;
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
-r | --run_tests_quick) run_tests=true; shift ;;
--run_tests_all) run_tests=true; run_tests_all=true; shift ;;
--static) build_static=true; shift ;;
-t | --tests_build) build_tests=true; shift ;;
--time-trace) time_trace=true; shift ;;
--verbose) build_verbose=1; shift ;;
--) shift ; break ;;
*) echo "Unexpected command line parameter received; aborting";
exit 1
@@ -208,6 +211,10 @@ if [[ "${collective_trace}" == false ]]; then
cmake_common_options="${cmake_common_options} -DCOLLTRACE=OFF"
fi
# Forward a disabled MSCCL kernel build to CMake; the CMake option defaults to ON,
# so nothing is passed when MSCCL kernels stay enabled.
if [[ "${msccl_kernel_enabled}" == false ]]; then
  cmake_common_options+=" -DENABLE_MSCCL_KERNEL=OFF"
fi
# Install dependencies
if ($install_dependencies); then
cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON"
+4
View File
@@ -30,7 +30,11 @@ static bool mscclSchedulerTriedLoadAlgo = false;
static std::mutex mscclLifecycleMutex;
// Reports whether MSCCL may be used by this build.
// Without COMPILE_MSCCL_KERNEL the answer is always false and
// rcclParamMscclEnabled() is never consulted; otherwise its value is returned.
bool mscclEnabled() {
#ifndef COMPILE_MSCCL_KERNEL
  return false;
#else
  return rcclParamMscclEnabled();
#endif
}
void mscclSetIsCallerFlag() {
+4
View File
@@ -9,7 +9,9 @@
#include "transport.h"
#include "msccl/msccl_lifecycle.h"
#ifdef COMPILE_MSCCL_KERNEL
#include "msccl/msccl_kernel.h"
#endif
#include "msccl/msccl_setup.h"
#include "msccl/msccl_status.h"
@@ -243,7 +245,9 @@ static ncclResult_t hostToDevRedOp(
// Except for ncclDevPreMulSum and ncclDevSumPostDiv required by ncclAvg
// Table of MSCCL kernel entry points, sized for every combination of
// (ncclNumDevRedOps - 2) reduction ops, ncclNumTypes data types and NCCL_NUM_PROTOCOLS protocols.
// When COMPILE_MSCCL_KERNEL is not defined the initializer list is empty, so every
// slot is a null pointer.
// NOTE(review): the entry order is whatever MSCCL_KERNEL_ENTRY() expands to; confirm against its definition.
void* mscclKernelEntries[(ncclNumDevRedOps - 2) * ncclNumTypes * NCCL_NUM_PROTOCOLS] = {
#ifdef COMPILE_MSCCL_KERNEL
MSCCL_KERNEL_ENTRY()
#endif
};
ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count,