2
0
* NVTX support
Este cometimento está contido em:
Bertan Dogancay
2024-02-08 14:08:24 -07:00
cometido por GitHub
ascendente 5257c753c5
cometimento 8a442faa12
4 ficheiros modificados com 20 adições e 4 eliminações
+7 -2
Ver ficheiro
@@ -19,6 +19,7 @@ option(COLLTRACE "Collective Trace Option"
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON) option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
option(ENABLE_IFC "Enable indirect function call" OFF) option(ENABLE_IFC "Enable indirect function call" OFF)
option(INSTALL_DEPENDENCIES "Force install dependencies" OFF) option(INSTALL_DEPENDENCIES "Force install dependencies" OFF)
option(NVTX "Enable NVTX" OFF)
option(PROFILE "Enable profiling" OFF) option(PROFILE "Enable profiling" OFF)
option(TIMETRACE "Enable time-trace during compilation" OFF) option(TIMETRACE "Enable time-trace during compilation" OFF)
option(TRACE "Enable additional tracing" OFF) option(TRACE "Enable additional tracing" OFF)
@@ -411,7 +412,7 @@ set(SRC_FILES
src/include/trees.h src/include/trees.h
src/include/utils.h src/include/utils.h
src/init.cc src/init.cc
# src/init_nvtx.cc src/init_nvtx.cc
src/misc/archinfo.cc src/misc/archinfo.cc
src/misc/argcheck.cc src/misc/argcheck.cc
# src/misc/cudawrap.cc # src/misc/cudawrap.cc
@@ -484,6 +485,8 @@ foreach(SRC_FILE ${SRC_FILES})
) )
endforeach() endforeach()
# Generate device/host tables and all the collective functions that are going to be in librccl.so
#==================================================================================================
if(ONLY_FUNCS) if(ONLY_FUNCS)
## Generate only the specified functions ## Generate only the specified functions
gen_functions(${ONLY_FUNCS}) gen_functions(${ONLY_FUNCS})
@@ -525,7 +528,9 @@ if(DEMANGLE_DIR)
endif() endif()
## Set RCCL compile definitions ## Set RCCL compile definitions
target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL) # NVTX is not supported if(NOT NVTX)
target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL)
endif()
if(COLLTRACE) if(COLLTRACE)
target_compile_definitions(rccl PRIVATE ENABLE_COLLTRACE) target_compile_definitions(rccl PRIVATE ENABLE_COLLTRACE)
endif() endif()
-1
Ver ficheiro
@@ -21,7 +21,6 @@
# SOFTWARE. # SOFTWARE.
set(ALL_PARAMS "ALL_COLLS" "ALL_ALGOS" "ALL_PROTOS" "ALL_REDOPS" "ALL_TYPES") set(ALL_PARAMS "ALL_COLLS" "ALL_ALGOS" "ALL_PROTOS" "ALL_REDOPS" "ALL_TYPES")
set(ALL_COLLS "AllGather" "AllReduce" "AllToAllPivot" "Broadcast" "Reduce" "ReduceScatter" "SendRecv") set(ALL_COLLS "AllGather" "AllReduce" "AllToAllPivot" "Broadcast" "Reduce" "ReduceScatter" "SendRecv")
set(ALL_ALGOS "TREE" "RING" "COLLNET_DIRECT" "COLLNET_CHAIN") set(ALL_ALGOS "TREE" "RING" "COLLNET_DIRECT" "COLLNET_CHAIN")
set(ALL_PROTOS "LL" "LL128" "SIMPLE") set(ALL_PROTOS "LL" "LL128" "SIMPLE")
+9 -1
Ver ficheiro
@@ -25,6 +25,7 @@ install_library=false
msccl_kernel_enabled=true msccl_kernel_enabled=true
num_parallel_jobs=$(nproc) num_parallel_jobs=$(nproc)
npkit_enabled=false npkit_enabled=false
nvtx_enabled=false
run_tests=false run_tests=false
run_tests_all=false run_tests_all=false
time_trace=false time_trace=false
@@ -50,6 +51,7 @@ function display_help()
echo " --amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, seperate by ';' (builds for all supported GPU architectures by default)" echo " --amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, seperate by ';' (builds for all supported GPU architectures by default)"
echo " --no_clean Don't delete files if they already exist" echo " --no_clean Don't delete files if they already exist"
echo " --npkit-enable Compile with npkit enabled" echo " --npkit-enable Compile with npkit enabled"
echo " --nvtx-enable Compile with nvtx enabled"
echo " -p|--package_build Build RCCL package" echo " -p|--package_build Build RCCL package"
echo " --prefix Specify custom directory to install RCCL to (default: /opt/rocm)" echo " --prefix Specify custom directory to install RCCL to (default: /opt/rocm)"
echo " --rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility" echo " --rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility"
@@ -68,7 +70,7 @@ function display_help()
# check if we have a modern version of getopt that can handle whitespace and long parameters # check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T getopt -T
if [[ $? -eq 4 ]]; then if [[ $? -eq 4 ]]; then
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@") GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,nvtx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
else else
echo "Need a new version of getopt" echo "Need a new version of getopt"
exit 1 exit 1
@@ -97,6 +99,7 @@ while true; do
--amdgpu_targets) build_amdgpu_targets=${2}; shift 2 ;; --amdgpu_targets) build_amdgpu_targets=${2}; shift 2 ;;
--no_clean) clean_build=false; shift ;; --no_clean) clean_build=false; shift ;;
--npkit-enable) npkit_enabled=true; shift ;; --npkit-enable) npkit_enabled=true; shift ;;
--nvtx-enable) nvtx_enabled=true; shift ;;
-p | --package_build) build_package=true; shift ;; -p | --package_build) build_package=true; shift ;;
--prefix) install_prefix=${2}; shift 2 ;; --prefix) install_prefix=${2}; shift 2 ;;
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;; --rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
@@ -220,6 +223,11 @@ if ($install_dependencies); then
cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON" cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON"
fi fi
# Enable NVTX
if [[ "${nvtx_enabled}" == true ]]; then
cmake_common_options="${cmake_common_options} -DNVTX=ON"
fi
cmake_executable=cmake cmake_executable=cmake
case "${OS_ID}" in case "${OS_ID}" in
centos|rhel) centos|rhel)
+4
Ver ficheiro
@@ -60,6 +60,10 @@ static __inline__ int ncclTypeSize(ncclDataType_t type) {
#include "alloc.h" #include "alloc.h"
#include "utils.h" #include "utils.h"
#include "param.h" #include "param.h"
#ifdef NVTX_NO_IMPL
#include "nvtx_stub.h" #include "nvtx_stub.h"
#else
#include "nvtx.h"
#endif
#endif // end include guard #endif // end include guard