Nvtx support (#1076)

* NVTX support
此提交包含在:
Bertan Dogancay
2024-02-08 14:08:24 -07:00
提交者 GitHub
父節點 5257c753c5
當前提交 8a442faa12
共有 4 個檔案被更改，包括 20 行新增和 4 行刪除
+7 -2
查看文件
@@ -19,6 +19,7 @@ option(COLLTRACE "Collective Trace Option"
# User-selectable build switches. Each one is a boolean cache entry that can be
# overridden on the command line with -D<NAME>=ON|OFF.
option(ENABLE_MSCCL_KERNEL
       "Enable MSCCL while compiling"
       ON)
option(ENABLE_IFC
       "Enable indirect function call"
       OFF)
option(INSTALL_DEPENDENCIES
       "Force install dependencies"
       OFF)
# While NVTX is OFF the rccl target is compiled with NVTX_NO_IMPL defined.
option(NVTX
       "Enable NVTX"
       OFF)
option(PROFILE
       "Enable profiling"
       OFF)
option(TIMETRACE
       "Enable time-trace during compilation"
       OFF)
option(TRACE
       "Enable additional tracing"
       OFF)
@@ -411,7 +412,7 @@ set(SRC_FILES
src/include/trees.h
src/include/utils.h
src/init.cc
# src/init_nvtx.cc
src/init_nvtx.cc
src/misc/archinfo.cc
src/misc/argcheck.cc
# src/misc/cudawrap.cc
@@ -484,6 +485,8 @@ foreach(SRC_FILE ${SRC_FILES})
)
endforeach()
# Generate device/host tables and all the collective functions that are going to be in librccl.so
#==================================================================================================
if(ONLY_FUNCS)
## Generate only the specified functions
gen_functions(${ONLY_FUNCS})
@@ -525,7 +528,9 @@ if(DEMANGLE_DIR)
endif()
## Set RCCL compile definitions
# NVTX_NO_IMPL must only be defined while the NVTX option is OFF. Defining it
# unconditionally as well would make the NVTX option a no-op, because the
# definition would be present regardless of the option's value.
if(NOT NVTX)
  target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL)
endif()
# ENABLE_COLLTRACE is defined only when the COLLTRACE option is switched on.
if(COLLTRACE)
  target_compile_definitions(rccl PRIVATE ENABLE_COLLTRACE)
endif()
-1
查看文件
@@ -21,7 +21,6 @@
# SOFTWARE.
set(ALL_PARAMS "ALL_COLLS" "ALL_ALGOS" "ALL_PROTOS" "ALL_REDOPS" "ALL_TYPES")
set(ALL_COLLS "AllGather" "AllReduce" "AllToAllPivot" "Broadcast" "Reduce" "ReduceScatter" "SendRecv")
set(ALL_ALGOS "TREE" "RING" "COLLNET_DIRECT" "COLLNET_CHAIN")
set(ALL_PROTOS "LL" "LL128" "SIMPLE")
+9 -1
查看文件
@@ -25,6 +25,7 @@ install_library=false
msccl_kernel_enabled=true
num_parallel_jobs=$(nproc)
npkit_enabled=false
nvtx_enabled=false
run_tests=false
run_tests_all=false
time_trace=false
@@ -50,6 +51,7 @@ function display_help()
echo " --amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, seperate by ';' (builds for all supported GPU architectures by default)"
echo " --no_clean Don't delete files if they already exist"
echo " --npkit-enable Compile with npkit enabled"
echo " --nvtx-enable Compile with nvtx enabled"
echo " -p|--package_build Build RCCL package"
echo " --prefix Specify custom directory to install RCCL to (default: /opt/rocm)"
echo " --rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility"
@@ -68,7 +70,7 @@ function display_help()
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ $? -eq 4 ]]; then
# Parse the command line exactly once, using the complete long-option list.
# A preceding call without "nvtx-enable" would make getopt report
# --nvtx-enable as an unrecognized option before its result is overwritten.
GETOPT_PARSE=$(getopt --name "${0}" --options dfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable_backtrace,disable-colltrace,disable-msccl-kernel,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,nvtx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,verbose -- "$@")
else
echo "Need a new version of getopt"
exit 1
@@ -97,6 +99,7 @@ while true; do
--amdgpu_targets) build_amdgpu_targets=${2}; shift 2 ;;
--no_clean) clean_build=false; shift ;;
--npkit-enable) npkit_enabled=true; shift ;;
--nvtx-enable) nvtx_enabled=true; shift ;;
-p | --package_build) build_package=true; shift ;;
--prefix) install_prefix=${2}; shift 2 ;;
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
@@ -220,6 +223,11 @@ if ($install_dependencies); then
cmake_common_options="${cmake_common_options} -DINSTALL_DEPENDENCIES=ON"
fi
# Forward a --nvtx-enable request to CMake by appending -DNVTX=ON
# to the shared option string.
if [[ ${nvtx_enabled} == "true" ]]; then
    cmake_common_options+=" -DNVTX=ON"
fi
cmake_executable=cmake
case "${OS_ID}" in
centos|rhel)
+4
查看文件
@@ -60,6 +60,10 @@ static __inline__ int ncclTypeSize(ncclDataType_t type) {
#include "alloc.h"
#include "utils.h"
#include "param.h"
// Select the NVTX header: nvtx.h by default, nvtx_stub.h when the build
// defines NVTX_NO_IMPL.
#ifndef NVTX_NO_IMPL
#include "nvtx.h"
#else
#include "nvtx_stub.h"
#endif
#endif // end include guard