Dump compiler-determined GPU kernel resource usage (#1965)
Adds a --kernel-resource-use flag to install.sh that dumps compiler-reported per-GPU kernel resource usage (e.g., VGPRs, SGPRs, LDS, scratch) during the build.
Этот коммит содержится в:
коммит произвёл
alex-breslow-amd
родитель
97f2665da2
Коммит
ff209e5b19
@@ -35,6 +35,7 @@ option(ENABLE_NPKIT "Enable NPKit"
|
||||
option(ENABLE_IFC "Enable indirect function call" OFF)
|
||||
option(GENERATE_SYM_KERNELS "Generate symmetric memory kernels" OFF)
|
||||
option(INSTALL_DEPENDENCIES "Force install dependencies" OFF)
|
||||
# Opt-in diagnostic: when ON, the rccl target is linked with -Rpass-analysis=kernel-resource-usage.
option(REPORT_KERNEL_RESOURCE_USE "Add -Rpass-analysis=kernel-resource-usage to rccl link options" OFF)
|
||||
option(ROCTX "Enable ROCTX" ON)
|
||||
option(PROFILE "Enable profiling" OFF)
|
||||
option(TIMETRACE "Enable time-trace during compilation" OFF)
|
||||
@@ -1186,6 +1187,10 @@ if (HAVE_KERNARG_PRELOAD)
|
||||
target_compile_options(rccl PRIVATE -mllvm --amdgpu-kernarg-preload-count=16)
|
||||
endif()
|
||||
|
||||
# Opt-in diagnostic: when REPORT_KERNEL_RESOURCE_USE is ON, add
# -Rpass-analysis=kernel-resource-usage to the link options of the rccl target.
if (REPORT_KERNEL_RESOURCE_USE)
|
||||
target_link_options(rccl PRIVATE -Rpass-analysis=kernel-resource-usage)
|
||||
endif()
|
||||
|
||||
if (DUMP_ASM) # Save temporary files from kernel compilation
|
||||
message(STATUS "Disassembling librccl.so to asm")
|
||||
# Maintain symbols but without changing code. Keep additional data in dwarf section of binary.
|
||||
|
||||
+8
-1
@@ -32,6 +32,7 @@ enable_mscclpp_clip=false
|
||||
num_parallel_jobs=$(nproc)
|
||||
npkit_enabled=false
|
||||
openmp_test_enabled=false
|
||||
kernel_resource_use=false
|
||||
roctx_enabled=true
|
||||
run_tests=false
|
||||
run_tests_all=false
|
||||
@@ -61,6 +62,7 @@ function display_help()
|
||||
echo " -h|--help Prints this help message"
|
||||
echo " -i|--install Install RCCL library (see --prefix argument below)"
|
||||
echo " -j|--jobs Specify how many parallel compilation jobs to run ($num_parallel_jobs by default)"
|
||||
echo " --kernel-resource-use Dump GPU kernel resource usage (e.g., VGPRs, scratch, spill) at link stage"
|
||||
echo " -l|--local_gpu_only Only compile for local GPU architecture"
|
||||
echo " --amdgpu_targets Only compile for specified GPU architecture(s). For multiple targets, separate by ';' (builds for all supported GPU architectures by default)"
|
||||
echo " --no_clean Don't delete files if they already exist"
|
||||
@@ -86,7 +88,7 @@ function display_help()
|
||||
# check if we have a modern version of getopt that can handle whitespace and long parameters
|
||||
getopt -T
|
||||
if [[ "$?" -eq 4 ]]; then
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,dump-asm,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,dump-asm,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,kernel-resource-use,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
|
||||
else
|
||||
echo "Need a new version of getopt"
|
||||
exit 1
|
||||
@@ -116,6 +118,7 @@ while true; do
|
||||
-h | --help) display_help; exit 0 ;;
|
||||
-i | --install) install_library=true; shift ;;
|
||||
-j | --jobs) num_parallel_jobs=${2}; shift 2 ;;
|
||||
--kernel-resource-use) kernel_resource_use=true; shift ;;
|
||||
-l | --local_gpu_only) build_local_gpu_only=true; shift ;;
|
||||
--amdgpu_targets) build_amdgpu_targets=${2}; shift 2 ;;
|
||||
--no_clean) clean_build=false; shift ;;
|
||||
@@ -271,6 +274,10 @@ if [[ "${install_library}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DCMAKE_INSTALL_PREFIX=${install_prefix}"
|
||||
fi
|
||||
|
||||
# Forward the --kernel-resource-use request to CMake by enabling REPORT_KERNEL_RESOURCE_USE.
if [[ "${kernel_resource_use}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DREPORT_KERNEL_RESOURCE_USE=ON"
|
||||
fi
|
||||
|
||||
# Enable trace debug level
|
||||
if [[ "${log_trace}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DTRACE=ON"
|
||||
|
||||
Ссылка в новой задаче
Block a user