Implement disassembling library into assembly with source code (#1714)
- Add --dump-asm to install.sh to dump assembly from the RCCL library
Tá an tiomantas seo le fáil i:
tiomanta ag
GitHub
tuismitheoir
c1e1f2faeb
tiomantas
8d6e21285c
@@ -24,6 +24,7 @@ option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on
|
||||
option(BUILD_SHARED_LIBS "Build as shared library" ON)
|
||||
option(BUILD_TESTS "Build unit test programs" OFF)
|
||||
option(COLLTRACE "Collective Trace Option" ON)
|
||||
option(DUMP_ASM "Disassemble and dump" OFF)
|
||||
option(ENABLE_CODE_COVERAGE "Enable code coverage" OFF)
|
||||
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
|
||||
option(ENABLE_MSCCLPP "Enable MSCCL++" OFF)
|
||||
@@ -1185,6 +1186,26 @@ if (HAVE_KERNARG_PRELOAD)
|
||||
target_compile_options(rccl PRIVATE -mllvm --amdgpu-kernarg-preload-count=16)
|
||||
endif()
|
||||
|
||||
# Optional post-build disassembly of the RCCL library (enabled with -DDUMP_ASM=ON).
# Once the rccl target has been linked, llvm-objdump is run in two stages:
#   1. --offload-fatbin splits the offload bundles embedded in the library into
#      separate per-architecture files next to it;
#   2. each file matching an entry of GPU_TARGETS is disassembled, with source lines
#      interleaved, into librccl.<arch>.s.
if (DUMP_ASM)
  # NOTE: printed at configure time; the disassembly itself happens after the link step.
  message(STATUS "Disassembling librccl.so to asm")

  # Line-table debug info only: lets the disassembler map instructions back to
  # source lines. Per the original author's note this keeps symbols without
  # changing the generated code; the extra data lives in the DWARF sections.
  target_compile_options(rccl PRIVATE -gline-tables-only)

  # Fail at configure time, not after a full library build, if the tool is absent.
  set(OBJ_DUMP "${ROCM_PATH}/llvm/bin/llvm-objdump")
  if (NOT EXISTS "${OBJ_DUMP}")
    message(FATAL_ERROR "DUMP_ASM requires llvm-objdump, but it was not found at ${OBJ_DUMP}")
  endif()

  # Stage 1: extract the offload bundles. No shell features are needed, so the tool
  # is invoked directly. The library name comes from the target instead of being
  # hard-coded, and the command runs in the library's own output directory so the
  # extracted files land beside it.
  add_custom_command(TARGET rccl POST_BUILD
    COMMENT "Disassembling RCCL library"
    COMMAND "${OBJ_DUMP}" --offload-fatbin $<TARGET_LINKER_FILE_NAME:rccl>
    WORKING_DIRECTORY $<TARGET_FILE_DIR:rccl>
    VERBATIM
  )

  # Stage 2: one assembly listing per GPU architecture. A shell is required here only
  # for the output redirection; paths are single-quoted so spaces survive.
  # The ".0.hipv4-amdgcn-amd-amdhsa--<arch>" suffix is the name stage 1 is expected to
  # give the first bundle's entry for <arch> - presumably index 0; verify if the
  # library ever carries more than one bundle.
  foreach(GPUARCH ${GPU_TARGETS})
    add_custom_command(TARGET rccl POST_BUILD
      COMMENT "Disassembling RCCL library to dump assembly for ${GPUARCH}"
      COMMAND /bin/bash -c "'${OBJ_DUMP}' -d -l --source --symbolize-operands '$<TARGET_LINKER_FILE_NAME:rccl>.0.hipv4-amdgcn-amd-amdhsa--${GPUARCH}' > 'librccl.${GPUARCH}.s'"
      WORKING_DIRECTORY $<TARGET_FILE_DIR:rccl>
      VERBATIM
    )
  endforeach()
endif()
|
||||
|
||||
## NOTE: This is currently being handled by rocm-cmake, however may need to be re-enabled in the future
|
||||
#foreach(target ${GPU_TARGETS})
|
||||
# target_compile_options(rccl PRIVATE --offload-arch=${target})
|
||||
|
||||
@@ -19,6 +19,7 @@ build_tests=false
|
||||
build_verbose=false
|
||||
clean_build=true
|
||||
collective_trace=true
|
||||
dump_asm=false
|
||||
enable_code_coverage=false
|
||||
enable_ninja=""
|
||||
install_dependencies=false
|
||||
@@ -52,6 +53,7 @@ function display_help()
|
||||
echo " --enable_backtrace Build with custom backtrace support"
|
||||
echo " --disable-colltrace Build without collective trace"
|
||||
echo " --disable-msccl-kernel Build without MSCCL kernels"
|
||||
echo " --dump-asm Disassemble code and dump assembly with inline code"
|
||||
echo " --enable-mscclpp Build with MSCCL++ support"
|
||||
echo " --enable-mscclpp-clip Build MSCCL++ with clip wrapper on bfloat16 and half addition routines"
|
||||
echo " --disable-roctx Build without ROCTX logging"
|
||||
@@ -84,7 +86,7 @@ function display_help()
|
||||
# check if we have a modern version of getopt that can handle whitespace and long parameters
|
||||
getopt -T
|
||||
if [[ "$?" -eq 4 ]]; then
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,dump-asm,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
|
||||
else
|
||||
echo "Need a new version of getopt"
|
||||
exit 1
|
||||
@@ -106,6 +108,7 @@ while true; do
|
||||
--enable_backtrace) build_bfd=true; shift ;;
|
||||
--disable-colltrace) collective_trace=false; shift ;;
|
||||
--disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
|
||||
--dump-asm) dump_asm=true; shift ;;
|
||||
--enable-mscclpp) mscclpp_enabled=true; shift ;;
|
||||
--enable-mscclpp-clip) enable_mscclpp_clip=true; shift ;;
|
||||
--disable-roctx) roctx_enabled=false; shift ;;
|
||||
@@ -278,6 +281,11 @@ if [[ "${roctx_enabled}" == false ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DROCTX=OFF"
|
||||
fi
|
||||
|
||||
# Ask CMake to disassemble the built RCCL library and dump per-GPU-architecture assembly
|
||||
if [[ "${dump_asm}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DDUMP_ASM=ON"
|
||||
fi
|
||||
|
||||
# Enable OpenMP in unit tests
|
||||
if [[ "${openmp_test_enabled}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DOPENMP_TESTS_ENABLED=ON"
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir