Implement disassembling library into assembly with source code (#1714)

- Add --dump-asm to install.sh to dump assembly from the RCCL library
Tento commit je obsažen v:
alex-breslow-amd
2025-09-23 10:11:32 -07:00
odevzdal GitHub
rodič c1e1f2faeb
revize 8d6e21285c
2 změnil soubory, kde provedl 30 přidání a 1 odebrání
+21
Zobrazit soubor
@@ -24,6 +24,7 @@ option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on
# User-configurable boolean build options (cache entries with their defaults).
option(BUILD_SHARED_LIBS "Build as shared library" ON)
option(BUILD_TESTS "Build unit test programs" OFF)
option(COLLTRACE "Collective Trace Option" ON)
# When ON, the if (DUMP_ASM) section of this file compiles rccl with
# -gline-tables-only and adds post-build llvm-objdump steps that write one
# librccl.<arch>.s disassembly file per entry in GPU_TARGETS.
option(DUMP_ASM "Disassemble and dump" OFF)
option(ENABLE_CODE_COVERAGE "Enable code coverage" OFF)
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
option(ENABLE_MSCCLPP "Enable MSCCL++" OFF)
@@ -1185,6 +1186,26 @@ if (HAVE_KERNARG_PRELOAD)
target_compile_options(rccl PRIVATE -mllvm --amdgpu-kernarg-preload-count=16)
endif()
# Optional post-build disassembly of the GPU code embedded in librccl.so.
#
# When DUMP_ASM is ON:
#   1. rccl is compiled with -gline-tables-only (line-table debug info only),
#      which the -l/--source flags passed to llvm-objdump below rely on.
#   2. After rccl is built, `llvm-objdump --offload-fatbin` is run on librccl.so
#      to extract the embedded offload binaries. The loop below expects one
#      extracted file per GPU target, named
#      librccl.so.0.hipv4-amdgcn-amd-amdhsa--<arch>.
#   3. Each extracted file is disassembled, with source lines interleaved, into
#      librccl.<arch>.s.
#
# Every command runs in ${CMAKE_CURRENT_BINARY_DIR}; this was previously the
# implicit default and is now stated explicitly because the commands use bare
# relative file names.
if (DUMP_ASM)
  # The commands below operate on librccl.so, which only exists when rccl is a
  # shared library. Fail at configure time with a clear message instead of
  # letting the post-build step fail on a missing file after a full build.
  get_target_property(RCCL_TARGET_TYPE rccl TYPE)
  if (NOT "${RCCL_TARGET_TYPE}" STREQUAL "SHARED_LIBRARY")
    message(FATAL_ERROR
      "DUMP_ASM requires rccl to be built as a shared library (librccl.so), "
      "but its target type is ${RCCL_TARGET_TYPE}. Enable BUILD_SHARED_LIBS or disable DUMP_ASM.")
  endif()

  message(STATUS "Disassembling librccl.so to asm")

  # Maintain symbols but without changing code. Keep additional data in dwarf section of binary.
  target_compile_options(rccl PRIVATE -gline-tables-only)

  set(OBJ_DUMP "${ROCM_PATH}/llvm/bin/llvm-objdump")

  # Step 2: extract the offload binaries embedded in the freshly built library.
  add_custom_command(TARGET rccl POST_BUILD
    COMMENT "Disassembling RCCL library"
    COMMAND /bin/bash -c "${OBJ_DUMP} --offload-fatbin librccl.so"
    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
    VERBATIM
  )

  # Step 3: one disassembly listing per GPU target. bash is used so that the
  # output redirection (>) is interpreted by a shell.
  foreach(GPUARCH ${GPU_TARGETS})
    add_custom_command(TARGET rccl POST_BUILD
      COMMENT "Disassembling RCCL library to dump assembly for ${GPUARCH}"
      COMMAND /bin/bash -c "${OBJ_DUMP} -d -l --source --symbolize-operands librccl.so.0.hipv4-amdgcn-amd-amdhsa--${GPUARCH} > librccl.${GPUARCH}.s"
      WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
      VERBATIM
    )
  endforeach()
endif()
## NOTE: This is currently being handled by rocm-cmake, however may need to be re-enabled in the future
#foreach(target ${GPU_TARGETS})
# target_compile_options(rccl PRIVATE --offload-arch=${target})
+9 -1
Zobrazit soubor
@@ -19,6 +19,7 @@ build_tests=false
# Default values for build settings. The argument-parsing loop overrides them
# when the matching flag is given (e.g. --dump-asm sets dump_asm=true and
# --disable-colltrace sets collective_trace=false).
build_verbose=false
clean_build=true
collective_trace=true
dump_asm=false
enable_code_coverage=false
enable_ninja=""
install_dependencies=false
@@ -52,6 +53,7 @@ function display_help()
echo " --enable_backtrace Build with custom backtrace support"
echo " --disable-colltrace Build without collective trace"
echo " --disable-msccl-kernel Build without MSCCL kernels"
echo " --dump-asm Disassemble code and dump assembly with inline code"
echo " --enable-mscclpp Build with MSCCL++ support"
echo " --enable-mscclpp-clip Build MSCCL++ with clip wrapper on bfloat16 and half addition routines"
echo " --disable-roctx Build without ROCTX logging"
@@ -84,7 +86,7 @@ function display_help()
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ "$?" -eq 4 ]]; then
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,dump-asm,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
else
echo "Need a new version of getopt"
exit 1
@@ -106,6 +108,7 @@ while true; do
--enable_backtrace) build_bfd=true; shift ;;
--disable-colltrace) collective_trace=false; shift ;;
--disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
--dump-asm) dump_asm=true; shift ;;
--enable-mscclpp) mscclpp_enabled=true; shift ;;
--enable-mscclpp-clip) enable_mscclpp_clip=true; shift ;;
--disable-roctx) roctx_enabled=false; shift ;;
@@ -278,6 +281,11 @@ if [[ "${roctx_enabled}" == false ]]; then
cmake_common_options="${cmake_common_options} -DROCTX=OFF"
fi
# Forward a --dump-asm request to CMake by turning on the DUMP_ASM option
if [[ "${dump_asm}" == true ]]; then
    cmake_common_options+=" -DDUMP_ASM=ON"
fi
# Enable OpenMP in unit tests
if [[ "${openmp_test_enabled}" == true ]]; then
cmake_common_options="${cmake_common_options} -DOPENMP_TESTS_ENABLED=ON"