From 8d6e21285c26eba772eba78d27764d559281d3ab Mon Sep 17 00:00:00 2001
From: alex-breslow-amd
Date: Tue, 23 Sep 2025 10:11:32 -0700
Subject: [PATCH] Implement disassembling library into assembly with source code (#1714)

- Add --dump-asm to install.sh to dump assembly from the RCCL library
---
 CMakeLists.txt | 37 +++++++++++++++++++++++++++++++++++++
 install.sh     | 10 +++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a823248740..c8281f23ed 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,7 @@ option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on
 option(BUILD_SHARED_LIBS "Build as shared library" ON)
 option(BUILD_TESTS "Build unit test programs" OFF)
 option(COLLTRACE "Collective Trace Option" ON)
+option(DUMP_ASM "Disassemble and dump" OFF)
 option(ENABLE_CODE_COVERAGE "Enable code coverage" OFF)
 option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
 option(ENABLE_MSCCLPP "Enable MSCCL++" OFF)
@@ -1185,6 +1186,42 @@ if (HAVE_KERNARG_PRELOAD)
   target_compile_options(rccl PRIVATE -mllvm --amdgpu-kernarg-preload-count=16)
 endif()
 
+# Optional post-build step: run llvm-objdump on librccl.so and write one
+# disassembly listing per entry of GPU_TARGETS to librccl.<arch>.s
+if (DUMP_ASM)
+  # The commands below operate on librccl.so, so a static build cannot be dumped.
+  if (NOT BUILD_SHARED_LIBS)
+    message(FATAL_ERROR "DUMP_ASM disassembles librccl.so and requires BUILD_SHARED_LIBS=ON")
+  endif()
+
+  # Fail at configure time rather than after the full build if the tool is missing.
+  set(OBJ_DUMP "${ROCM_PATH}/llvm/bin/llvm-objdump")
+  if (NOT EXISTS "${OBJ_DUMP}")
+    message(FATAL_ERROR "DUMP_ASM requires llvm-objdump, but ${OBJ_DUMP} does not exist")
+  endif()
+
+  message(STATUS "Disassembling librccl.so to asm")
+  # Maintain symbols but without changing code. Keep additional data in dwarf section of binary.
+  target_compile_options(rccl PRIVATE -gline-tables-only)
+
+  # Run from the library's output directory so the relative file names below resolve.
+  add_custom_command(TARGET rccl POST_BUILD
+    COMMENT "Disassembling RCCL library"
+    COMMAND "${OBJ_DUMP}" --offload-fatbin librccl.so
+    WORKING_DIRECTORY "$<TARGET_FILE_DIR:rccl>"
+    VERBATIM
+  )
+  foreach(GPUARCH ${GPU_TARGETS})
+    # bash is needed here only for the output redirection.
+    add_custom_command(TARGET rccl POST_BUILD
+      COMMENT "Disassembling RCCL library to dump assembly for ${GPUARCH}"
+      COMMAND /bin/bash -c "${OBJ_DUMP} -d -l --source --symbolize-operands librccl.so.0.hipv4-amdgcn-amd-amdhsa--${GPUARCH} > librccl.${GPUARCH}.s"
+      WORKING_DIRECTORY "$<TARGET_FILE_DIR:rccl>"
+      VERBATIM
+    )
+  endforeach()
+endif()
+
 ## NOTE: This is currently being handled by rocm-cmake, however may need to be re-enabled in the future
 #foreach(target ${GPU_TARGETS})
 # target_compile_options(rccl PRIVATE --offload-arch=${target})
diff --git a/install.sh b/install.sh
index 3b4c55a889..6fa49d6d51 100755
--- a/install.sh
+++ b/install.sh
@@ -19,6 +19,7 @@ build_tests=false
 build_verbose=false
 clean_build=true
 collective_trace=true
+dump_asm=false
 enable_code_coverage=false
 enable_ninja=""
 install_dependencies=false
@@ -52,6 +53,7 @@ function display_help()
     echo " --enable_backtrace Build with custom backtrace support"
     echo " --disable-colltrace Build without collective trace"
     echo " --disable-msccl-kernel Build without MSCCL kernels"
+    echo " --dump-asm Disassemble code and dump assembly with inline code"
     echo " --enable-mscclpp Build with MSCCL++ support"
     echo " --enable-mscclpp-clip Build MSCCL++ with clip wrapper on bfloat16 and half addition routines"
     echo " --disable-roctx Build without ROCTX logging"
@@ -84,7 +86,7 @@ function display_help()
 # check if we have a modern version of getopt that can handle whitespace and long parameters
 getopt -T
 if [[ "$?" -eq 4 ]]; then
-    GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
+    GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprt --longoptions address-sanitizer,dependencies,debug,dump-asm,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,verbose -- "$@")
 else
     echo "Need a new version of getopt"
     exit 1
@@ -106,6 +108,7 @@ while true; do
         --enable_backtrace) build_bfd=true; shift ;;
         --disable-colltrace) collective_trace=false; shift ;;
         --disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
+        --dump-asm) dump_asm=true; shift ;;
         --enable-mscclpp) mscclpp_enabled=true; shift ;;
         --enable-mscclpp-clip) enable_mscclpp_clip=true; shift ;;
         --disable-roctx) roctx_enabled=false; shift ;;
@@ -278,6 +281,11 @@ if [[ "${roctx_enabled}" == false ]]; then
     cmake_common_options="${cmake_common_options} -DROCTX=OFF"
 fi
 
+# Dump ASM files from GPU compilation
+if [[ "${dump_asm}" == true ]]; then
+    cmake_common_options="${cmake_common_options} -DDUMP_ASM=ON"
+fi
+
 # Enable OpenMP in unit tests
 if [[ "${openmp_test_enabled}" == true ]]; then
     cmake_common_options="${cmake_common_options} -DOPENMP_TESTS_ENABLED=ON"