diff --git a/projects/rccl/CMakeLists.txt b/projects/rccl/CMakeLists.txt
index f48a8d7926..31e7336efc 100644
--- a/projects/rccl/CMakeLists.txt
+++ b/projects/rccl/CMakeLists.txt
@@ -793,6 +793,12 @@ if(ENABLE_AMDSMI)
   else()
     list(APPEND SRC_FILES src/misc/rocm_smi_wrap.cc)
   endif()
+else()
+  # When SMI is disabled, compile the shim to provide dummy symbols (rsmi_init).
+  # This satisfies external dependencies (like PyTorch) that expect SMI symbols
+  # to be present, preventing them from failing to load or trying to load
+  # the broken system library.
+  list(APPEND SRC_FILES src/misc/smi_shim.cc)
 endif()
 
 list(APPEND SRC_FILES ${SMI_SOURCES})
diff --git a/projects/rccl/src/misc/smi_shim.cc b/projects/rccl/src/misc/smi_shim.cc
new file mode 100644
index 0000000000..9760c51e09
--- /dev/null
+++ b/projects/rccl/src/misc/smi_shim.cc
@@ -0,0 +1,10 @@
+#include <cstdint>
+
+extern "C" {
+  // Dummy implementation of rsmi_init to satisfy linker dependencies
+  // when the real ROCm SMI library is broken or causes Bus Errors (gfx1151).
+  // The flags argument is ignored; always returns 0 (RSMI_STATUS_SUCCESS).
+  int rsmi_init(uint64_t /*flags*/) {
+    return 0;
+  }
+}