From cd91b85935d8b8017feb76b7b7984b44d08a6261 Mon Sep 17 00:00:00 2001
From: Donato Capitella
Date: Sun, 1 Feb 2026 10:27:12 +0000
Subject: [PATCH] Fix: Provide inline dummy SMI symbols when SMI is disabled to prevent link errors

When USE_AMDSMI or USE_ROCMSMI is not defined, replace the wrapper
prototypes with header-only inline stubs so that references to these
functions still resolve at link time.

The stubs report zero devices and an empty PCI bus id string, fail
device-index lookup by PCI bus id with ncclInternalError, and describe
every link as a single-hop, single-link PCIe connection. The link type
output is always written so callers never read an indeterminate value
after a successful return.

---
 projects/rccl/src/include/amdsmi_wrap.h   | 10 ++++++++++
 projects/rccl/src/include/rocm_smi_wrap.h |  9 +++++++++
 2 files changed, 19 insertions(+)

diff --git a/projects/rccl/src/include/amdsmi_wrap.h b/projects/rccl/src/include/amdsmi_wrap.h
index 8436e72080..0e8918e5bc 100644
--- a/projects/rccl/src/include/amdsmi_wrap.h
+++ b/projects/rccl/src/include/amdsmi_wrap.h
@@ -7,11 +7,21 @@
 #include "amd_smi/amdsmi.h"
 #include "nccl.h"
 
+#ifdef USE_AMDSMI
 ncclResult_t amd_smi_init();
 ncclResult_t amd_smi_shutdown();
 ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs);
 ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
 ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
 ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count);
+#else
+// AMD SMI disabled: header-only stubs (zero devices; every link reported as 1-hop PCIe).
+inline ncclResult_t amd_smi_init() { return ncclSuccess; }
+inline ncclResult_t amd_smi_shutdown() { return ncclSuccess; }
+inline ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs) { *num_devs = 0; return ncclSuccess; }
+inline ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len) { if (len > 0) pciBusId[0] = '\0'; return ncclSuccess; }
+inline ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex) { return ncclInternalError; }
+inline ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count) { *type = AMDSMI_LINK_TYPE_PCIE; *hops=1; *count=1; return ncclSuccess; }
+#endif
 
 #endif
diff --git a/projects/rccl/src/include/rocm_smi_wrap.h b/projects/rccl/src/include/rocm_smi_wrap.h
index 87b1f2a72f..6787ed1307 100644
--- a/projects/rccl/src/include/rocm_smi_wrap.h
+++ b/projects/rccl/src/include/rocm_smi_wrap.h
@@ -29,10 +29,19 @@ THE SOFTWARE.
 #endif
 #include "nccl.h"
 
+#ifdef USE_ROCMSMI
 ncclResult_t rocm_smi_init();
 ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs);
 ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
 ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
 ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count);
+#else
+// ROCm SMI disabled: header-only stubs (zero devices; every link reported as 1-hop PCIe).
+inline ncclResult_t rocm_smi_init() { return ncclSuccess; }
+inline ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs) { *num_devs = 0; return ncclSuccess; }
+inline ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len) { if (len > 0) pciBusId[0] = '\0'; return ncclSuccess; }
+inline ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex) { return ncclInternalError; }
+inline ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count) { *rsmi_type = RSMI_IOLINK_TYPE_PCIEXPRESS; *hops=1; *count=1; return ncclSuccess; }
+#endif
 
 #endif