Fix: Provide inline dummy SMI symbols when SMI is disabled to prevent link errors

This commit is contained in:
Donato Capitella
2026-02-01 10:27:12 +00:00
parent 484bd5bf0f
commit cd91b85935
2 changed files with 17 additions and 0 deletions
+9
View File
@@ -7,11 +7,20 @@
#include "amd_smi/amdsmi.h" #include "amd_smi/amdsmi.h"
#include "nccl.h" #include "nccl.h"
#ifdef USE_AMDSMI
/* Declarations of the amd_smi_* wrappers, compiled only when USE_AMDSMI is
 * defined. No bodies are visible in this view; they are expected to be
 * provided by another translation unit. */
ncclResult_t amd_smi_init(); ncclResult_t amd_smi_init();
ncclResult_t amd_smi_shutdown(); ncclResult_t amd_smi_shutdown();
ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs); ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs);
ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len); ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex); ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count); ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count);
#else
/* Stub used when USE_AMDSMI is not defined: does nothing and reports success. */
inline ncclResult_t amd_smi_init()
{
  return ncclSuccess;
}
/* Stub used when USE_AMDSMI is not defined: does nothing and reports success. */
inline ncclResult_t amd_smi_shutdown()
{
  return ncclSuccess;
}
/* Stub used when USE_AMDSMI is not defined.
 * Stores 0 into *num_devs and returns ncclSuccess.
 * num_devs is dereferenced unconditionally, so it must be a valid pointer. */
inline ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs)
{
  num_devs[0] = 0;
  return ncclSuccess;
}
/* Stub used when USE_AMDSMI is not defined.
 * Writes an empty string into pciBusId when the buffer has room for at least
 * the terminator (len != 0); deviceIndex is ignored. Always returns
 * ncclSuccess. */
inline ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len)
{
  if (len != 0) {
    *pciBusId = '\0';
  }
  return ncclSuccess;
}
/* Stub used when USE_AMDSMI is not defined.
 * Always fails with ncclInternalError; neither pciBusId nor *deviceIndex is
 * read or written. */
inline ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex)
{
  return ncclInternalError;
}
/* Stub used when USE_AMDSMI is not defined.
 *
 * srcDev and dstDev are ignored. Every output is given a defined value
 * before ncclSuccess is returned:
 *   *type  - enumerator value 0 of amdsmi_link_type_t
 *   *hops  - 1
 *   *count - 1
 *
 * Previously *type was left untouched, so a caller that read it after a
 * successful call could observe an indeterminate value.
 * NOTE(review): confirm that enumerator value 0 is an acceptable
 * "no link information" result for the callers of this function. */
inline ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count)
{
  *type = (amdsmi_link_type_t)0;
  *hops = 1;
  *count = 1;
  return ncclSuccess;
}
#endif
#endif #endif
@@ -29,10 +29,18 @@ THE SOFTWARE.
#endif #endif
#include "nccl.h" #include "nccl.h"
#ifdef USE_ROCMSMI
/* Declarations of the rocm_smi_* wrappers, compiled only when USE_ROCMSMI is
 * defined. No bodies are visible in this view; they are expected to be
 * provided by another translation unit. */
ncclResult_t rocm_smi_init(); ncclResult_t rocm_smi_init();
ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs); ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs);
ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len); ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex); ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count); ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count);
#else
/* Stub used when USE_ROCMSMI is not defined: does nothing and reports success. */
inline ncclResult_t rocm_smi_init()
{
  return ncclSuccess;
}
/* Stub used when USE_ROCMSMI is not defined.
 * Stores 0 into *num_devs and returns ncclSuccess.
 * num_devs is dereferenced unconditionally, so it must be a valid pointer. */
inline ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs)
{
  num_devs[0] = 0;
  return ncclSuccess;
}
/* Stub used when USE_ROCMSMI is not defined.
 * Writes an empty string into pciBusId when the buffer has room for at least
 * the terminator (len != 0); deviceIndex is ignored. Always returns
 * ncclSuccess. */
inline ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len)
{
  if (len != 0) {
    *pciBusId = '\0';
  }
  return ncclSuccess;
}
/* Stub used when USE_ROCMSMI is not defined.
 * Always fails with ncclInternalError; neither pciBusId nor *deviceIndex is
 * read or written. */
inline ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex)
{
  return ncclInternalError;
}
/* Stub used when USE_ROCMSMI is not defined.
 *
 * srcDev and dstDev are ignored. Every output is given a defined value
 * before ncclSuccess is returned:
 *   *rsmi_type - enumerator value 0 of RSMI_IO_LINK_TYPE
 *   *hops      - 1
 *   *count     - 1
 *
 * Previously *rsmi_type was left untouched, so a caller that read it after a
 * successful call could observe an indeterminate value.
 * NOTE(review): confirm that enumerator value 0 is an acceptable
 * "no link information" result for the callers of this function. */
inline ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count)
{
  *rsmi_type = (RSMI_IO_LINK_TYPE)0;
  *hops = 1;
  *count = 1;
  return ncclSuccess;
}
#endif
#endif #endif