Fix(critical): Prevent ncclInternalError when SMI is disabled by mocking getDeviceIndexByPciBusId

Bu işleme şunda yer alıyor:
Donato Capitella
2026-02-01 12:48:18 +00:00
ebeveyn 532214edfb
işleme a2686c9f41
+1 -1
Dosyayı Görüntüle
@@ -39,7 +39,7 @@ ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsm
// No-op stub: performs no initialization work and always returns ncclSuccess.
inline ncclResult_t rocm_smi_init() {
  return ncclSuccess;
}
// Stub: reports a device count of zero through num_devs and always returns
// ncclSuccess. num_devs is written unconditionally, so it must point to
// valid storage.
inline ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs) {
  num_devs[0] = 0;
  return ncclSuccess;
}
// Stub: yields an empty PCI bus id string for any device index and always
// returns ncclSuccess. The buffer is only touched when it has room for at
// least the terminating NUL (len != 0).
inline ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len) {
  (void)deviceIndex;  // the index is not consulted by this stub
  if (len != 0) {
    *pciBusId = '\0';
  }
  return ncclSuccess;
}
// The next two lines carry the same signature: this listing is a one-line
// change ("+1 -1") whose removed/added markers are not shown, so presumably
// the first line is the old stub and the second is its replacement; only one
// of them can exist in a compilable file.
//   - first line:  ignores both arguments and always returns ncclInternalError.
//   - second line: ignores pciBusId, writes 0 to *deviceIndex (which must be a
//     valid pointer) and always returns ncclSuccess.
// NOTE(review): the replacement reports index 0 for every bus id, while the
// rocm_smi_getNumDevice stub in this same block reports 0 devices; confirm
// that callers tolerate this combination.
inline ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex) { return ncclInternalError; }
inline ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex) { *deviceIndex = 0; return ncclSuccess; }
// Stub: for any device pair, reports one hop and a count of one, and always
// returns ncclSuccess. hops and count are written unconditionally and must be
// valid pointers.
// NOTE(review): *rsmi_type is never written here; confirm that callers
// initialize it before the call or do not read it afterwards.
inline ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count) {
  (void)srcDev;
  (void)dstDev;
  (void)rsmi_type;
  *hops = 1;
  *count = 1;
  return ncclSuccess;
}
#endif