diff --git a/makefiles/common.mk b/makefiles/common.mk index 6ba9bbfce0..0f01671b67 100644 --- a/makefiles/common.mk +++ b/makefiles/common.mk @@ -76,7 +76,7 @@ $(info NVCC_GENCODE is ${NVCC_GENCODE}) ifeq ($(shell test "0$(CUDA_MAJOR)" -ge 13; echo $$?),0) CXXSTD ?= -std=c++17 else - CXXSTD ?= -std=c++11 + CXXSTD ?= -std=c++14 endif CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) -fPIC -fvisibility=hidden \ diff --git a/makefiles/version.mk b/makefiles/version.mk index 013e972f34..0f482d31af 100644 --- a/makefiles/version.mk +++ b/makefiles/version.mk @@ -1,6 +1,6 @@ ##### version NCCL_MAJOR := 2 NCCL_MINOR := 27 -NCCL_PATCH := 5 +NCCL_PATCH := 6 NCCL_SUFFIX := PKG_REVISION := 1 diff --git a/src/graph/paths.cc b/src/graph/paths.cc index 4b44abd01f..82c0d99726 100644 --- a/src/graph/paths.cc +++ b/src/graph/paths.cc @@ -709,8 +709,8 @@ ncclResult_t ncclTopoComputePaths(struct ncclTopoSystem* system, struct ncclComm peerNode->paths[GPU][g].type <= PATH_NVL && /* and (3) is on the same node as us */ NCCL_TOPO_ID_SYSTEM_ID(peerNode->id) == NCCL_TOPO_ID_SYSTEM_ID(gpu->id) && - /* and (4) has either higher bw to that NIC or avoid going through the CPU*/ - (peerNode->paths[NET][n].bw > gpu->paths[NET][n].bw || gpu->paths[NET][n].type > pxnType)) + /* and (4) has either higher bw to that NIC or avoid going through the CPU (path.type is > PATH_PXN)*/ + (peerNode->paths[NET][n].bw > gpu->paths[NET][n].bw || gpu->paths[NET][n].type > PATH_PXN)) // We can use that GPU as relay to communicate with that NIC. // Only enabling it in the GPU->NIC direction for now to favor // receiving locally and sending remotely (consistent with net.cc) diff --git a/src/graph/search.cc b/src/graph/search.cc index 67e6009060..86199d78b5 100644 --- a/src/graph/search.cc +++ b/src/graph/search.cc @@ -960,7 +960,7 @@ float sm90SpeedArrayInter[] = { 48.0, 45.0, 42.0, 40.0, 30.0, 24.0, 22.0, 20.0, #define NSPEEDSINTER_SM90 (sizeof(sm90SpeedArrayInter)/sizeof(float)) float sm100SpeedArrayIntra[] = { 90.0, 80.0, 70.0, 60.0, 50.0, 40.0, 30.0, 24.0, 20.0, 19.0, 18.0 }; -float sm100SpeedArrayInter[] = { 48.0, 45.1, 42.0, 40.0, 30.0, 24.0, 22.0, 20.0, 17.5, 15.0, 12.0, 6.0, 3.0, 2.4, 1.2, 0.24, 0.12 }; +float sm100SpeedArrayInter[] = { 96.0, 48.0, 45.1, 42.0, 40.0, 30.0, 24.0, 22.0, 20.0, 17.5, 15.0, 12.0, 6.0, 3.0, 2.4, 1.2, 0.24, 0.12 }; #define NSPEEDSINTRA_SM100 (sizeof(sm100SpeedArrayIntra)/sizeof(float)) #define NSPEEDSINTER_SM100 (sizeof(sm100SpeedArrayInter)/sizeof(float)) @@ -1307,7 +1307,8 @@ ncclResult_t ncclTopoGetNetDev(struct ncclComm* comm, int rank, struct ncclTopoG NCCLCHECK(ncclTopoGetLocalGpu(comm->topo, netId, &g2)); if (g2 != -1) { struct ncclTopoNode* peerGpu = comm->topo->nodes[GPU].nodes+g2; - if (peerGpu->paths[GPU][g1].type <= PATH_NVL && peerGpu->paths[NET][n].type <= PATH_PXB) { + int pxnType = ncclParamPxnC2c() ? PATH_P2C : PATH_PXB; + if (peerGpu->paths[GPU][g1].type <= PATH_NVL && peerGpu->paths[NET][n].type <= pxnType) { *proxyRank = peerGpu->gpu.rank; if (dev) *dev = netDev; if (id) *id = netId; diff --git a/src/graph/topo.h b/src/graph/topo.h index 9b49c0222d..9ef10ff2d3 100644 --- a/src/graph/topo.h +++ b/src/graph/topo.h @@ -98,6 +98,8 @@ extern const char* topoLinkTypeStr[]; #define PATH_DIS 11 extern const char* topoPathTypeStr[]; +extern int64_t ncclParamPxnC2c(); + struct ncclTopoNode; struct ncclTopoLink { int type; diff --git a/src/include/ibvcore.h b/src/include/ibvcore.h index 8d8ecf1ec8..ae9051f286 100644 --- a/src/include/ibvcore.h +++ b/src/include/ibvcore.h @@ -9,6 +9,7 @@ #include #include #include +#include #if __GNUC__ >= 3 # define __attribute_const __attribute__((const)) @@ -39,7 +40,7 @@ union ibv_gid { #define vext_field_avail(type, fld, sz) (offsetof(type, fld) < (sz)) /*XXX:__VERBS_ABI_IS_EXTENDED produces warning "integer operation result is out of range" with g++ 4.8.2*/ -//static void *__VERBS_ABI_IS_EXTENDED = ((uint8_t *)NULL) - 1; +static void *__VERBS_ABI_IS_EXTENDED = ((uint8_t *)NULL) - 1; enum ibv_node_type { IBV_NODE_UNKNOWN = -1, @@ -208,7 +209,9 @@ struct ibv_port_attr { uint8_t active_speed; uint8_t phys_state; uint8_t link_layer; - uint8_t reserved; + uint8_t flags; + uint16_t port_cap_flags2; + uint32_t active_speed_ex; }; enum ibv_event_type { @@ -993,37 +996,50 @@ enum verbs_context_mask { struct verbs_context { /* "grows up" - new fields go here */ - int (*_reserved_2) (void); - int (*destroy_flow) (struct ibv_flow *flow); - int (*_reserved_1) (void); - struct ibv_flow * (*create_flow) (struct ibv_qp *qp, - struct ibv_flow_attr *flow_attr); + int (*query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr, + size_t port_attr_len); + int (*_reserved[25]) (void); + struct verbs_ex_private *priv; + int (*query_device_ex)(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr, + size_t attr_size); + int (*ibv_destroy_flow) (struct ibv_flow *flow); + void (*ABI_placeholder2) (void); /* DO NOT COPY THIS GARBAGE */ + struct ibv_flow * (*ibv_create_flow) (struct ibv_qp *qp, + struct ibv_flow_attr *flow_attr); + void (*ABI_placeholder1) (void); /* DO NOT COPY THIS GARBAGE */ struct ibv_qp * (*open_qp)(struct ibv_context *context, struct ibv_qp_open_attr *attr); struct ibv_qp * (*create_qp_ex)(struct ibv_context *context, struct ibv_qp_init_attr_ex *qp_init_attr_ex); int (*get_srq_num)(struct ibv_srq *srq, uint32_t *srq_num); - struct ibv_srq * (*create_srq_ex)(struct ibv_context *context, - struct ibv_srq_init_attr_ex *srq_init_attr_ex); - struct ibv_xrcd * (*open_xrcd)(struct ibv_context *context, - struct ibv_xrcd_init_attr *xrcd_init_attr); - int (*close_xrcd)(struct ibv_xrcd *xrcd); - uint64_t has_comp_mask; - size_t sz; /* Must be immediately before struct ibv_context */ - struct ibv_context context;/* Must be last field in the struct */ + struct ibv_srq * (*create_srq_ex)(struct ibv_context *context, + struct ibv_srq_init_attr_ex *srq_init_attr_ex); + struct ibv_xrcd * (*open_xrcd)(struct ibv_context *context, + struct ibv_xrcd_init_attr *xrcd_init_attr); + int (*close_xrcd)(struct ibv_xrcd *xrcd); + uint64_t _ABI_placeholder3; + size_t sz; /* Must be immediately before struct ibv_context */ + struct ibv_context context; /* Must be last field in the struct */ }; -/*XXX:__VERBS_ABI_IS_EXTENDED produces warning "integer operation result is out of range" with g++ 4.8.2*/ -/*static inline struct verbs_context *verbs_get_ctx(struct ibv_context *ctx) +static inline struct verbs_context *verbs_get_ctx(struct ibv_context *ctx) { - return (!ctx || (ctx->abi_compat != __VERBS_ABI_IS_EXTENDED)) ? - NULL : container_of(ctx, struct verbs_context, context); + if (ctx->abi_compat != __VERBS_ABI_IS_EXTENDED) + return NULL; + + /* open code container_of to not pollute the global namespace */ + return (struct verbs_context *)(((uintptr_t)ctx) - + offsetof(struct verbs_context, + context)); } #define verbs_get_ctx_op(ctx, op) ({ \ - struct verbs_context *_vctx = verbs_get_ctx(ctx); \ - (!_vctx || (_vctx->sz < sizeof(*_vctx) - offsetof(struct verbs_context, op)) || \ - !_vctx->op) ? NULL : _vctx; })*/ + struct verbs_context *__vctx = verbs_get_ctx(ctx); \ + (!__vctx || (__vctx->sz < sizeof(*__vctx) - offsetof(struct verbs_context, op)) || \ + !__vctx->op) ? NULL : __vctx; }) #define verbs_set_ctx_op(_vctx, op, ptr) ({ \ struct verbs_context *vctx = _vctx; \ @@ -1055,4 +1071,20 @@ struct ibv_ece { uint32_t comp_mask; }; +/** + * ibv_query_port_ex - Get (extended) port properties + */ +static inline int ibv_query_port_ex(struct ibv_context *context, + uint8_t port_num, + struct ibv_port_attr *port_attr) +{ + struct verbs_context *vctx = verbs_get_ctx_op(context, query_port); + + if (vctx) { + return vctx->query_port(context, port_num, port_attr, sizeof(*port_attr)); + } + + return -1; +} + #endif // NCCL_IBV_CORE_H_ diff --git a/src/include/plugin/plugin.h b/src/include/plugin/plugin.h index 7336c34d96..300e436a09 100644 --- a/src/include/plugin/plugin.h +++ b/src/include/plugin/plugin.h @@ -9,10 +9,16 @@ #include "nccl.h" +enum ncclPluginType { + ncclPluginTypeNet, + ncclPluginTypeTuner, + ncclPluginTypeProfiler, +}; + void* ncclOpenNetPluginLib(const char* name); void* ncclOpenTunerPluginLib(const char* name); void* ncclOpenProfilerPluginLib(const char* name); -void* ncclGetNetPluginLib(void); -ncclResult_t ncclClosePluginLib(void* handle); +void* ncclGetNetPluginLib(enum ncclPluginType type); +ncclResult_t ncclClosePluginLib(void* handle, enum ncclPluginType type); #endif diff --git a/src/init.cc b/src/init.cc index 2a57c46c00..af784c02d1 100644 --- a/src/init.cc +++ b/src/init.cc @@ -2170,6 +2170,7 @@ ncclResult_t ncclCommDestroy(ncclComm_t comm) { NVTX3_PAYLOAD(comm->commHash, nranks, rank, cudaDev)); TRACE(NCCL_INIT, "comm %p rank %d nRanks %d cudaDev %d busId %lx", comm, rank, nranks, cudaDev, comm->busId); + NCCLCHECK(ncclGroupStartInternal()); // Try and prevent a double free of the comm struct (user error) if (comm->rank == -1 || comm->nRanks == -1 || comm->cudaDev == -1 || comm->busId == -1) { WARN("comm %p has already been destroyed", comm); @@ -2184,6 +2185,8 @@ ncclResult_t ncclCommDestroy(ncclComm_t comm) { NCCLCHECKGOTO(ncclAsyncLaunch((struct ncclAsyncJob*)job, commReclaim, NULL, free, comm), res, fail); exit: + ncclGroupErrCheck(res); + NCCLCHECK(ncclGroupEndInternal()); return res; fail: goto exit; @@ -2207,6 +2210,7 @@ ncclResult_t ncclCommAbort(ncclComm_t comm) { if (comm == NULL) { return ncclSuccess; } + NCCLCHECK(ncclGroupStartInternal()); // Ask anything that might still be running on the device to quit NCCLCHECK(setCommAbortFlags(comm,1)); comm->destroyFlag = 1; @@ -2229,7 +2233,9 @@ ncclResult_t ncclCommAbort(ncclComm_t comm) { NCCLCHECKGOTO(ncclAsyncLaunch((struct ncclAsyncJob*)job, commReclaim, NULL, free, comm), res, fail); exit: - return ncclSuccess; + ncclGroupErrCheck(res); + NCCLCHECK(ncclGroupEndInternal()); + return res; fail: goto exit; } diff --git a/src/misc/ibvwrap.cc b/src/misc/ibvwrap.cc index 23bf5e1254..59f52e3208 100644 --- a/src/misc/ibvwrap.cc +++ b/src/misc/ibvwrap.cc @@ -142,8 +142,14 @@ ncclResult_t wrap_ibv_query_device(struct ibv_context *context, struct ibv_devic IBV_INT_CHECK_RET_ERRNO(ibvSymbols, ibv_internal_query_device, ibv_internal_query_device(context, device_attr), 0, "ibv_query_device"); } -ncclResult_t wrap_ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr) { /*returns 0 on success, or the value of errno on failure (which indicates the failure reason)*/ - IBV_INT_CHECK_RET_ERRNO(ibvSymbols, ibv_internal_query_port, ibv_internal_query_port(context, port_num, port_attr), 0, "ibv_query_port"); +ncclResult_t wrap_ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr) { + // First try and query the extended port attributes (e.g. active_speed_ex) + if (ibv_query_port_ex(context, port_num, port_attr) != 0) { + // Fall back to the original attribute API call, but zero all members first + memset(port_attr, 0, sizeof(*port_attr)); + IBV_INT_CHECK_RET_ERRNO(ibvSymbols, ibv_internal_query_port, ibv_internal_query_port(context, port_num, port_attr), 0, "ibv_query_port"); + } + return ncclSuccess; } ncclResult_t wrap_ibv_query_gid(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid) { diff --git a/src/misc/socket.cc b/src/misc/socket.cc index 278fb5c510..d066d28293 100644 --- a/src/misc/socket.cc +++ b/src/misc/socket.cc @@ -441,7 +441,8 @@ static ncclResult_t socketTryAccept(struct ncclSocket* sock) { if (sock->fd != -1) { sock->state = ncclSocketStateAccepted; } else if (errno == ENETDOWN || errno == EPROTO || errno == ENOPROTOOPT || errno == EHOSTDOWN || - errno == ENONET || errno == EHOSTUNREACH || errno == EOPNOTSUPP || errno == ENETUNREACH) { + errno == ENONET || errno == EHOSTUNREACH || errno == EOPNOTSUPP || errno == ENETUNREACH || + errno == EINTR) { /* per accept's man page, for linux sockets, the following errors might be already pending errors * and should be considered as EAGAIN. To avoid infinite loop in case of errors, we use the retry count*/ if (++sock->errorRetries == ncclParamRetryCnt()) { diff --git a/src/plugin/net.cc b/src/plugin/net.cc index 78944106a0..aa80c12aba 100644 --- a/src/plugin/net.cc +++ b/src/plugin/net.cc @@ -67,7 +67,7 @@ static pthread_once_t initPluginLibsOnceControl = PTHREAD_ONCE_INIT; static ncclResult_t ncclNetPluginUnload(netPluginLib_t* pluginLib) { if ((pluginLib->dlHandle) && ((pluginLib->ncclNetPluginRefCount) == 0)) { INFO(NCCL_INIT|NCCL_NET, "Unloading plugin %s", pluginLib->name); - NCCLCHECK(ncclClosePluginLib(pluginLib->dlHandle)); + NCCLCHECK(ncclClosePluginLib(pluginLib->dlHandle, ncclPluginTypeNet)); memset(pluginLib, 0, sizeof(netPluginLib_t)); } return ncclSuccess; @@ -105,8 +105,9 @@ exit: return ncclSuccess; fail: if (pluginLib->dlHandle) { - NCCLCHECK(ncclClosePluginLib(pluginLib->dlHandle)); + NCCLCHECK(ncclClosePluginLib(pluginLib->dlHandle, ncclPluginTypeNet)); } + pluginLib->dlHandle = nullptr; pluginLib->ncclNetPluginState = ncclNetPluginStateLoadFailed; pluginLib->ncclCollNetPluginState = ncclNetPluginStateLoadFailed; goto exit; diff --git a/src/plugin/plugin_open.cc b/src/plugin/plugin_open.cc index 64c97be39c..f80321c81c 100644 --- a/src/plugin/plugin_open.cc +++ b/src/plugin/plugin_open.cc @@ -10,16 +10,12 @@ #include #include "debug.h" +#include "plugin.h" #define MAX_STR_LEN 255 -enum ncclPluginType { - ncclPluginTypeNet, - ncclPluginTypeTuner, - ncclPluginTypeProfiler, -}; - #define NUM_LIBS 3 +static char* libNames[NUM_LIBS]; static void *libHandles[NUM_LIBS]; static const char *pluginNames[NUM_LIBS] = { "NET", "TUNER", "PROFILER" }; static const char *pluginPrefix[NUM_LIBS] = { "libnccl-net", "libnccl-tuner", "libnccl-profiler" }; @@ -65,6 +61,7 @@ static void* openPluginLib(enum ncclPluginType type, const char* libName) { libHandles[type] = tryOpenLib(libName_, &openErr, openErrStr); if (libHandles[type]) { INFO(subsys[type], "%s/Plugin: Plugin name set by env to %s", pluginNames[type], libName_); + libNames[type] = strdup(libName_); return libHandles[type]; } if (openErr == ENOENT) { @@ -79,6 +76,7 @@ static void* openPluginLib(enum ncclPluginType type, const char* libName) { libHandles[type] = tryOpenLib(libName_, &openErr, openErrStr); if (libHandles[type]) { INFO(subsys[type], "%s/Plugin: Plugin name set by env to %s", pluginNames[type], libName_); + libNames[type] = strdup(libName_); return libHandles[type]; } if (openErr == ENOENT) { @@ -91,6 +89,7 @@ static void* openPluginLib(enum ncclPluginType type, const char* libName) { snprintf(libName_, MAX_STR_LEN, "%s.so", pluginPrefix[type]); libHandles[type] = tryOpenLib(libName_, &openErr, openErrStr); if (libHandles[type]) { + libNames[type] = strdup(libName_); return libHandles[type]; } if (openErr == ENOENT) { @@ -120,22 +119,21 @@ void* ncclOpenProfilerPluginLib(const char* name) { return openPluginLib(ncclPluginTypeProfiler, name); } -void* ncclGetNetPluginLib(void) { - return libHandles[ncclPluginTypeNet]; +void* ncclGetNetPluginLib(enum ncclPluginType type) { + if (libNames[ncclPluginTypeNet]) { + // increment the reference counter of the net library + libNames[type] = strdup(libNames[ncclPluginTypeNet]); + libHandles[type] = dlopen(libNames[ncclPluginTypeNet], RTLD_NOW | RTLD_LOCAL); + } + return libHandles[type]; } -ncclResult_t ncclClosePluginLib(void* handle) { - bool found = false; - for (int l=0; lname); - NCCLCHECK(ncclClosePluginLib(profilerPluginLib)); + NCCLCHECK(ncclClosePluginLib(profilerPluginLib, ncclPluginTypeProfiler)); profilerPluginLib = nullptr; ncclProfiler = nullptr; profilerPluginStatus = profilerPluginLoadReady; diff --git a/src/plugin/tuner.cc b/src/plugin/tuner.cc index 443bf78c48..24a59de2e2 100644 --- a/src/plugin/tuner.cc +++ b/src/plugin/tuner.cc @@ -52,7 +52,7 @@ ncclResult_t ncclTunerPluginLoad(struct ncclComm* comm) { tunerPluginLib = ncclOpenTunerPluginLib(ncclGetEnv("NCCL_TUNER_PLUGIN")); if (nullptr == tunerPluginLib) { - tunerPluginLib = ncclGetNetPluginLib(); + tunerPluginLib = ncclGetNetPluginLib(ncclPluginTypeTuner); if (nullptr == tunerPluginLib) { goto fail; } @@ -78,6 +78,7 @@ exit: pthread_mutex_unlock(&tunerPluginLock); return ncclSuccess; fail: + if (tunerPluginLib) NCCLCHECK(ncclClosePluginLib(tunerPluginLib, ncclPluginTypeTuner)); tunerPluginLib = nullptr; status = tunerPluginLoadFailed; goto exit; @@ -87,7 +88,7 @@ ncclResult_t ncclTunerPluginUnload(struct ncclComm* comm) { pthread_mutex_lock(&tunerPluginLock); if (comm->tunerPluginLoaded && 0 == (--tunerPluginRefCount)) { INFO(NCCL_TUNING, "TUNER/Plugin: Closing tuner: '%s'", tunerSymbol->name); - NCCLCHECK(ncclClosePluginLib(tunerPluginLib)); + NCCLCHECK(ncclClosePluginLib(tunerPluginLib, ncclPluginTypeTuner)); tunerPluginLib = nullptr; tunerSymbol = nullptr; comm->tuner = nullptr; diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc index 40897d93f2..709e7ad400 100644 --- a/src/transport/net_ib.cc +++ b/src/transport/net_ib.cc @@ -652,12 +652,15 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction, ncclProfilerCallback_t pr enum ncclIbProvider ibProvider = IB_PROVIDER_NONE; char dataDirectDevicePath[PATH_MAX]; int dataDirectSupported = 0; + int skipNetDevForDataDirect = 0; if (wrap_mlx5dv_is_supported(devices[d])) { ibProvider = IB_PROVIDER_MLX5; snprintf(dataDirectDevicePath, PATH_MAX, "/sys"); if((ncclMlx5dvDmaBufCapable(context)) && (wrap_mlx5dv_get_data_direct_sysfs_path(context, dataDirectDevicePath + 4, PATH_MAX - 4) == ncclSuccess)) { - INFO(NCCL_INIT|NCCL_NET, "Data Direct DMA Interface is detected for device:%s", devices[d]->name); - if(ncclParamIbDataDirect()) dataDirectSupported = 1; + INFO(NCCL_INIT|NCCL_NET, "NET/IB: Data Direct DMA Interface is detected for device:%s", devices[d]->name); + // Now check whether Data Direct has been disabled by the user + if(ncclParamIbDataDirect() == 1) { dataDirectSupported = 1; skipNetDevForDataDirect = 1; } + if(ncclParamIbDataDirect() == 2) { dataDirectSupported = 1; skipNetDevForDataDirect = 0; } } } int nPorts = 0; @@ -669,7 +672,8 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction, ncclProfilerCallback_t pr continue; } for (int port_num = 1; port_num <= devAttr.phys_port_cnt; port_num++) { - for (int dataDirect = 0; dataDirect < 1 + dataDirectSupported; ++dataDirect) { + // dataDirect = 0 exposes the devices normally, dataDirect = 1 exposes the devices through direct NIC + for (int dataDirect = skipNetDevForDataDirect; dataDirect < 1 + dataDirectSupported; ++dataDirect) { struct ibv_port_attr portAttr; if (ncclSuccess != wrap_ibv_query_port(context, port_num, &portAttr)) { WARN("NET/IB : Unable to query port_num %d", port_num); @@ -690,15 +694,18 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction, ncclProfilerCallback_t pr ncclIbDevs[ncclNIbDevs].portAttr = portAttr; ncclIbDevs[ncclNIbDevs].portNum = port_num; ncclIbDevs[ncclNIbDevs].link = portAttr.link_layer; - ncclIbDevs[ncclNIbDevs].speed = ncclIbSpeed(portAttr.active_speed) * ncclIbWidth(portAttr.active_width); + if (portAttr.active_speed_ex) + // A non-zero active_speed_ex indicates XDR rate (0x100) or higher + ncclIbDevs[ncclNIbDevs].speed = ncclIbSpeed(portAttr.active_speed_ex) * ncclIbWidth(portAttr.active_width); + else + ncclIbDevs[ncclNIbDevs].speed = ncclIbSpeed(portAttr.active_speed) * ncclIbWidth(portAttr.active_width); ncclIbDevs[ncclNIbDevs].context = context; ncclIbDevs[ncclNIbDevs].pdRefs = 0; ncclIbDevs[ncclNIbDevs].pd = NULL; if (!dataDirect) { strncpy(ncclIbDevs[ncclNIbDevs].devName, devices[d]->name, MAXNAMESIZE); NCCLCHECKGOTO(ncclIbGetPciPath(ncclIbDevs[ncclNIbDevs].devName, &ncclIbDevs[ncclNIbDevs].pciPath, &ncclIbDevs[ncclNIbDevs].realPort), ret, fail); - } - else { + } else { snprintf(ncclIbDevs[ncclNIbDevs].devName, MAXNAMESIZE, "%s_dma", devices[d]->name); NCCLCHECK(ncclCalloc(&ncclIbDevs[ncclNIbDevs].pciPath, PATH_MAX)); strncpy(ncclIbDevs[ncclNIbDevs].pciPath, dataDirectDevicePath, PATH_MAX);