Merge remote-tracking branch 'nccl/master' into develop
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc
|
||||
index 7af56a6c..5c3e3d46 100644
|
||||
index 9bfd8dcf..4d3f0a08 100644
|
||||
--- a/src/transport/net_ib.cc
|
||||
+++ b/src/transport/net_ib.cc
|
||||
@@ -28,6 +28,7 @@
|
||||
@@ -29,6 +29,7 @@
|
||||
|
||||
#include "ibvwrap.h"
|
||||
#include "mlx5/mlx5dvwrap.h"
|
||||
@@ -10,9 +10,9 @@ index 7af56a6c..5c3e3d46 100644
|
||||
#include "graph/xml.h"
|
||||
|
||||
#define MAXSUFFIXSIZE 16
|
||||
@@ -107,9 +108,31 @@ struct ncclIbMergedDev ncclIbMergedDevs[MAX_IB_VDEVS];
|
||||
@@ -110,16 +111,38 @@ struct ncclIbMergedDev ncclIbMergedDevs[MAX_IB_VDEVS];
|
||||
struct ncclIbDev ncclIbDevs[MAX_IB_DEVS];
|
||||
pthread_mutex_t ncclIbLock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static std::mutex ncclIbMutex;
|
||||
static int ncclIbRelaxedOrderingEnabled = 0;
|
||||
+static bool rcclAinicRoce = 0;
|
||||
+static bool rcclCtsInlineData = 0;
|
||||
@@ -35,6 +35,13 @@ index 7af56a6c..5c3e3d46 100644
|
||||
+static ncclChannelToUd nccl_channel_ud_map[MAXCHANNELS][ncclIbChannelTypeMax];
|
||||
+static bool nccl_channel_last_ud[MAX_IB_DEVS][ncclIbChannelTypeMax];
|
||||
|
||||
// With ncclNet_v11_t the NCCL core initializes the network plugin per-communicator
|
||||
// rather than once for all communicators. However, the internal plugin implementation
|
||||
// still assumes the plugin is initialized only once across all communicators. The ref
|
||||
// counter makes sure the plugin internally initializes only once. When per communicator
|
||||
// context support is added to the plugin the ref counter can be removed.
|
||||
static int netRefCount;
|
||||
|
||||
#define NCCL_IB_LLSTR(ll) (((ll) == IBV_LINK_LAYER_INFINIBAND) ? "IB" : (((ll) == IBV_LINK_LAYER_ETHERNET) ? "RoCE" : "UNSPECIFIED"))
|
||||
|
||||
+#define NCCL_CTS_QP_SLOT_INVALID 0xFF
|
||||
@@ -42,7 +49,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
#define NCCL_IB_SL_DEFAULT 0
|
||||
#define NCCL_IB_TC_DEFAULT 0
|
||||
|
||||
@@ -131,6 +154,13 @@ NCCL_PARAM(IbEceEnable,"IB_ECE_ENABLE",1);
|
||||
@@ -141,6 +164,13 @@ NCCL_PARAM(IbEceEnable,"IB_ECE_ENABLE",1);
|
||||
NCCL_PARAM(IbDataDirect,"IB_DATA_DIRECT",1);
|
||||
NCCL_PARAM(IbQpsPerConn, "IB_QPS_PER_CONNECTION", 1);
|
||||
RCCL_PARAM(IbQpsPerP2p, "IB_QPS_PER_P2P", 0);
|
||||
@@ -56,7 +63,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
|
||||
static ncclResult_t ncclIbStatsInit(struct ncclIbStats* stat) {
|
||||
__atomic_store_n(&stat->fatalErrorCount, 0, __ATOMIC_RELAXED);
|
||||
@@ -630,6 +660,10 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction, ncclProfilerCallback_t pr
|
||||
@@ -779,6 +809,10 @@ ncclResult_t ncclIbInit(void** ctx, uint64_t commId, ncclNetCommConfig_t* config
|
||||
static int shownIbHcaEnv = 0;
|
||||
if(wrap_ibv_symbols() != ncclSuccess) { return ncclInternalError; }
|
||||
if(wrap_mlx5dv_symbols() != ncclSuccess) { INFO(NCCL_NET, "NET/IB : Failed to open mlx5dv symbols. Advance features like CX-8 Direct-NIC will be disabled."); }
|
||||
@@ -67,7 +74,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
|
||||
// Detect IB cards
|
||||
int nIbDevs = 0;
|
||||
@@ -783,6 +817,24 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction, ncclProfilerCallback_t pr
|
||||
@@ -944,6 +978,23 @@ ncclResult_t ncclIbInit(void** ctx, uint64_t commId, ncclNetCommConfig_t* config
|
||||
INFO(NCCL_INIT|NCCL_NET, "NET/IB : Using%s %s; OOB %s:%s", line, ncclIbRelaxedOrderingEnabled ? "[RO]" : "",
|
||||
ncclIbIfName, ncclSocketToString(&ncclIbIfAddr, addrline));
|
||||
|
||||
@@ -88,11 +95,10 @@ index 7af56a6c..5c3e3d46 100644
|
||||
+ "IB Use Inline: enabled; GDR Flush: disabled", rcclCtsInlineData ? "Enabled": "Disabled",
|
||||
+ rcclCtsOffloadEnabled ? "Enabled": "Disabled");
|
||||
+ }
|
||||
+
|
||||
pthread_mutex_unlock(&ncclIbLock);
|
||||
}
|
||||
exit:
|
||||
@@ -1112,6 +1164,8 @@ struct ncclIbListenComm {
|
||||
ibContext.trafficClass = config->trafficClass;
|
||||
@@ -1271,6 +1322,8 @@ struct ncclIbListenComm {
|
||||
struct ncclIbCommStage stage;
|
||||
};
|
||||
|
||||
@@ -101,7 +107,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
struct alignas(64) ncclIbSendFifo {
|
||||
uint64_t addr;
|
||||
uint64_t size;
|
||||
@@ -1122,10 +1176,21 @@ struct alignas(64) ncclIbSendFifo {
|
||||
@@ -1281,10 +1334,21 @@ struct alignas(64) ncclIbSendFifo {
|
||||
char padding[16];
|
||||
};
|
||||
|
||||
@@ -123,7 +129,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
};
|
||||
|
||||
struct ncclIbRemSizesFifo {
|
||||
@@ -1172,6 +1237,7 @@ struct ncclIbSendComm {
|
||||
@@ -1331,6 +1395,7 @@ struct ncclIbSendComm {
|
||||
struct ncclIbNetCommBase base;
|
||||
// Start with fifo and ibv structs as they have alignment restrictions
|
||||
struct ncclIbSendFifo fifo[MAX_REQUESTS][NCCL_NET_IB_MAX_RECVS];
|
||||
@@ -131,7 +137,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
struct ibv_sge sges[NCCL_NET_IB_MAX_RECVS];
|
||||
struct ibv_send_wr wrs[NCCL_NET_IB_MAX_RECVS + 1];
|
||||
// Each dev correlates to a mergedIbDev
|
||||
@@ -1187,6 +1253,7 @@ struct ncclIbSendComm {
|
||||
@@ -1346,6 +1411,7 @@ struct ncclIbSendComm {
|
||||
static_assert((sizeof(struct ncclIbNetCommBase) % 32) == 0, "ncclIbNetCommBase size must be 32-byte multiple to ensure fifo is at proper offset");
|
||||
static_assert((offsetof(struct ncclIbSendComm, fifo) % 32) == 0, "ncclIbSendComm fifo must be 32-byte aligned");
|
||||
static_assert((sizeof(struct ncclIbSendFifo) % 32) == 0, "ncclIbSendFifo element size must be 32-byte multiples");
|
||||
@@ -139,7 +145,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
static_assert((offsetof(struct ncclIbSendComm, sges) % 32) == 0, "sges must be 32-byte aligned");
|
||||
static_assert((offsetof(struct ncclIbSendComm, wrs) % 32) == 0, "wrs must be 32-byte aligned");
|
||||
|
||||
@@ -1201,6 +1268,7 @@ struct ncclIbGpuFlush {
|
||||
@@ -1360,6 +1426,7 @@ struct ncclIbGpuFlush {
|
||||
|
||||
struct ncclIbRemFifo {
|
||||
struct ncclIbSendFifo elems[MAX_REQUESTS][NCCL_NET_IB_MAX_RECVS];
|
||||
@@ -147,8 +153,8 @@ index 7af56a6c..5c3e3d46 100644
|
||||
uint64_t fifoTail;
|
||||
uint64_t addr;
|
||||
uint32_t flags;
|
||||
@@ -1265,20 +1333,59 @@ returning:
|
||||
return res;
|
||||
@@ -1415,20 +1482,59 @@ ncclResult_t ncclIbDestroyBase(struct ncclIbNetCommDevBase* base) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
-ncclResult_t ncclIbCreateQp(uint8_t ib_port, struct ncclIbNetCommDevBase* base, int access_flags, void* qp_context, struct ncclIbQp* qp) {
|
||||
@@ -209,7 +215,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
struct ibv_qp_attr qpAttr;
|
||||
memset(&qpAttr, 0, sizeof(struct ibv_qp_attr));
|
||||
qpAttr.qp_state = IBV_QPS_INIT;
|
||||
@@ -1288,6 +1395,9 @@ ncclResult_t ncclIbCreateQp(uint8_t ib_port, struct ncclIbNetCommDevBase* base,
|
||||
@@ -1438,6 +1544,9 @@ ncclResult_t ncclIbCreateQp(uint8_t ib_port, struct ncclIbNetCommDevBase* base,
|
||||
NCCLCHECK(wrap_ibv_modify_qp(qp->qp, &qpAttr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS));
|
||||
TRACE(NCCL_NET, "NET/IB : ncclIbCreateQp port=%d dev=%d devName=%s ndevs=%d nmdevs=%d qpn=%u pkey=%u pd=%p",
|
||||
ib_port, base->ibDevN, ncclIbDevs[base->ibDevN].devName, ncclNIbDevs, ncclNMergedIbDevs, qp->qp->qp_num, qpAttr.pkey_index, base->pd);
|
||||
@@ -219,16 +225,16 @@ index 7af56a6c..5c3e3d46 100644
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
@@ -1371,7 +1481,7 @@ fail:
|
||||
@@ -1521,7 +1630,7 @@ fail:
|
||||
goto exit;
|
||||
}
|
||||
|
||||
-ncclResult_t ncclIbConnect(int dev, ncclNetCommConfig_t* config, void* opaqueHandle, void** sendComm, ncclNetDeviceHandle_t** /*sendDevComm*/) {
|
||||
+ncclResult_t ncclIbConnect(int dev, ncclNetCommConfig_t* config, void* opaqueHandle, void** sendComm, ncclNetDeviceHandle_t** sendDevComm) {
|
||||
-ncclResult_t ncclIbConnect(void* ctx, int dev, void* opaqueHandle, void** sendComm, ncclNetDeviceHandle_t** /*sendDevComm*/) {
|
||||
+ncclResult_t ncclIbConnect(void* ctx, int dev, void* opaqueHandle, void** sendComm, ncclNetDeviceHandle_t** sendDevComm) {
|
||||
ncclResult_t ret = ncclSuccess;
|
||||
struct ncclIbHandle* handle = (struct ncclIbHandle*) opaqueHandle;
|
||||
struct ncclIbCommStage* stage = &handle->stage;
|
||||
@@ -1379,8 +1489,13 @@ ncclResult_t ncclIbConnect(int dev, ncclNetCommConfig_t* config, void* opaqueHan
|
||||
@@ -1529,8 +1638,13 @@ ncclResult_t ncclIbConnect(void* ctx, int dev, void* opaqueHandle, void** sendCo
|
||||
int ready;
|
||||
uint8_t link_layer = IBV_LINK_LAYER_UNSPECIFIED;
|
||||
int isP2p = 0;
|
||||
@@ -242,7 +248,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
if (stage->state == ncclIbCommStateConnect) goto ib_connect_check;
|
||||
if (stage->state == ncclIbCommStateSendDevList) goto ib_send_dev_list;
|
||||
if (stage->state == ncclIbCommStateRecvDevList) goto ib_recv_dev_list;
|
||||
@@ -1461,7 +1576,7 @@ ib_recv_dev_list:
|
||||
@@ -1612,7 +1726,7 @@ ib_recv_dev_list:
|
||||
for (int q = 0; q < comm->base.nqps; q++) {
|
||||
ncclIbSendCommDev* commDev = comm->devs + devIndex;
|
||||
ncclIbDev* ibDev = ncclIbDevs + commDev->base.ibDevN;
|
||||
@@ -251,7 +257,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
comm->base.qps[q].devIndex = devIndex;
|
||||
meta.qpInfo[q].qpn = comm->base.qps[q].qp->qp_num;
|
||||
meta.qpInfo[q].devIndex = comm->base.qps[q].devIndex;
|
||||
@@ -1486,7 +1601,11 @@ ib_recv_dev_list:
|
||||
@@ -1637,7 +1751,11 @@ ib_recv_dev_list:
|
||||
devInfo->lid = ibDev->portAttr.lid;
|
||||
devInfo->ibv_dev_index = commDev->base.ibDevN;
|
||||
// Prepare my fifo
|
||||
@@ -264,10 +270,10 @@ index 7af56a6c..5c3e3d46 100644
|
||||
devInfo->fifoRkey = commDev->fifoMr->rkey;
|
||||
|
||||
// Pack local GID info
|
||||
@@ -1528,7 +1647,11 @@ ib_recv_dev_list:
|
||||
return ncclInternalError;
|
||||
@@ -1680,7 +1798,11 @@ ib_recv_dev_list:
|
||||
}
|
||||
}
|
||||
config = (ncclNetCommConfig_t*)ctx;
|
||||
- meta.fifoAddr = (uint64_t)comm->fifo;
|
||||
+ if (rcclCtsInlineData) {
|
||||
+ meta.fifoAddr = (uint64_t)comm->fifo_inline;
|
||||
@@ -277,7 +283,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
meta.sl = (ncclParamIbSl() != -1) ? ncclParamIbSl() : (config && config->trafficClass != NCCL_NET_TRAFFIC_CLASS_UNDEF) ? config->trafficClass : NCCL_IB_SL_DEFAULT;
|
||||
meta.tc = (ncclParamIbTc() != -1) ? ncclParamIbTc() : (config && config->trafficClass != NCCL_NET_TRAFFIC_CLASS_UNDEF) ? config->trafficClass : NCCL_IB_TC_DEFAULT;
|
||||
strncpy(meta.devName, mergedDev->devName, MAX_MERGED_DEV_NAME);
|
||||
@@ -1673,18 +1796,22 @@ ncclResult_t ncclIbCheckVProps(ncclNetVDeviceProps_t* vProps1, ncclNetVDevicePro
|
||||
@@ -1825,18 +1947,22 @@ ncclResult_t ncclIbCheckVProps(ncclNetVDeviceProps_t* vProps1, ncclNetVDevicePro
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
@@ -302,7 +308,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
if (stage->state == ncclIbCommStateAccept) goto ib_accept_check;
|
||||
if (stage->state == ncclIbCommStateRecvDevList) goto ib_recv_dev_list;
|
||||
if (stage->state == ncclIbCommStateSendDevList) goto ib_send_dev_list;
|
||||
@@ -1814,7 +1941,7 @@ ib_recv:
|
||||
@@ -1966,7 +2092,7 @@ ib_recv:
|
||||
// Local ibDevN
|
||||
ibDevN = rComm->devs[devIndex].base.ibDevN;
|
||||
ibDev = ncclIbDevs + ibDevN;
|
||||
@@ -311,7 +317,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
qp->devIndex = devIndex;
|
||||
devIndex = (devIndex + 1) % rComm->base.vProps.ndevs;
|
||||
|
||||
@@ -1840,16 +1967,22 @@ ib_recv:
|
||||
@@ -1992,16 +2118,22 @@ ib_recv:
|
||||
|
||||
useDmaBuf = (ncclIbDmaBufSupport(lComm->dev) == ncclSuccess);
|
||||
rComm->flushEnabled = ((ncclIbGdrSupport() == ncclSuccess || useDmaBuf)
|
||||
@@ -337,7 +343,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
|
||||
// Allocate Flush dummy buffer for GPU Direct RDMA
|
||||
if (rComm->flushEnabled) {
|
||||
@@ -1887,7 +2020,7 @@ ib_recv:
|
||||
@@ -2039,7 +2171,7 @@ ib_recv:
|
||||
rCommDev->gpuFlush.sge.addr = (uint64_t)&rComm->gpuFlushHostMem;
|
||||
rCommDev->gpuFlush.sge.length = 1;
|
||||
rCommDev->gpuFlush.sge.lkey = rCommDev->gpuFlush.hostMr->lkey;
|
||||
@@ -346,7 +352,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
struct ncclIbDevInfo devInfo;
|
||||
devInfo.lid = ibDev->portAttr.lid;
|
||||
devInfo.link_layer = ibDev->portAttr.link_layer;
|
||||
@@ -2115,10 +2248,15 @@ ncclResult_t ncclIbDeregMr(void* comm, void* mhandle) {
|
||||
@@ -2257,10 +2389,15 @@ ncclResult_t ncclIbDeregMr(void* comm, void* mhandle) {
|
||||
|
||||
NCCL_PARAM(IbSplitDataOnQps, "IB_SPLIT_DATA_ON_QPS", 0);
|
||||
|
||||
@@ -364,7 +370,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
if (nreqs > NCCL_NET_IB_MAX_RECVS) return ncclInternalError;
|
||||
|
||||
uint64_t wr_id = 0ULL;
|
||||
@@ -2130,7 +2268,11 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
@@ -2272,7 +2409,11 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
sge->addr=(uintptr_t)reqs[r]->send.data;
|
||||
wr->opcode = IBV_WR_RDMA_WRITE;
|
||||
wr->send_flags = 0;
|
||||
@@ -377,7 +383,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
wr->next = wr + 1;
|
||||
wr_id += (reqs[r] - comm->base.reqs) << (r*8);
|
||||
#ifdef NCCL_ENABLE_NET_PROFILING
|
||||
@@ -2141,7 +2283,7 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
@@ -2283,7 +2424,7 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
// Write size as immediate data. In the case of multi-send, only write
|
||||
// 0 or 1 as size to indicate whether there was data sent or received.
|
||||
uint32_t immData = 0;
|
||||
@@ -386,7 +392,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
immData = reqs[0]->send.size;
|
||||
} else {
|
||||
int* sizes = comm->remSizesFifo.elems[slot];
|
||||
@@ -2151,22 +2293,24 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
@@ -2293,22 +2434,24 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
}
|
||||
|
||||
struct ibv_send_wr* lastWr = comm->wrs+nreqs-1;
|
||||
@@ -424,7 +430,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
lastWr->next = NULL;
|
||||
lastWr->send_flags = IBV_SEND_SIGNALED;
|
||||
|
||||
@@ -2182,7 +2326,11 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
@@ -2324,7 +2467,11 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
//ncclIbAddEvent(reqs[r], devIndex, &comm->devs[devIndex].base);
|
||||
|
||||
// Select proper rkey (needed even for 0-size send)
|
||||
@@ -437,7 +443,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
|
||||
int chunkSize = DIVUP(DIVUP(reqs[r]->send.size, nqps), align) * align;
|
||||
int length = std::min(reqs[r]->send.size-reqs[r]->send.offset, chunkSize);
|
||||
@@ -2198,7 +2346,7 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
@@ -2340,7 +2487,7 @@ ncclResult_t ncclIbMultiSend(struct ncclIbSendComm* comm, int slot) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -446,7 +452,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
// Also make sure lastWr writes remote sizes using the right lkey
|
||||
comm->remSizesFifo.sge.lkey = comm->remSizesFifo.mrs[devIndex]->lkey;
|
||||
lastWr->wr.rdma.rkey = comm->remSizesFifo.rkeys[devIndex];
|
||||
@@ -2256,32 +2404,46 @@ ncclResult_t ncclIbIsend(void* sendComm, void* data, size_t size, int tag, void*
|
||||
@@ -2398,32 +2545,46 @@ ncclResult_t ncclIbIsend(void* sendComm, void* data, size_t size, int tag, void*
|
||||
NCCLCHECK(ncclIbStatsCheckFatalCount(&comm->base.stats,__func__));
|
||||
|
||||
struct ncclIbMrHandle* mhandleWrapper = (struct ncclIbMrHandle*) mhandle;
|
||||
@@ -511,7 +517,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
}
|
||||
|
||||
struct ncclIbRequest* req;
|
||||
@@ -2325,10 +2487,12 @@ ncclResult_t ncclIbIsend(void* sendComm, void* data, size_t size, int tag, void*
|
||||
@@ -2467,10 +2628,12 @@ ncclResult_t ncclIbIsend(void* sendComm, void* data, size_t size, int tag, void*
|
||||
}
|
||||
|
||||
TIME_START(0);
|
||||
@@ -526,7 +532,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
memset(reqs, 0, NCCL_NET_IB_MAX_RECVS*sizeof(struct ncclIbRequest*));
|
||||
comm->fifoHead++;
|
||||
TIME_STOP(0);
|
||||
@@ -2341,30 +2505,60 @@ ncclResult_t ncclIbIsend(void* sendComm, void* data, size_t size, int tag, void*
|
||||
@@ -2483,30 +2646,60 @@ ncclResult_t ncclIbIsend(void* sendComm, void* data, size_t size, int tag, void*
|
||||
|
||||
ncclResult_t ncclIbPostFifo(struct ncclIbRecvComm* comm, int n, void** data, size_t* sizes, int* tags, void** mhandles, struct ncclIbRequest* req) {
|
||||
struct ibv_send_wr wr;
|
||||
@@ -566,10 +572,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
struct ncclIbMrHandle* mhandleWrapper = (struct ncclIbMrHandle*) mhandles[i];
|
||||
+ if (rcclCtsInlineData) {
|
||||
+ localElemCtsInline[i].addr = (uint64_t)data[i];
|
||||
|
||||
- // Send all applicable rkeys
|
||||
- for (int j = 0; j < comm->base.vProps.ndevs; j++)
|
||||
- localElem[i].rkeys[j] = mhandleWrapper->mrs[j]->rkey;
|
||||
+
|
||||
+ // Send all applicable rkeys
|
||||
+ for (int j = 0; j < comm->base.vProps.ndevs; j++)
|
||||
+ localElemCtsInline[i].rkeys[j] = mhandleWrapper->mrs[j]->rkey;
|
||||
@@ -583,14 +586,17 @@ index 7af56a6c..5c3e3d46 100644
|
||||
+ } else {
|
||||
+ localElem[i].addr = (uint64_t)data[i];
|
||||
|
||||
- // Send all applicable rkeys
|
||||
- for (int j = 0; j < comm->base.vProps.ndevs; j++)
|
||||
- localElem[i].rkeys[j] = mhandleWrapper->mrs[j]->rkey;
|
||||
+ // Send all applicable rkeys
|
||||
+ for (int j = 0; j < comm->base.vProps.ndevs; j++)
|
||||
+ localElem[i].rkeys[j] = mhandleWrapper->mrs[j]->rkey;
|
||||
|
||||
- localElem[i].nreqs = n;
|
||||
- localElem[i].size = sizes[i]; // Sanity/Debugging
|
||||
- localElem[i].tag = tags[i];
|
||||
- localElem[i].idx = comm->remFifo.fifoTail+1;
|
||||
+ // Send all applicable rkeys
|
||||
+ for (int j = 0; j < comm->base.vProps.ndevs; j++)
|
||||
+ localElem[i].rkeys[j] = mhandleWrapper->mrs[j]->rkey;
|
||||
+
|
||||
+ localElem[i].nreqs = n;
|
||||
+ localElem[i].size = sizes[i]; // Sanity/Debugging
|
||||
+ localElem[i].tag = tags[i];
|
||||
@@ -600,7 +606,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
}
|
||||
wr.wr.rdma.remote_addr = comm->remFifo.addr + slot*NCCL_NET_IB_MAX_RECVS*sizeof(struct ncclIbSendFifo);
|
||||
|
||||
@@ -2372,8 +2566,12 @@ ncclResult_t ncclIbPostFifo(struct ncclIbRecvComm* comm, int n, void** data, siz
|
||||
@@ -2514,8 +2707,12 @@ ncclResult_t ncclIbPostFifo(struct ncclIbRecvComm* comm, int n, void** data, siz
|
||||
wr.wr.rdma.rkey = comm->base.remDevs[ctsQp->remDevIdx].fifoRkey;
|
||||
|
||||
// Set the correct sge properties
|
||||
@@ -615,7 +621,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
wr.sg_list = &comm->devs[ctsQp->devIndex].fifoSge;
|
||||
wr.num_sge = 1;
|
||||
|
||||
@@ -2403,7 +2601,13 @@ ncclResult_t ncclIbPostFifo(struct ncclIbRecvComm* comm, int n, void** data, siz
|
||||
@@ -2545,7 +2742,13 @@ ncclResult_t ncclIbPostFifo(struct ncclIbRecvComm* comm, int n, void** data, siz
|
||||
//
|
||||
// slot == devIndex - When writing to fifo slot N, and this QP lives on device index N, it should send signalled.
|
||||
// This works out that each fifo posting QP gets drained
|
||||
@@ -630,7 +636,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
wr.send_flags |= IBV_SEND_SIGNALED;
|
||||
wr.wr_id = req - comm->base.reqs;
|
||||
ncclIbAddEvent(req, ctsQp->devIndex, &comm->devs[ctsQp->devIndex].base);
|
||||
@@ -2418,10 +2622,16 @@ ncclResult_t ncclIbPostFifo(struct ncclIbRecvComm* comm, int n, void** data, siz
|
||||
@@ -2560,10 +2763,16 @@ ncclResult_t ncclIbPostFifo(struct ncclIbRecvComm* comm, int n, void** data, siz
|
||||
|
||||
comm->remFifo.fifoTail++;
|
||||
|
||||
@@ -647,7 +653,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
struct ncclIbRecvComm* comm = (struct ncclIbRecvComm*)recvComm;
|
||||
if (comm->base.ready == 0) {
|
||||
WARN("NET/IB: ncclIbIrecv() called when comm->base.ready == 0");
|
||||
@@ -2431,6 +2641,11 @@ ncclResult_t ncclIbIrecv(void* recvComm, int n, void** data, size_t* sizes, int*
|
||||
@@ -2573,6 +2782,11 @@ ncclResult_t ncclIbIrecv(void* recvComm, int n, void** data, size_t* sizes, int*
|
||||
if (n > NCCL_NET_IB_MAX_RECVS) return ncclInternalError;
|
||||
NCCLCHECK(ncclIbStatsCheckFatalCount(&comm->base.stats,__func__));
|
||||
|
||||
@@ -659,7 +665,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
struct ncclIbRequest* req;
|
||||
NCCLCHECK(ncclIbGetRequest(&comm->base, &req));
|
||||
req->type = NCCL_NET_IB_REQ_RECV;
|
||||
@@ -2444,50 +2659,64 @@ ncclResult_t ncclIbIrecv(void* recvComm, int n, void** data, size_t* sizes, int*
|
||||
@@ -2586,50 +2800,64 @@ ncclResult_t ncclIbIrecv(void* recvComm, int n, void** data, size_t* sizes, int*
|
||||
req->devBases[i] = &comm->devs[i].base;
|
||||
}
|
||||
|
||||
@@ -756,7 +762,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
}
|
||||
|
||||
ncclResult_t ncclIbIflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) {
|
||||
@@ -2556,6 +2785,8 @@ static int getReqQpIndex(struct ncclIbRequest* req, int request, int qpNumber) {
|
||||
@@ -2698,6 +2926,8 @@ static int getReqQpIndex(struct ncclIbRequest* req, int request, int qpNumber) {
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -765,7 +771,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
ncclResult_t ncclIbTest(void* request, int* done, int* sizes) {
|
||||
struct ncclIbRequest *r = (struct ncclIbRequest*)request;
|
||||
*done = 0;
|
||||
@@ -2589,13 +2820,18 @@ ncclResult_t ncclIbTest(void* request, int* done, int* sizes) {
|
||||
@@ -2731,13 +2961,18 @@ ncclResult_t ncclIbTest(void* request, int* done, int* sizes) {
|
||||
|
||||
int totalWrDone = 0;
|
||||
int wrDone = 0;
|
||||
@@ -786,7 +792,7 @@ index 7af56a6c..5c3e3d46 100644
|
||||
totalWrDone += wrDone;
|
||||
if (wrDone == 0) { TIME_CANCEL(3); } else { TIME_STOP(3); }
|
||||
if (wrDone == 0) continue;
|
||||
@@ -2742,7 +2978,7 @@ ncclResult_t rcclNetP2pPolicy(void* handle, int isP2p) {
|
||||
@@ -2889,7 +3124,7 @@ ncclResult_t rcclNetP2pPolicy(void* handle, int isP2p) {
|
||||
}
|
||||
|
||||
ncclNet_t ncclNetIb = {
|
||||
|
||||
مرجع در شماره جدید
Block a user