revert memcpy use for direct AG (#2146)

Co-authored-by: Islam <nusislam@amd.com>
Bu işleme şunda yer alıyor:
Nusrat Islam
2026-01-20 13:58:28 -06:00
işlemeyi yapan: GitHub
ebeveyn 2fdcceaabb
işleme f3c5156bbf
+5 -7
Dosyayı Görüntüle
@@ -131,16 +131,14 @@ ncclResult_t ncclAllGather_impl(const void* sendbuff, void* recvbuff, size_t sen
dstBuf = recvbuff;
}
if (!in_place)
CUDACHECK(cudaMemcpyAsync((char*)dstBuf + rank * rankOffset, srcBuf, rankOffset, cudaMemcpyDeviceToDevice, stream));
NCCLCHECK(ncclGroupStart());
for (int r = 0; r < nRanks; r++) {
if (r != rank) {
NCCLCHECK(ncclSend(((char*)dstBuf) + rank * rankOffset, sendcount, datatype, r, comm, stream));
NCCLCHECK(ncclRecv(((char*)dstBuf) + r * rankOffset, sendcount, datatype, r, comm, stream));
}
if (r == rank && in_place)
continue;
NCCLCHECK(ncclSend(((char*)srcBuf), sendcount, datatype, r, comm, stream));
NCCLCHECK(ncclRecv(((char*)dstBuf) + r * rankOffset, sendcount, datatype, r, comm, stream));
}
NCCLCHECK(ncclGroupEnd());
return ncclSuccess;