Add tree algorithms for allreduce to improve performance at scale.
Add ncclCommAbort() and ncclCommGetAsyncError() to properly handle
network errors and permit recovery.
Detect initial CPU affinity and no longer escape it.
This commit is contained in:
Sylvain Jeaugey
2018-12-13 15:56:12 -08:00
förälder 4861e197fd
incheckning 1450d42675
66 ändrade filer med 3746 tillägg och 3251 borttagningar
+2 -1
Visa fil
@@ -25,8 +25,9 @@ NCCL_MAJOR=${nccl:Major}
NCCL_MINOR=${nccl:Minor}
NCCL_PATCH=${nccl:Patch}
NCCL_SUFFIX=${nccl:Suffix}
NCCL_BUILD=${pkg:Revision}
NCCLNAME="nccl-src_${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}${NCCL_SUFFIX}"
NCCLNAME="nccl-src_${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}${NCCL_SUFFIX}-${NCCL_BUILD}"
tar --exclude build \
--exclude ".git*" \