Tune MSCCL all-reduce algorithm (#1009)

This commit is contained in:
Ziyue Yang
2023-12-09 07:47:02 +08:00
committed by GitHub
parent baadda4bd8
commit bb144dcd50
3 changed files with 20804 additions and 3218 deletions
@@ -1,4 +1,4 @@
-<algo name="allreduce_pairs" proto="LL" nchannels="4" nchunksperloop="256" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="20481" maxBytes="327680">
+<algo name="allreduce_pairs" proto="LL" nchannels="4" nchunksperloop="256" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="20481" maxBytes="81919">
 <gpu id="0" i_chunks="256" o_chunks="0" s_chunks="224">
 <tb id="0" send="-1" recv="-1" chan="0">
 <step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="8" deps="1" hasdep="0"/>
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff