msccl algorithms tuning for allgather on MI300 (#1110)

Dieser Commit ist enthalten in:
Pedram Alizadeh
2024-03-14 12:18:26 -04:00
committet von GitHub
Ursprung 0867562b18
Commit 50f22e8317
6 geänderte Dateien mit 4 neuen und 2060 gelöschten Zeilen
@@ -1,6 +1,6 @@
<!-- Copyright (c) Microsoft Corporation. -->
<!-- Licensed under the MIT License. -->
<algo name="all_gather_llm" proto="LL" nchannels="1" nchunksperloop="8" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="0" maxBytes="1023">
<algo name="all_gather_llm" proto="LL" nchannels="1" nchunksperloop="8" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="0" maxBytes="8192">
<gpu id="0" i_chunks="0" o_chunks="8" s_chunks="0">
<tb id="0" send="-1" recv="1" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
@@ -1,6 +1,6 @@
<!-- Copyright (c) Microsoft Corporation. -->
<!-- Licensed under the MIT License. -->
<algo name="all_gather_llm" proto="Simple" nchannels="32" nchunksperloop="32" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="1048576" maxBytes="4194303">
<algo name="all_gather_llm" proto="Simple" nchannels="32" nchunksperloop="32" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="131073" maxBytes="1048575">
<gpu id="0" i_chunks="0" o_chunks="32" s_chunks="0">
<tb id="0" send="-1" recv="1" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
@@ -1,6 +1,6 @@
<!-- Copyright (c) Microsoft Corporation. -->
<!-- Licensed under the MIT License. -->
<algo name="all_gather_llm" proto="Simple" nchannels="8" nchunksperloop="64" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="4194303" maxBytes="16777216">
<algo name="all_gather_llm" proto="Simple" nchannels="8" nchunksperloop="64" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="1048576" maxBytes="41943040">
<gpu id="0" i_chunks="0" o_chunks="64" s_chunks="0">
<tb id="0" send="1" recv="1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
@@ -1,692 +0,0 @@
<!-- Copyright (c) Microsoft Corporation. -->
<!-- Licensed under the MIT License. -->
<algo name="all_gather_llm" proto="LL" nchannels="16" nchunksperloop="16" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="4096" maxBytes="8191">
<gpu id="0" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="1" chan="1">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="1" chan="9">
<step s="0" type="r" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="2" chan="2">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="2" chan="10">
<step s="0" type="r" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="3" chan="3">
<step s="0" type="r" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="3" chan="11">
<step s="0" type="r" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="4" chan="4">
<step s="0" type="r" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="4" chan="12">
<step s="0" type="r" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="5" chan="5">
<step s="0" type="r" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="5" chan="13">
<step s="0" type="r" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="6" chan="6">
<step s="0" type="r" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="6" chan="14">
<step s="0" type="r" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="7" chan="7">
<step s="0" type="r" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="7" chan="15">
<step s="0" type="r" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="14" send="1" recv="-1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="1" recv="-1" chan="8">
<step s="0" type="s" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="2" recv="-1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="2" recv="-1" chan="8">
<step s="0" type="s" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="3" recv="-1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="3" recv="-1" chan="8">
<step s="0" type="s" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="4" recv="-1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="4" recv="-1" chan="8">
<step s="0" type="s" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="5" recv="-1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="5" recv="-1" chan="8">
<step s="0" type="s" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="6" recv="-1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="6" recv="-1" chan="8">
<step s="0" type="s" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="7" recv="-1" chan="0">
<step s="0" type="s" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="7" recv="-1" chan="8">
<step s="0" type="s" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
<gpu id="1" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="0" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="0" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="2" chan="2">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="2" chan="10">
<step s="0" type="r" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="3" chan="3">
<step s="0" type="r" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="3" chan="11">
<step s="0" type="r" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="4" chan="4">
<step s="0" type="r" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="4" chan="12">
<step s="0" type="r" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="5" chan="5">
<step s="0" type="r" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="5" chan="13">
<step s="0" type="r" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="6" chan="6">
<step s="0" type="r" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="6" chan="14">
<step s="0" type="r" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="7" chan="7">
<step s="0" type="r" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="7" chan="15">
<step s="0" type="r" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="14" send="0" recv="-1" chan="1">
<step s="0" type="s" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="0" recv="-1" chan="9">
<step s="0" type="s" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="2" recv="-1" chan="1">
<step s="0" type="s" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="2" recv="-1" chan="9">
<step s="0" type="s" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="3" recv="-1" chan="1">
<step s="0" type="s" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="3" recv="-1" chan="9">
<step s="0" type="s" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="4" recv="-1" chan="1">
<step s="0" type="s" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="4" recv="-1" chan="9">
<step s="0" type="s" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="5" recv="-1" chan="1">
<step s="0" type="s" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="5" recv="-1" chan="9">
<step s="0" type="s" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="6" recv="-1" chan="1">
<step s="0" type="s" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="6" recv="-1" chan="9">
<step s="0" type="s" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="7" recv="-1" chan="1">
<step s="0" type="s" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="7" recv="-1" chan="9">
<step s="0" type="s" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
<gpu id="2" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="0" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="0" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="1" chan="1">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="1" chan="9">
<step s="0" type="r" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="3" chan="3">
<step s="0" type="r" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="3" chan="11">
<step s="0" type="r" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="4" chan="4">
<step s="0" type="r" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="4" chan="12">
<step s="0" type="r" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="5" chan="5">
<step s="0" type="r" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="5" chan="13">
<step s="0" type="r" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="6" chan="6">
<step s="0" type="r" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="6" chan="14">
<step s="0" type="r" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="7" chan="7">
<step s="0" type="r" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="7" chan="15">
<step s="0" type="r" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="14" send="0" recv="-1" chan="2">
<step s="0" type="s" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="0" recv="-1" chan="10">
<step s="0" type="s" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="1" recv="-1" chan="2">
<step s="0" type="s" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="1" recv="-1" chan="10">
<step s="0" type="s" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="3" recv="-1" chan="2">
<step s="0" type="s" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="3" recv="-1" chan="10">
<step s="0" type="s" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="4" recv="-1" chan="2">
<step s="0" type="s" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="4" recv="-1" chan="10">
<step s="0" type="s" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="5" recv="-1" chan="2">
<step s="0" type="s" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="5" recv="-1" chan="10">
<step s="0" type="s" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="6" recv="-1" chan="2">
<step s="0" type="s" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="6" recv="-1" chan="10">
<step s="0" type="s" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="7" recv="-1" chan="2">
<step s="0" type="s" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="7" recv="-1" chan="10">
<step s="0" type="s" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
<gpu id="3" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="0" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="0" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="1" chan="1">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="1" chan="9">
<step s="0" type="r" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="2" chan="2">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="2" chan="10">
<step s="0" type="r" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="4" chan="4">
<step s="0" type="r" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="4" chan="12">
<step s="0" type="r" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="5" chan="5">
<step s="0" type="r" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="5" chan="13">
<step s="0" type="r" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="6" chan="6">
<step s="0" type="r" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="6" chan="14">
<step s="0" type="r" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="7" chan="7">
<step s="0" type="r" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="7" chan="15">
<step s="0" type="r" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="14" send="0" recv="-1" chan="3">
<step s="0" type="s" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="0" recv="-1" chan="11">
<step s="0" type="s" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="1" recv="-1" chan="3">
<step s="0" type="s" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="1" recv="-1" chan="11">
<step s="0" type="s" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="2" recv="-1" chan="3">
<step s="0" type="s" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="2" recv="-1" chan="11">
<step s="0" type="s" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="4" recv="-1" chan="3">
<step s="0" type="s" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="4" recv="-1" chan="11">
<step s="0" type="s" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="5" recv="-1" chan="3">
<step s="0" type="s" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="5" recv="-1" chan="11">
<step s="0" type="s" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="6" recv="-1" chan="3">
<step s="0" type="s" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="6" recv="-1" chan="11">
<step s="0" type="s" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="7" recv="-1" chan="3">
<step s="0" type="s" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="7" recv="-1" chan="11">
<step s="0" type="s" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
<gpu id="4" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="0" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="0" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="1" chan="1">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="1" chan="9">
<step s="0" type="r" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="2" chan="2">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="2" chan="10">
<step s="0" type="r" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="3" chan="3">
<step s="0" type="r" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="3" chan="11">
<step s="0" type="r" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="5" chan="5">
<step s="0" type="r" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="5" chan="13">
<step s="0" type="r" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="6" chan="6">
<step s="0" type="r" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="6" chan="14">
<step s="0" type="r" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="7" chan="7">
<step s="0" type="r" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="7" chan="15">
<step s="0" type="r" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="14" send="0" recv="-1" chan="4">
<step s="0" type="s" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="0" recv="-1" chan="12">
<step s="0" type="s" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="1" recv="-1" chan="4">
<step s="0" type="s" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="1" recv="-1" chan="12">
<step s="0" type="s" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="2" recv="-1" chan="4">
<step s="0" type="s" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="2" recv="-1" chan="12">
<step s="0" type="s" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="3" recv="-1" chan="4">
<step s="0" type="s" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="3" recv="-1" chan="12">
<step s="0" type="s" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="5" recv="-1" chan="4">
<step s="0" type="s" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="5" recv="-1" chan="12">
<step s="0" type="s" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="6" recv="-1" chan="4">
<step s="0" type="s" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="6" recv="-1" chan="12">
<step s="0" type="s" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="7" recv="-1" chan="4">
<step s="0" type="s" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="7" recv="-1" chan="12">
<step s="0" type="s" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
<gpu id="5" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="0" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="0" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="1" chan="1">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="1" chan="9">
<step s="0" type="r" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="2" chan="2">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="2" chan="10">
<step s="0" type="r" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="3" chan="3">
<step s="0" type="r" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="3" chan="11">
<step s="0" type="r" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="4" chan="4">
<step s="0" type="r" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="4" chan="12">
<step s="0" type="r" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="6" chan="6">
<step s="0" type="r" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="6" chan="14">
<step s="0" type="r" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="7" chan="7">
<step s="0" type="r" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="7" chan="15">
<step s="0" type="r" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="14" send="0" recv="-1" chan="5">
<step s="0" type="s" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="0" recv="-1" chan="13">
<step s="0" type="s" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="1" recv="-1" chan="5">
<step s="0" type="s" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="1" recv="-1" chan="13">
<step s="0" type="s" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="2" recv="-1" chan="5">
<step s="0" type="s" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="2" recv="-1" chan="13">
<step s="0" type="s" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="3" recv="-1" chan="5">
<step s="0" type="s" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="3" recv="-1" chan="13">
<step s="0" type="s" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="4" recv="-1" chan="5">
<step s="0" type="s" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="4" recv="-1" chan="13">
<step s="0" type="s" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="6" recv="-1" chan="5">
<step s="0" type="s" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="6" recv="-1" chan="13">
<step s="0" type="s" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="7" recv="-1" chan="5">
<step s="0" type="s" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="7" recv="-1" chan="13">
<step s="0" type="s" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
<gpu id="6" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="0" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="0" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="1" chan="1">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="1" chan="9">
<step s="0" type="r" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="2" chan="2">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="2" chan="10">
<step s="0" type="r" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="3" chan="3">
<step s="0" type="r" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="3" chan="11">
<step s="0" type="r" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="4" chan="4">
<step s="0" type="r" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="4" chan="12">
<step s="0" type="r" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="5" chan="5">
<step s="0" type="r" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="5" chan="13">
<step s="0" type="r" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="7" chan="7">
<step s="0" type="r" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="7" chan="15">
<step s="0" type="r" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="14" send="0" recv="-1" chan="6">
<step s="0" type="s" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="0" recv="-1" chan="14">
<step s="0" type="s" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="1" recv="-1" chan="6">
<step s="0" type="s" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="1" recv="-1" chan="14">
<step s="0" type="s" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="2" recv="-1" chan="6">
<step s="0" type="s" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="2" recv="-1" chan="14">
<step s="0" type="s" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="3" recv="-1" chan="6">
<step s="0" type="s" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="3" recv="-1" chan="14">
<step s="0" type="s" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="4" recv="-1" chan="6">
<step s="0" type="s" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="4" recv="-1" chan="14">
<step s="0" type="s" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="5" recv="-1" chan="6">
<step s="0" type="s" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="5" recv="-1" chan="14">
<step s="0" type="s" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="7" recv="-1" chan="6">
<step s="0" type="s" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="7" recv="-1" chan="14">
<step s="0" type="s" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
<gpu id="7" i_chunks="0" o_chunks="16" s_chunks="0">
<!-- Schedule for gpu id 7: 28 thread blocks (tb 0 to 27), each holding exactly one step (s=0) that moves cnt=1 chunk. -->
<!-- Every step uses the output buffer on both sides (srcbuf and dstbuf are "o") with srcoff equal to dstoff, and declares no dependency (depid=-1, deps=-1, hasdep=0). -->
<!-- tb 0 to 13 are receive-only (send=-1, type=r). There are two blocks per recv value 0..6: for recv=p the first uses chan=p and offset 2p, the second uses chan=p+8 and offset 2p+1. -->
<!-- NOTE(review): recv/send presumably name the peer gpu id; confirm against the algorithm loader. -->
<tb id="0" send="-1" recv="0" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="0" dstbuf="o" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="1" send="-1" recv="0" chan="8">
<step s="0" type="r" srcbuf="o" srcoff="1" dstbuf="o" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="2" send="-1" recv="1" chan="1">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="3" send="-1" recv="1" chan="9">
<step s="0" type="r" srcbuf="o" srcoff="3" dstbuf="o" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="4" send="-1" recv="2" chan="2">
<step s="0" type="r" srcbuf="o" srcoff="4" dstbuf="o" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="5" send="-1" recv="2" chan="10">
<step s="0" type="r" srcbuf="o" srcoff="5" dstbuf="o" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="6" send="-1" recv="3" chan="3">
<step s="0" type="r" srcbuf="o" srcoff="6" dstbuf="o" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="7" send="-1" recv="3" chan="11">
<step s="0" type="r" srcbuf="o" srcoff="7" dstbuf="o" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="8" send="-1" recv="4" chan="4">
<step s="0" type="r" srcbuf="o" srcoff="8" dstbuf="o" dstoff="8" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="9" send="-1" recv="4" chan="12">
<step s="0" type="r" srcbuf="o" srcoff="9" dstbuf="o" dstoff="9" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="10" send="-1" recv="5" chan="5">
<step s="0" type="r" srcbuf="o" srcoff="10" dstbuf="o" dstoff="10" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="11" send="-1" recv="5" chan="13">
<step s="0" type="r" srcbuf="o" srcoff="11" dstbuf="o" dstoff="11" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="12" send="-1" recv="6" chan="6">
<step s="0" type="r" srcbuf="o" srcoff="12" dstbuf="o" dstoff="12" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="13" send="-1" recv="6" chan="14">
<step s="0" type="r" srcbuf="o" srcoff="13" dstbuf="o" dstoff="13" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<!-- tb 14 to 27 are send-only (recv=-1, type=s). For each send value 0..6 one block sends offset 14 on chan 7 and one sends offset 15 on chan 15. -->
<!-- Offsets 14 and 15 are the only output offsets not written by the receive blocks above; presumably they hold the chunks contributed by this gpu (TODO confirm). -->
<tb id="14" send="0" recv="-1" chan="7">
<step s="0" type="s" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="15" send="0" recv="-1" chan="15">
<step s="0" type="s" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="16" send="1" recv="-1" chan="7">
<step s="0" type="s" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="17" send="1" recv="-1" chan="15">
<step s="0" type="s" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="18" send="2" recv="-1" chan="7">
<step s="0" type="s" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="19" send="2" recv="-1" chan="15">
<step s="0" type="s" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="20" send="3" recv="-1" chan="7">
<step s="0" type="s" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="21" send="3" recv="-1" chan="15">
<step s="0" type="s" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="22" send="4" recv="-1" chan="7">
<step s="0" type="s" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="23" send="4" recv="-1" chan="15">
<step s="0" type="s" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="24" send="5" recv="-1" chan="7">
<step s="0" type="s" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="25" send="5" recv="-1" chan="15">
<step s="0" type="s" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="26" send="6" recv="-1" chan="7">
<step s="0" type="s" srcbuf="o" srcoff="14" dstbuf="o" dstoff="14" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
<tb id="27" send="6" recv="-1" chan="15">
<step s="0" type="s" srcbuf="o" srcoff="15" dstbuf="o" dstoff="15" cnt="1" depid="-1" deps="-1" hasdep="0"/>
</tb>
</gpu>
</algo>
@@ -1,6 +1,6 @@
<!-- Copyright (c) Microsoft Corporation. -->
<!-- Licensed under the MIT License. -->
<algo name="all_gather_llm" proto="LL" nchannels="2" nchunksperloop="16" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="1024" maxBytes="4095">
<algo name="all_gather_llm" proto="LL" nchannels="2" nchunksperloop="16" ngpus="8" coll="allgather" inplace="1" outofplace="0" minBytes="8193" maxBytes="131072">
<gpu id="0" i_chunks="0" o_chunks="16" s_chunks="0">
<tb id="0" send="-1" recv="1" chan="0">
<step s="0" type="r" srcbuf="o" srcoff="2" dstbuf="o" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
Datei-Diff unterdrückt, da er zu groß ist Diff laden