<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.43.0 (0)
-->
<!-- Title: Self-attention computation graph Pages: 1 -->
<svg width="2140pt" height="1916pt"
viewBox="0.00 0.00 2140.22 1916.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1912)">
<title>Self-attention computation graph</title>
<polygon fill="white" stroke="transparent" points="-4,4 -4,-1912 2136.22,-1912 2136.22,4 -4,4"/>
<!-- 0 -->
<g id="node1" class="node">
<title>0</title>
<ellipse fill="none" stroke="black" cx="1238.18" cy="-1746" rx="170.87" ry="18"/>
<text text-anchor="middle" x="1238.18" y="-1742.3" font-family="Times,serif" font-size="14.00">input: Tensor[(1, 14, 768), float32]</text>
</g>
<!-- 16 -->
<g id="node9" class="node">
<title>16</title>
<polygon fill="none" stroke="black" points="1044.18,-1692 692.18,-1692 692.18,-1656 1044.18,-1656 1044.18,-1692"/>
<text text-anchor="middle" x="868.18" y="-1670.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 0&#45;&gt;16 -->
<g id="edge1" class="edge">
<title>0&#45;&gt;16</title>
<path fill="none" stroke="black" d="M1158.81,-1729.98C1102.99,-1719.42 1027.88,-1705.21 968,-1693.89"/>
<polygon fill="black" stroke="black" points="968.63,-1690.44 958.15,-1692.02 967.33,-1697.32 968.63,-1690.44"/>
</g>
<!-- 26 -->
<g id="node19" class="node">
<title>26</title>
<polygon fill="none" stroke="black" points="1414.18,-1692 1062.18,-1692 1062.18,-1656 1414.18,-1656 1414.18,-1692"/>
<text text-anchor="middle" x="1238.18" y="-1670.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 0&#45;&gt;26 -->
<g id="edge13" class="edge">
<title>0&#45;&gt;26</title>
<path fill="none" stroke="black" d="M1238.18,-1727.7C1238.18,-1719.98 1238.18,-1710.71 1238.18,-1702.11"/>
<polygon fill="black" stroke="black" points="1241.68,-1702.1 1238.18,-1692.1 1234.68,-1702.1 1241.68,-1702.1"/>
</g>
<!-- 47 -->
<g id="node39" class="node">
<title>47</title>
<polygon fill="none" stroke="black" points="1784.18,-1692 1432.18,-1692 1432.18,-1656 1784.18,-1656 1784.18,-1692"/>
<text text-anchor="middle" x="1608.18" y="-1670.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 0&#45;&gt;47 -->
<g id="edge37" class="edge">
<title>0&#45;&gt;47</title>
<path fill="none" stroke="black" d="M1317.56,-1729.98C1373.37,-1719.42 1448.48,-1705.21 1508.36,-1693.89"/>
<polygon fill="black" stroke="black" points="1509.04,-1697.32 1518.21,-1692.02 1507.74,-1690.44 1509.04,-1697.32"/>
</g>
<!-- 1 -->
<g id="node2" class="node">
<title>1</title>
<ellipse fill="none" stroke="black" cx="200.18" cy="-1890" rx="200.36" ry="18"/>
<text text-anchor="middle" x="200.18" y="-1886.3" font-family="Times,serif" font-size="14.00">query.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 17 -->
<g id="node10" class="node">
<title>17</title>
<polygon fill="none" stroke="black" points="310.68,-1836 117.68,-1836 117.68,-1800 310.68,-1800 310.68,-1836"/>
<text text-anchor="middle" x="214.18" y="-1814.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 1&#45;&gt;17 -->
<g id="edge2" class="edge">
<title>1&#45;&gt;17</title>
<path fill="none" stroke="black" d="M203.64,-1871.7C205.19,-1863.98 207.04,-1854.71 208.76,-1846.11"/>
<polygon fill="black" stroke="black" points="212.23,-1846.6 210.76,-1836.1 205.37,-1845.22 212.23,-1846.6"/>
</g>
<!-- 2 -->
<g id="node3" class="node">
<title>2</title>
<ellipse fill="none" stroke="black" cx="184.18" cy="-1530" rx="167.07" ry="18"/>
<text text-anchor="middle" x="184.18" y="-1526.3" font-family="Times,serif" font-size="14.00">query.bias: Tensor[(768,), float32]</text>
</g>
<!-- 22 -->
<g id="node15" class="node">
<title>22</title>
<polygon fill="none" stroke="black" points="578.68,-1476 507.68,-1476 507.68,-1440 578.68,-1440 578.68,-1476"/>
<text text-anchor="middle" x="543.18" y="-1454.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 2&#45;&gt;22 -->
<g id="edge9" class="edge">
<title>2&#45;&gt;22</title>
<path fill="none" stroke="black" d="M261.2,-1513.98C333.06,-1499.97 437.89,-1479.53 497.41,-1467.92"/>
<polygon fill="black" stroke="black" points="498.13,-1471.35 507.27,-1466 496.79,-1464.48 498.13,-1471.35"/>
</g>
<!-- 3 -->
<g id="node4" class="node">
<title>3</title>
<ellipse fill="none" stroke="black" cx="608.18" cy="-1890" rx="189.57" ry="18"/>
<text text-anchor="middle" x="608.18" y="-1886.3" font-family="Times,serif" font-size="14.00">key.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 27 -->
<g id="node20" class="node">
<title>27</title>
<polygon fill="none" stroke="black" points="700.68,-1836 507.68,-1836 507.68,-1800 700.68,-1800 700.68,-1836"/>
<text text-anchor="middle" x="604.18" y="-1814.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 3&#45;&gt;27 -->
<g id="edge14" class="edge">
<title>3&#45;&gt;27</title>
<path fill="none" stroke="black" d="M607.19,-1871.7C606.75,-1863.98 606.22,-1854.71 605.73,-1846.11"/>
<polygon fill="black" stroke="black" points="609.22,-1845.89 605.16,-1836.1 602.24,-1846.29 609.22,-1845.89"/>
</g>
<!-- 4 -->
<g id="node5" class="node">
<title>4</title>
<ellipse fill="none" stroke="black" cx="891.18" cy="-1530" rx="156.77" ry="18"/>
<text text-anchor="middle" x="891.18" y="-1526.3" font-family="Times,serif" font-size="14.00">key.bias: Tensor[(768,), float32]</text>
</g>
<!-- 32 -->
<g id="node25" class="node">
<title>32</title>
<polygon fill="none" stroke="black" points="1033.68,-1476 962.68,-1476 962.68,-1440 1033.68,-1440 1033.68,-1476"/>
<text text-anchor="middle" x="998.18" y="-1454.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 4&#45;&gt;32 -->
<g id="edge21" class="edge">
<title>4&#45;&gt;32</title>
<path fill="none" stroke="black" d="M916.81,-1512.23C930.77,-1503.1 948.23,-1491.68 963.4,-1481.76"/>
<polygon fill="black" stroke="black" points="965.57,-1484.52 972.02,-1476.12 961.73,-1478.66 965.57,-1484.52"/>
</g>
<!-- 5 -->
<g id="node6" class="node">
<title>5</title>
<ellipse fill="none" stroke="black" cx="1350.18" cy="-882" rx="217.96" ry="18"/>
<text text-anchor="middle" x="1350.18" y="-878.3" font-family="Times,serif" font-size="14.00">attention_mask: Tensor[(1, 1, 1, 14), float32]</text>
</g>
<!-- 42 -->
<g id="node34" class="node">
<title>42</title>
<polygon fill="none" stroke="black" points="1385.68,-828 1314.68,-828 1314.68,-792 1385.68,-792 1385.68,-828"/>
<text text-anchor="middle" x="1350.18" y="-806.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 5&#45;&gt;42 -->
<g id="edge32" class="edge">
<title>5&#45;&gt;42</title>
<path fill="none" stroke="black" d="M1350.18,-863.7C1350.18,-855.98 1350.18,-846.71 1350.18,-838.11"/>
<polygon fill="black" stroke="black" points="1353.68,-838.1 1350.18,-828.1 1346.68,-838.1 1353.68,-838.1"/>
</g>
<!-- 6 -->
<g id="node7" class="node">
<title>6</title>
<ellipse fill="none" stroke="black" cx="1908.18" cy="-1890" rx="200.36" ry="18"/>
<text text-anchor="middle" x="1908.18" y="-1886.3" font-family="Times,serif" font-size="14.00">value.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 48 -->
<g id="node40" class="node">
<title>48</title>
<polygon fill="none" stroke="black" points="2004.68,-1836 1811.68,-1836 1811.68,-1800 2004.68,-1800 2004.68,-1836"/>
<text text-anchor="middle" x="1908.18" y="-1814.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 6&#45;&gt;48 -->
<g id="edge38" class="edge">
<title>6&#45;&gt;48</title>
<path fill="none" stroke="black" d="M1908.18,-1871.7C1908.18,-1863.98 1908.18,-1854.71 1908.18,-1846.11"/>
<polygon fill="black" stroke="black" points="1911.68,-1846.1 1908.18,-1836.1 1904.68,-1846.1 1911.68,-1846.1"/>
</g>
<!-- 7 -->
<g id="node8" class="node">
<title>7</title>
<ellipse fill="none" stroke="black" cx="1965.18" cy="-1530" rx="167.07" ry="18"/>
<text text-anchor="middle" x="1965.18" y="-1526.3" font-family="Times,serif" font-size="14.00">value.bias: Tensor[(768,), float32]</text>
</g>
<!-- 53 -->
<g id="node45" class="node">
<title>53</title>
<polygon fill="none" stroke="black" points="1641.68,-1476 1570.68,-1476 1570.68,-1440 1641.68,-1440 1641.68,-1476"/>
<text text-anchor="middle" x="1606.18" y="-1454.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 7&#45;&gt;53 -->
<g id="edge45" class="edge">
<title>7&#45;&gt;53</title>
<path fill="none" stroke="black" d="M1888.17,-1513.98C1816.3,-1499.97 1711.47,-1479.53 1651.95,-1467.92"/>
<polygon fill="black" stroke="black" points="1652.58,-1464.48 1642.09,-1466 1651.24,-1471.35 1652.58,-1464.48"/>
</g>
<!-- 20 -->
<g id="node13" class="node">
<title>20</title>
<polygon fill="none" stroke="black" points="627.68,-1620 458.68,-1620 458.68,-1584 627.68,-1584 627.68,-1620"/>
<text text-anchor="middle" x="543.18" y="-1598.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 16&#45;&gt;20 -->
<g id="edge5" class="edge">
<title>16&#45;&gt;20</title>
<path fill="none" stroke="black" d="M789.09,-1655.97C741.72,-1645.76 681.37,-1632.76 632.31,-1622.2"/>
<polygon fill="black" stroke="black" points="632.86,-1618.73 622.34,-1620.05 631.38,-1625.58 632.86,-1618.73"/>
</g>
<!-- 18 -->
<g id="node11" class="node">
<title>18</title>
<polygon fill="none" stroke="black" points="401.68,-1764 40.68,-1764 40.68,-1728 401.68,-1728 401.68,-1764"/>
<text text-anchor="middle" x="221.18" y="-1742.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 17&#45;&gt;18 -->
<g id="edge3" class="edge">
<title>17&#45;&gt;18</title>
<path fill="none" stroke="black" d="M215.91,-1799.7C216.68,-1791.98 217.61,-1782.71 218.47,-1774.11"/>
<polygon fill="black" stroke="black" points="221.96,-1774.4 219.47,-1764.1 214.99,-1773.71 221.96,-1774.4"/>
</g>
<!-- 19 -->
<g id="node12" class="node">
<title>19</title>
<polygon fill="none" stroke="black" points="405.68,-1692 194.68,-1692 194.68,-1656 405.68,-1656 405.68,-1692"/>
<text text-anchor="middle" x="300.18" y="-1670.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 18&#45;&gt;19 -->
<g id="edge4" class="edge">
<title>18&#45;&gt;19</title>
<path fill="none" stroke="black" d="M240.71,-1727.7C250.56,-1718.97 262.67,-1708.24 273.38,-1698.75"/>
<polygon fill="black" stroke="black" points="275.72,-1701.36 280.88,-1692.1 271.07,-1696.12 275.72,-1701.36"/>
</g>
<!-- 19&#45;&gt;20 -->
<g id="edge6" class="edge">
<title>19&#45;&gt;20</title>
<path fill="none" stroke="black" d="M359.32,-1655.97C393.86,-1646.01 437.63,-1633.41 473.8,-1622.99"/>
<polygon fill="black" stroke="black" points="475.14,-1626.24 483.78,-1620.11 473.21,-1619.52 475.14,-1626.24"/>
</g>
<!-- 21 -->
<g id="node14" class="node">
<title>21</title>
<polygon fill="none" stroke="black" points="716.68,-1548 369.68,-1548 369.68,-1512 716.68,-1512 716.68,-1548"/>
<text text-anchor="middle" x="543.18" y="-1526.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 20&#45;&gt;21 -->
<g id="edge7" class="edge">
<title>20&#45;&gt;21</title>
<path fill="none" stroke="black" d="M543.18,-1583.7C543.18,-1575.98 543.18,-1566.71 543.18,-1558.11"/>
<polygon fill="black" stroke="black" points="546.68,-1558.1 543.18,-1548.1 539.68,-1558.1 546.68,-1558.1"/>
</g>
<!-- 21&#45;&gt;22 -->
<g id="edge8" class="edge">
<title>21&#45;&gt;22</title>
<path fill="none" stroke="black" d="M543.18,-1511.7C543.18,-1503.98 543.18,-1494.71 543.18,-1486.11"/>
<polygon fill="black" stroke="black" points="546.68,-1486.1 543.18,-1476.1 539.68,-1486.1 546.68,-1486.1"/>
</g>
<!-- 23 -->
<g id="node16" class="node">
<title>23</title>
<polygon fill="none" stroke="black" points="797.68,-1404 432.68,-1404 432.68,-1368 797.68,-1368 797.68,-1404"/>
<text text-anchor="middle" x="615.18" y="-1382.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 14, 12, 64], reverse=0)</text>
</g>
<!-- 22&#45;&gt;23 -->
<g id="edge10" class="edge">
<title>22&#45;&gt;23</title>
<path fill="none" stroke="black" d="M560.98,-1439.7C569.79,-1431.14 580.56,-1420.66 590.18,-1411.3"/>
<polygon fill="black" stroke="black" points="592.86,-1413.58 597.59,-1404.1 587.98,-1408.57 592.86,-1413.58"/>
</g>
<!-- 24 -->
<g id="node17" class="node">
<title>24</title>
<polygon fill="none" stroke="black" points="740.68,-1332 511.68,-1332 511.68,-1296 740.68,-1296 740.68,-1332"/>
<text text-anchor="middle" x="626.18" y="-1310.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 23&#45;&gt;24 -->
<g id="edge11" class="edge">
<title>23&#45;&gt;24</title>
<path fill="none" stroke="black" d="M617.9,-1367.7C619.11,-1359.98 620.57,-1350.71 621.92,-1342.11"/>
<polygon fill="black" stroke="black" points="625.4,-1342.53 623.49,-1332.1 618.48,-1341.44 625.4,-1342.53"/>
</g>
<!-- 25 -->
<g id="node18" class="node">
<title>25</title>
<polygon fill="none" stroke="black" points="863.68,-1116 520.68,-1116 520.68,-1080 863.68,-1080 863.68,-1116"/>
<text text-anchor="middle" x="692.18" y="-1094.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 14, 64], reverse=0)</text>
</g>
<!-- 24&#45;&gt;25 -->
<g id="edge12" class="edge">
<title>24&#45;&gt;25</title>
<path fill="none" stroke="black" d="M631.47,-1295.85C642.94,-1258.68 670.15,-1170.44 683.91,-1125.82"/>
<polygon fill="black" stroke="black" points="687.26,-1126.82 686.87,-1116.23 680.58,-1124.76 687.26,-1126.82"/>
</g>
<!-- 38 -->
<g id="node31" class="node">
<title>38</title>
<polygon fill="none" stroke="black" points="1082.68,-1044 913.68,-1044 913.68,-1008 1082.68,-1008 1082.68,-1044"/>
<text text-anchor="middle" x="998.18" y="-1022.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 25&#45;&gt;38 -->
<g id="edge27" class="edge">
<title>25&#45;&gt;38</title>
<path fill="none" stroke="black" d="M766.65,-1079.97C811.06,-1069.8 867.6,-1056.87 913.68,-1046.33"/>
<polygon fill="black" stroke="black" points="914.68,-1049.69 923.65,-1044.05 913.12,-1042.87 914.68,-1049.69"/>
</g>
<!-- 30 -->
<g id="node23" class="node">
<title>30</title>
<polygon fill="none" stroke="black" points="1322.68,-1620 1153.68,-1620 1153.68,-1584 1322.68,-1584 1322.68,-1620"/>
<text text-anchor="middle" x="1238.18" y="-1598.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 26&#45;&gt;30 -->
<g id="edge17" class="edge">
<title>26&#45;&gt;30</title>
<path fill="none" stroke="black" d="M1238.18,-1655.7C1238.18,-1647.98 1238.18,-1638.71 1238.18,-1630.11"/>
<polygon fill="black" stroke="black" points="1241.68,-1630.1 1238.18,-1620.1 1234.68,-1630.1 1241.68,-1630.1"/>
</g>
<!-- 28 -->
<g id="node21" class="node">
<title>28</title>
<polygon fill="none" stroke="black" points="782.68,-1764 421.68,-1764 421.68,-1728 782.68,-1728 782.68,-1764"/>
<text text-anchor="middle" x="602.18" y="-1742.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 27&#45;&gt;28 -->
<g id="edge15" class="edge">
<title>27&#45;&gt;28</title>
<path fill="none" stroke="black" d="M603.69,-1799.7C603.47,-1791.98 603.2,-1782.71 602.96,-1774.11"/>
<polygon fill="black" stroke="black" points="606.46,-1774 602.67,-1764.1 599.46,-1774.2 606.46,-1774"/>
</g>
<!-- 29 -->
<g id="node22" class="node">
<title>29</title>
<polygon fill="none" stroke="black" points="673.68,-1692 462.68,-1692 462.68,-1656 673.68,-1656 673.68,-1692"/>
<text text-anchor="middle" x="568.18" y="-1670.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 28&#45;&gt;29 -->
<g id="edge16" class="edge">
<title>28&#45;&gt;29</title>
<path fill="none" stroke="black" d="M593.78,-1727.7C589.91,-1719.73 585.23,-1710.1 580.94,-1701.26"/>
<polygon fill="black" stroke="black" points="584.01,-1699.57 576.49,-1692.1 577.71,-1702.63 584.01,-1699.57"/>
</g>
<!-- 29&#45;&gt;30 -->
<g id="edge18" class="edge">
<title>29&#45;&gt;30</title>
<path fill="none" stroke="black" d="M673.75,-1657.18C676.93,-1656.77 680.08,-1656.38 683.18,-1656 844.38,-1636.4 1032.65,-1619.73 1143.28,-1610.56"/>
<polygon fill="black" stroke="black" points="1143.73,-1614.04 1153.41,-1609.73 1143.16,-1607.06 1143.73,-1614.04"/>
</g>
<!-- 31 -->
<g id="node24" class="node">
<title>31</title>
<polygon fill="none" stroke="black" points="1412.68,-1548 1065.68,-1548 1065.68,-1512 1412.68,-1512 1412.68,-1548"/>
<text text-anchor="middle" x="1239.18" y="-1526.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 30&#45;&gt;31 -->
<g id="edge19" class="edge">
<title>30&#45;&gt;31</title>
<path fill="none" stroke="black" d="M1238.43,-1583.7C1238.54,-1575.98 1238.67,-1566.71 1238.79,-1558.11"/>
<polygon fill="black" stroke="black" points="1242.29,-1558.15 1238.94,-1548.1 1235.3,-1558.05 1242.29,-1558.15"/>
</g>
<!-- 31&#45;&gt;32 -->
<g id="edge20" class="edge">
<title>31&#45;&gt;32</title>
<path fill="none" stroke="black" d="M1180.54,-1511.97C1138.46,-1499.74 1082.57,-1483.51 1043.81,-1472.25"/>
<polygon fill="black" stroke="black" points="1044.43,-1468.79 1033.85,-1469.36 1042.48,-1475.51 1044.43,-1468.79"/>
</g>
<!-- 33 -->
<g id="node26" class="node">
<title>33</title>
<polygon fill="none" stroke="black" points="1180.68,-1404 815.68,-1404 815.68,-1368 1180.68,-1368 1180.68,-1404"/>
<text text-anchor="middle" x="998.18" y="-1382.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 14, 12, 64], reverse=0)</text>
</g>
<!-- 32&#45;&gt;33 -->
<g id="edge22" class="edge">
<title>32&#45;&gt;33</title>
<path fill="none" stroke="black" d="M998.18,-1439.7C998.18,-1431.98 998.18,-1422.71 998.18,-1414.11"/>
<polygon fill="black" stroke="black" points="1001.68,-1414.1 998.18,-1404.1 994.68,-1414.1 1001.68,-1414.1"/>
</g>
<!-- 34 -->
<g id="node27" class="node">
<title>34</title>
<polygon fill="none" stroke="black" points="1112.68,-1332 883.68,-1332 883.68,-1296 1112.68,-1296 1112.68,-1332"/>
<text text-anchor="middle" x="998.18" y="-1310.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 33&#45;&gt;34 -->
<g id="edge23" class="edge">
<title>33&#45;&gt;34</title>
<path fill="none" stroke="black" d="M998.18,-1367.7C998.18,-1359.98 998.18,-1350.71 998.18,-1342.11"/>
<polygon fill="black" stroke="black" points="1001.68,-1342.1 998.18,-1332.1 994.68,-1342.1 1001.68,-1342.1"/>
</g>
<!-- 35 -->
<g id="node28" class="node">
<title>35</title>
<polygon fill="none" stroke="black" points="1112.68,-1260 883.68,-1260 883.68,-1224 1112.68,-1224 1112.68,-1260"/>
<text text-anchor="middle" x="998.18" y="-1238.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 1, 3, 2])</text>
</g>
<!-- 34&#45;&gt;35 -->
<g id="edge24" class="edge">
<title>34&#45;&gt;35</title>
<path fill="none" stroke="black" d="M998.18,-1295.7C998.18,-1287.98 998.18,-1278.71 998.18,-1270.11"/>
<polygon fill="black" stroke="black" points="1001.68,-1270.1 998.18,-1260.1 994.68,-1270.1 1001.68,-1270.1"/>
</g>
<!-- 36 -->
<g id="node29" class="node">
<title>36</title>
<polygon fill="none" stroke="black" points="1169.68,-1188 826.68,-1188 826.68,-1152 1169.68,-1152 1169.68,-1188"/>
<text text-anchor="middle" x="998.18" y="-1166.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 64, 14], reverse=0)</text>
</g>
<!-- 35&#45;&gt;36 -->
<g id="edge25" class="edge">
<title>35&#45;&gt;36</title>
<path fill="none" stroke="black" d="M998.18,-1223.7C998.18,-1215.98 998.18,-1206.71 998.18,-1198.11"/>
<polygon fill="black" stroke="black" points="1001.68,-1198.1 998.18,-1188.1 994.68,-1198.1 1001.68,-1198.1"/>
</g>
<!-- 37 -->
<g id="node30" class="node">
<title>37</title>
<polygon fill="none" stroke="black" points="1103.68,-1116 892.68,-1116 892.68,-1080 1103.68,-1080 1103.68,-1116"/>
<text text-anchor="middle" x="998.18" y="-1094.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 36&#45;&gt;37 -->
<g id="edge26" class="edge">
<title>36&#45;&gt;37</title>
<path fill="none" stroke="black" d="M998.18,-1151.7C998.18,-1143.98 998.18,-1134.71 998.18,-1126.11"/>
<polygon fill="black" stroke="black" points="1001.68,-1126.1 998.18,-1116.1 994.68,-1126.1 1001.68,-1126.1"/>
</g>
<!-- 37&#45;&gt;38 -->
<g id="edge28" class="edge">
<title>37&#45;&gt;38</title>
<path fill="none" stroke="black" d="M998.18,-1079.7C998.18,-1071.98 998.18,-1062.71 998.18,-1054.11"/>
<polygon fill="black" stroke="black" points="1001.68,-1054.1 998.18,-1044.1 994.68,-1054.1 1001.68,-1054.1"/>
</g>
<!-- 39 -->
<g id="node32" class="node">
<title>39</title>
<polygon fill="none" stroke="black" points="1216.68,-972 851.68,-972 851.68,-936 1216.68,-936 1216.68,-972"/>
<text text-anchor="middle" x="1034.18" y="-950.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 12, 14, 14], reverse=0)</text>
</g>
<!-- 38&#45;&gt;39 -->
<g id="edge29" class="edge">
<title>38&#45;&gt;39</title>
<path fill="none" stroke="black" d="M1007.08,-1007.7C1011.18,-999.73 1016.13,-990.1 1020.67,-981.26"/>
<polygon fill="black" stroke="black" points="1023.92,-982.6 1025.39,-972.1 1017.7,-979.4 1023.92,-982.6"/>
</g>
<!-- 41 -->
<g id="node33" class="node">
<title>41</title>
<polygon fill="none" stroke="black" points="1105.68,-900 998.68,-900 998.68,-864 1105.68,-864 1105.68,-900"/>
<text text-anchor="middle" x="1052.18" y="-878.3" font-family="Times,serif" font-size="14.00">divide(·, 8.0)</text>
</g>
<!-- 39&#45;&gt;41 -->
<g id="edge30" class="edge">
<title>39&#45;&gt;41</title>
<path fill="none" stroke="black" d="M1038.63,-935.7C1040.64,-927.9 1043.05,-918.51 1045.28,-909.83"/>
<polygon fill="black" stroke="black" points="1048.68,-910.66 1047.78,-900.1 1041.9,-908.92 1048.68,-910.66"/>
</g>
<!-- 41&#45;&gt;42 -->
<g id="edge31" class="edge">
<title>41&#45;&gt;42</title>
<path fill="none" stroke="black" d="M1106.06,-868.07C1111.84,-866.69 1117.63,-865.31 1123.18,-864 1185.98,-849.15 1258.61,-832.24 1304.46,-821.6"/>
<polygon fill="black" stroke="black" points="1305.44,-824.96 1314.39,-819.29 1303.86,-818.14 1305.44,-824.96"/>
</g>
<!-- 43 -->
<g id="node35" class="node">
<title>43</title>
<polygon fill="none" stroke="black" points="1445.18,-756 1271.18,-756 1271.18,-720 1445.18,-720 1445.18,-756"/>
<text text-anchor="middle" x="1358.18" y="-734.3" font-family="Times,serif" font-size="14.00">nn.softmax(·| axis=&#45;1)</text>
</g>
<!-- 42&#45;&gt;43 -->
<g id="edge33" class="edge">
<title>42&#45;&gt;43</title>
<path fill="none" stroke="black" d="M1352.16,-791.7C1353.04,-783.98 1354.1,-774.71 1355.08,-766.11"/>
<polygon fill="black" stroke="black" points="1358.57,-766.44 1356.23,-756.1 1351.61,-765.64 1358.57,-766.44"/>
</g>
<!-- 44 -->
<g id="node36" class="node">
<title>44</title>
<polygon fill="none" stroke="black" points="1472.68,-684 1289.68,-684 1289.68,-648 1472.68,-648 1472.68,-684"/>
<text text-anchor="middle" x="1381.18" y="-662.3" font-family="Times,serif" font-size="14.00">nn.dropout(·| rate=0.1)</text>
</g>
<!-- 43&#45;&gt;44 -->
<g id="edge34" class="edge">
<title>43&#45;&gt;44</title>
<path fill="none" stroke="black" d="M1363.87,-719.7C1366.43,-711.9 1369.52,-702.51 1372.37,-693.83"/>
<polygon fill="black" stroke="black" points="1375.77,-694.7 1375.56,-684.1 1369.12,-692.51 1375.77,-694.7"/>
</g>
<!-- 45 -->
<g id="node37" class="node">
<title>45</title>
<polygon fill="none" stroke="black" points="1477.18,-612 1309.18,-612 1309.18,-576 1477.18,-576 1477.18,-612"/>
<text text-anchor="middle" x="1393.18" y="-590.3" font-family="Times,serif" font-size="14.00">TupleGetItem(idx=0)</text>
</g>
<!-- 44&#45;&gt;45 -->
<g id="edge35" class="edge">
<title>44&#45;&gt;45</title>
<path fill="none" stroke="black" d="M1384.15,-647.7C1385.47,-639.98 1387.06,-630.71 1388.53,-622.11"/>
<polygon fill="black" stroke="black" points="1392.01,-622.55 1390.25,-612.1 1385.11,-621.37 1392.01,-622.55"/>
</g>
<!-- 46 -->
<g id="node38" class="node">
<title>46</title>
<polygon fill="none" stroke="black" points="1570.68,-540 1227.68,-540 1227.68,-504 1570.68,-504 1570.68,-540"/>
<text text-anchor="middle" x="1399.18" y="-518.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 14, 14], reverse=0)</text>
</g>
<!-- 45&#45;&gt;46 -->
<g id="edge36" class="edge">
<title>45&#45;&gt;46</title>
<path fill="none" stroke="black" d="M1394.67,-575.7C1395.33,-567.98 1396.12,-558.71 1396.86,-550.11"/>
<polygon fill="black" stroke="black" points="1400.35,-550.37 1397.72,-540.1 1393.37,-549.77 1400.35,-550.37"/>
</g>
<!-- 58 -->
<g id="node50" class="node">
<title>58</title>
<polygon fill="none" stroke="black" points="1582.68,-468 1413.68,-468 1413.68,-432 1582.68,-432 1582.68,-468"/>
<text text-anchor="middle" x="1498.18" y="-446.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 46&#45;&gt;58 -->
<g id="edge50" class="edge">
<title>46&#45;&gt;58</title>
<path fill="none" stroke="black" d="M1423.4,-503.88C1436.1,-494.89 1451.85,-483.76 1465.61,-474.03"/>
<polygon fill="black" stroke="black" points="1467.73,-476.82 1473.87,-468.19 1463.69,-471.11 1467.73,-476.82"/>
</g>
<!-- 51 -->
<g id="node43" class="node">
<title>51</title>
<polygon fill="none" stroke="black" points="1692.68,-1620 1523.68,-1620 1523.68,-1584 1692.68,-1584 1692.68,-1620"/>
<text text-anchor="middle" x="1608.18" y="-1598.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 47&#45;&gt;51 -->
<g id="edge41" class="edge">
<title>47&#45;&gt;51</title>
<path fill="none" stroke="black" d="M1608.18,-1655.7C1608.18,-1647.98 1608.18,-1638.71 1608.18,-1630.11"/>
<polygon fill="black" stroke="black" points="1611.68,-1630.1 1608.18,-1620.1 1604.68,-1630.1 1611.68,-1630.1"/>
</g>
<!-- 49 -->
<g id="node41" class="node">
<title>49</title>
<polygon fill="none" stroke="black" points="2088.68,-1764 1727.68,-1764 1727.68,-1728 2088.68,-1728 2088.68,-1764"/>
<text text-anchor="middle" x="1908.18" y="-1742.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 48&#45;&gt;49 -->
<g id="edge39" class="edge">
<title>48&#45;&gt;49</title>
<path fill="none" stroke="black" d="M1908.18,-1799.7C1908.18,-1791.98 1908.18,-1782.71 1908.18,-1774.11"/>
<polygon fill="black" stroke="black" points="1911.68,-1774.1 1908.18,-1764.1 1904.68,-1774.1 1911.68,-1774.1"/>
</g>
<!-- 50 -->
<g id="node42" class="node">
<title>50</title>
<polygon fill="none" stroke="black" points="2013.68,-1692 1802.68,-1692 1802.68,-1656 2013.68,-1656 2013.68,-1692"/>
<text text-anchor="middle" x="1908.18" y="-1670.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 49&#45;&gt;50 -->
<g id="edge40" class="edge">
<title>49&#45;&gt;50</title>
<path fill="none" stroke="black" d="M1908.18,-1727.7C1908.18,-1719.98 1908.18,-1710.71 1908.18,-1702.11"/>
<polygon fill="black" stroke="black" points="1911.68,-1702.1 1908.18,-1692.1 1904.68,-1702.1 1911.68,-1702.1"/>
</g>
<!-- 50&#45;&gt;51 -->
<g id="edge42" class="edge">
<title>50&#45;&gt;51</title>
<path fill="none" stroke="black" d="M1835.18,-1655.97C1791.63,-1645.8 1736.21,-1632.87 1691.03,-1622.33"/>
<polygon fill="black" stroke="black" points="1691.79,-1618.91 1681.25,-1620.05 1690.2,-1625.73 1691.79,-1618.91"/>
</g>
<!-- 52 -->
<g id="node44" class="node">
<title>52</title>
<polygon fill="none" stroke="black" points="1779.68,-1548 1432.68,-1548 1432.68,-1512 1779.68,-1512 1779.68,-1548"/>
<text text-anchor="middle" x="1606.18" y="-1526.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 51&#45;&gt;52 -->
<g id="edge43" class="edge">
<title>51&#45;&gt;52</title>
<path fill="none" stroke="black" d="M1607.69,-1583.7C1607.47,-1575.98 1607.2,-1566.71 1606.96,-1558.11"/>
<polygon fill="black" stroke="black" points="1610.46,-1558 1606.67,-1548.1 1603.46,-1558.2 1610.46,-1558"/>
</g>
<!-- 52&#45;&gt;53 -->
<g id="edge44" class="edge">
<title>52&#45;&gt;53</title>
<path fill="none" stroke="black" d="M1606.18,-1511.7C1606.18,-1503.98 1606.18,-1494.71 1606.18,-1486.11"/>
<polygon fill="black" stroke="black" points="1609.68,-1486.1 1606.18,-1476.1 1602.68,-1486.1 1609.68,-1486.1"/>
</g>
<!-- 54 -->
<g id="node46" class="node">
<title>54</title>
<polygon fill="none" stroke="black" points="1787.68,-1404 1422.68,-1404 1422.68,-1368 1787.68,-1368 1787.68,-1404"/>
<text text-anchor="middle" x="1605.18" y="-1382.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 14, 12, 64], reverse=0)</text>
</g>
<!-- 53&#45;&gt;54 -->
<g id="edge46" class="edge">
<title>53&#45;&gt;54</title>
<path fill="none" stroke="black" d="M1605.94,-1439.7C1605.82,-1431.98 1605.69,-1422.71 1605.57,-1414.11"/>
<polygon fill="black" stroke="black" points="1609.07,-1414.05 1605.43,-1404.1 1602.07,-1414.15 1609.07,-1414.05"/>
</g>
<!-- 55 -->
<g id="node47" class="node">
<title>55</title>
<polygon fill="none" stroke="black" points="1719.68,-1332 1490.68,-1332 1490.68,-1296 1719.68,-1296 1719.68,-1332"/>
<text text-anchor="middle" x="1605.18" y="-1310.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 54&#45;&gt;55 -->
<g id="edge47" class="edge">
<title>54&#45;&gt;55</title>
<path fill="none" stroke="black" d="M1605.18,-1367.7C1605.18,-1359.98 1605.18,-1350.71 1605.18,-1342.11"/>
<polygon fill="black" stroke="black" points="1608.68,-1342.1 1605.18,-1332.1 1601.68,-1342.1 1608.68,-1342.1"/>
</g>
<!-- 56 -->
<g id="node48" class="node">
<title>56</title>
<polygon fill="none" stroke="black" points="1775.68,-1260 1432.68,-1260 1432.68,-1224 1775.68,-1224 1775.68,-1260"/>
<text text-anchor="middle" x="1604.18" y="-1238.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[&#45;1, 14, 64], reverse=0)</text>
</g>
<!-- 55&#45;&gt;56 -->
<g id="edge48" class="edge">
<title>55&#45;&gt;56</title>
<path fill="none" stroke="black" d="M1604.94,-1295.7C1604.82,-1287.98 1604.69,-1278.71 1604.57,-1270.11"/>
<polygon fill="black" stroke="black" points="1608.07,-1270.05 1604.43,-1260.1 1601.07,-1270.15 1608.07,-1270.05"/>
</g>
<!-- 57 -->
<g id="node49" class="node">
<title>57</title>
<polygon fill="none" stroke="black" points="1706.68,-1044 1495.68,-1044 1495.68,-1008 1706.68,-1008 1706.68,-1044"/>
<text text-anchor="middle" x="1601.18" y="-1022.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 56&#45;&gt;57 -->
<g id="edge49" class="edge">
<title>56&#45;&gt;57</title>
<path fill="none" stroke="black" d="M1603.94,-1223.85C1603.42,-1186.83 1602.19,-1099.18 1601.57,-1054.39"/>
<polygon fill="black" stroke="black" points="1605.06,-1054.18 1601.42,-1044.23 1598.06,-1054.28 1605.06,-1054.18"/>
</g>
<!-- 57&#45;&gt;58 -->
<g id="edge51" class="edge">
<title>57&#45;&gt;58</title>
<path fill="none" stroke="black" d="M1601.18,-1007.95C1601.18,-981.29 1601.18,-928.11 1601.18,-883 1601.18,-883 1601.18,-883 1601.18,-593 1601.18,-552.36 1603.61,-537.21 1580.18,-504 1571.45,-491.62 1558.82,-481.42 1546.08,-473.4"/>
<polygon fill="black" stroke="black" points="1547.67,-470.27 1537.29,-468.19 1544.1,-476.3 1547.67,-470.27"/>
</g>
<!-- 59 -->
<g id="node51" class="node">
<title>59</title>
<polygon fill="none" stroke="black" points="1680.68,-396 1315.68,-396 1315.68,-360 1680.68,-360 1680.68,-396"/>
<text text-anchor="middle" x="1498.18" y="-374.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 12, 14, 64], reverse=0)</text>
</g>
<!-- 58&#45;&gt;59 -->
<g id="edge52" class="edge">
<title>58&#45;&gt;59</title>
<path fill="none" stroke="black" d="M1498.18,-431.7C1498.18,-423.98 1498.18,-414.71 1498.18,-406.11"/>
<polygon fill="black" stroke="black" points="1501.68,-406.1 1498.18,-396.1 1494.68,-406.1 1501.68,-406.1"/>
</g>
<!-- 60 -->
<g id="node52" class="node">
<title>60</title>
<polygon fill="none" stroke="black" points="1612.68,-324 1383.68,-324 1383.68,-288 1612.68,-288 1612.68,-324"/>
<text text-anchor="middle" x="1498.18" y="-302.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 59&#45;&gt;60 -->
<g id="edge53" class="edge">
<title>59&#45;&gt;60</title>
<path fill="none" stroke="black" d="M1498.18,-359.7C1498.18,-351.98 1498.18,-342.71 1498.18,-334.11"/>
<polygon fill="black" stroke="black" points="1501.68,-334.1 1498.18,-324.1 1494.68,-334.1 1501.68,-334.1"/>
</g>
<!-- 61 -->
<g id="node53" class="node">
<title>61</title>
<polygon fill="none" stroke="black" points="1530.68,-252 1465.68,-252 1465.68,-216 1530.68,-216 1530.68,-252"/>
<text text-anchor="middle" x="1498.18" y="-230.3" font-family="Times,serif" font-size="14.00">copy(·)</text>
</g>
<!-- 60&#45;&gt;61 -->
<g id="edge54" class="edge">
<title>60&#45;&gt;61</title>
<path fill="none" stroke="black" d="M1498.18,-287.7C1498.18,-279.98 1498.18,-270.71 1498.18,-262.11"/>
<polygon fill="black" stroke="black" points="1501.68,-262.1 1498.18,-252.1 1494.68,-262.1 1501.68,-262.1"/>
</g>
<!-- 62 -->
<g id="node54" class="node">
<title>62</title>
<polygon fill="none" stroke="black" points="1671.68,-180 1324.68,-180 1324.68,-144 1671.68,-144 1671.68,-180"/>
<text text-anchor="middle" x="1498.18" y="-158.3" font-family="Times,serif" font-size="14.00">reshape(·| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 61&#45;&gt;62 -->
<g id="edge55" class="edge">
<title>61&#45;&gt;62</title>
<path fill="none" stroke="black" d="M1498.18,-215.7C1498.18,-207.98 1498.18,-198.71 1498.18,-190.11"/>
<polygon fill="black" stroke="black" points="1501.68,-190.1 1498.18,-180.1 1494.68,-190.1 1501.68,-190.1"/>
</g>
<!-- 63 -->
<g id="node55" class="node">
<title>63</title>
<polygon fill="none" stroke="black" points="1541.18,-108 1455.18,-108 1455.18,-72 1541.18,-72 1541.18,-108"/>
<text text-anchor="middle" x="1498.18" y="-86.3" font-family="Times,serif" font-size="14.00">Tuple[...]</text>
</g>
<!-- 62&#45;&gt;63 -->
<g id="edge56" class="edge">
<title>62&#45;&gt;63</title>
<path fill="none" stroke="black" d="M1498.18,-143.7C1498.18,-135.98 1498.18,-126.71 1498.18,-118.11"/>
<polygon fill="black" stroke="black" points="1501.68,-118.1 1498.18,-108.1 1494.68,-118.1 1501.68,-118.1"/>
</g>
<!-- 64 -->
<g id="node56" class="node">
<title>64</title>
<polygon fill="none" stroke="black" points="1538.18,-36 1458.18,-36 1458.18,0 1538.18,0 1538.18,-36"/>
<text text-anchor="middle" x="1498.18" y="-14.3" font-family="Times,serif" font-size="14.00">Function</text>
</g>
<!-- 63&#45;&gt;64 -->
<g id="edge57" class="edge">
<title>63&#45;&gt;64</title>
<path fill="none" stroke="black" d="M1498.18,-71.7C1498.18,-63.98 1498.18,-54.71 1498.18,-46.11"/>
<polygon fill="black" stroke="black" points="1501.68,-46.1 1498.18,-36.1 1494.68,-46.1 1501.68,-46.1"/>
</g>
</g>
</svg>