blob: 4521fb8c36ca9b55df40415d4329566e188d3816 [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.43.0 (0)
-->
<!-- Title: %3 Pages: 1 -->
<svg width="3671pt" height="3716pt"
viewBox="0.00 0.00 3671.03 3716.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 3712)">
<title>%3</title>
<polygon fill="white" stroke="transparent" points="-4,4 -4,-3712 3667.03,-3712 3667.03,4 -4,4"/>
<!-- 0 -->
<g id="node1" class="node">
<title>0</title>
<ellipse fill="none" stroke="black" cx="2012.5" cy="-3546" rx="170.87" ry="18"/>
<text text-anchor="middle" x="2012.5" y="-3542.3" font-family="Times,serif" font-size="14.00">input: Tensor[(1, 14, 768), float32]</text>
</g>
<!-- 29 -->
<g id="node19" class="node">
<title>29</title>
<polygon fill="none" stroke="black" points="2038.5,-3492 1590.5,-3492 1590.5,-3456 2038.5,-3456 2038.5,-3492"/>
<text text-anchor="middle" x="1814.5" y="-3470.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 0&#45;&gt;29 -->
<g id="edge1" class="edge">
<title>0&#45;&gt;29</title>
<path fill="none" stroke="black" d="M1966.08,-3528.59C1938.02,-3518.67 1902.03,-3505.95 1872.16,-3495.39"/>
<polygon fill="black" stroke="black" points="1873.29,-3492.07 1862.7,-3492.04 1870.96,-3498.67 1873.29,-3492.07"/>
</g>
<!-- 44 -->
<g id="node29" class="node">
<title>44</title>
<polygon fill="none" stroke="black" points="2504.5,-3492 2056.5,-3492 2056.5,-3456 2504.5,-3456 2504.5,-3492"/>
<text text-anchor="middle" x="2280.5" y="-3470.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 0&#45;&gt;44 -->
<g id="edge13" class="edge">
<title>0&#45;&gt;44</title>
<path fill="none" stroke="black" d="M2073.31,-3529.12C2112.49,-3518.88 2163.7,-3505.51 2205.41,-3494.61"/>
<polygon fill="black" stroke="black" points="2206.45,-3497.96 2215.25,-3492.04 2204.69,-3491.19 2206.45,-3497.96"/>
</g>
<!-- 72 -->
<g id="node49" class="node">
<title>72</title>
<polygon fill="none" stroke="black" points="1572.5,-3492 1124.5,-3492 1124.5,-3456 1572.5,-3456 1572.5,-3492"/>
<text text-anchor="middle" x="1348.5" y="-3470.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 0&#45;&gt;72 -->
<g id="edge37" class="edge">
<title>0&#45;&gt;72</title>
<path fill="none" stroke="black" d="M1896.46,-3532.77C1792.43,-3521.8 1638.42,-3505.56 1520.15,-3493.1"/>
<polygon fill="black" stroke="black" points="1520.21,-3489.58 1509.9,-3492.01 1519.48,-3496.54 1520.21,-3489.58"/>
</g>
<!-- 106 -->
<g id="node74" class="node">
<title>106</title>
<polygon fill="none" stroke="black" points="2856,-1476 2785,-1476 2785,-1440 2856,-1440 2856,-1476"/>
<text text-anchor="middle" x="2820.5" y="-1454.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 0&#45;&gt;106 -->
<g id="edge68" class="edge">
<title>0&#45;&gt;106</title>
<path fill="none" stroke="black" d="M2161.62,-3537.11C2367.88,-3525.77 2717.95,-3504.82 2743.5,-3492 2790.25,-3468.55 2820.5,-3455.3 2820.5,-3403 2820.5,-3403 2820.5,-3403 2820.5,-1601 2820.5,-1561 2820.5,-1514.65 2820.5,-1486.08"/>
<polygon fill="black" stroke="black" points="2824,-1486.05 2820.5,-1476.05 2817,-1486.05 2824,-1486.05"/>
</g>
<!-- 1 -->
<g id="node2" class="node">
<title>1</title>
<ellipse fill="none" stroke="black" cx="1743.5" cy="-2682" rx="217.96" ry="18"/>
<text text-anchor="middle" x="1743.5" y="-2678.3" font-family="Times,serif" font-size="14.00">attention_mask: Tensor[(1, 1, 1, 14), float32]</text>
</g>
<!-- 65 -->
<g id="node44" class="node">
<title>65</title>
<polygon fill="none" stroke="black" points="2024,-2628 1953,-2628 1953,-2592 2024,-2592 2024,-2628"/>
<text text-anchor="middle" x="1988.5" y="-2606.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 1&#45;&gt;65 -->
<g id="edge32" class="edge">
<title>1&#45;&gt;65</title>
<path fill="none" stroke="black" d="M1800.94,-2664.59C1844.23,-2652.22 1902.79,-2635.49 1942.89,-2624.03"/>
<polygon fill="black" stroke="black" points="1944.01,-2627.35 1952.67,-2621.24 1942.09,-2620.62 1944.01,-2627.35"/>
</g>
<!-- 2 -->
<g id="node3" class="node">
<title>2</title>
<ellipse fill="none" stroke="black" cx="1000.5" cy="-3690" rx="265.65" ry="18"/>
<text text-anchor="middle" x="1000.5" y="-3686.3" font-family="Times,serif" font-size="14.00">attention.self.query.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 30 -->
<g id="node20" class="node">
<title>30</title>
<polygon fill="none" stroke="black" points="1097,-3636 904,-3636 904,-3600 1097,-3600 1097,-3636"/>
<text text-anchor="middle" x="1000.5" y="-3614.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 2&#45;&gt;30 -->
<g id="edge2" class="edge">
<title>2&#45;&gt;30</title>
<path fill="none" stroke="black" d="M1000.5,-3671.7C1000.5,-3663.98 1000.5,-3654.71 1000.5,-3646.11"/>
<polygon fill="black" stroke="black" points="1004,-3646.1 1000.5,-3636.1 997,-3646.1 1004,-3646.1"/>
</g>
<!-- 3 -->
<g id="node4" class="node">
<title>3</title>
<ellipse fill="none" stroke="black" cx="1637.5" cy="-3330" rx="232.86" ry="18"/>
<text text-anchor="middle" x="1637.5" y="-3326.3" font-family="Times,serif" font-size="14.00">attention.self.query.bias: Tensor[(768,), float32]</text>
</g>
<!-- 37 -->
<g id="node25" class="node">
<title>37</title>
<polygon fill="none" stroke="black" points="1673,-3276 1602,-3276 1602,-3240 1673,-3240 1673,-3276"/>
<text text-anchor="middle" x="1637.5" y="-3254.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 3&#45;&gt;37 -->
<g id="edge9" class="edge">
<title>3&#45;&gt;37</title>
<path fill="none" stroke="black" d="M1637.5,-3311.7C1637.5,-3303.98 1637.5,-3294.71 1637.5,-3286.11"/>
<polygon fill="black" stroke="black" points="1641,-3286.1 1637.5,-3276.1 1634,-3286.1 1641,-3286.1"/>
</g>
<!-- 4 -->
<g id="node5" class="node">
<title>4</title>
<ellipse fill="none" stroke="black" cx="2628.5" cy="-3690" rx="254.55" ry="18"/>
<text text-anchor="middle" x="2628.5" y="-3686.3" font-family="Times,serif" font-size="14.00">attention.self.key.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 45 -->
<g id="node30" class="node">
<title>45</title>
<polygon fill="none" stroke="black" points="2725,-3636 2532,-3636 2532,-3600 2725,-3600 2725,-3636"/>
<text text-anchor="middle" x="2628.5" y="-3614.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 4&#45;&gt;45 -->
<g id="edge14" class="edge">
<title>4&#45;&gt;45</title>
<path fill="none" stroke="black" d="M2628.5,-3671.7C2628.5,-3663.98 2628.5,-3654.71 2628.5,-3646.11"/>
<polygon fill="black" stroke="black" points="2632,-3646.1 2628.5,-3636.1 2625,-3646.1 2632,-3646.1"/>
</g>
<!-- 5 -->
<g id="node6" class="node">
<title>5</title>
<ellipse fill="none" stroke="black" cx="2109.5" cy="-3330" rx="221.76" ry="18"/>
<text text-anchor="middle" x="2109.5" y="-3326.3" font-family="Times,serif" font-size="14.00">attention.self.key.bias: Tensor[(768,), float32]</text>
</g>
<!-- 52 -->
<g id="node35" class="node">
<title>52</title>
<polygon fill="none" stroke="black" points="2145,-3276 2074,-3276 2074,-3240 2145,-3240 2145,-3276"/>
<text text-anchor="middle" x="2109.5" y="-3254.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 5&#45;&gt;52 -->
<g id="edge21" class="edge">
<title>5&#45;&gt;52</title>
<path fill="none" stroke="black" d="M2109.5,-3311.7C2109.5,-3303.98 2109.5,-3294.71 2109.5,-3286.11"/>
<polygon fill="black" stroke="black" points="2113,-3286.1 2109.5,-3276.1 2106,-3286.1 2113,-3286.1"/>
</g>
<!-- 6 -->
<g id="node7" class="node">
<title>6</title>
<ellipse fill="none" stroke="black" cx="336.5" cy="-3690" rx="265.35" ry="18"/>
<text text-anchor="middle" x="336.5" y="-3686.3" font-family="Times,serif" font-size="14.00">attention.self.value.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 73 -->
<g id="node50" class="node">
<title>73</title>
<polygon fill="none" stroke="black" points="433,-3636 240,-3636 240,-3600 433,-3600 433,-3636"/>
<text text-anchor="middle" x="336.5" y="-3614.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 6&#45;&gt;73 -->
<g id="edge38" class="edge">
<title>6&#45;&gt;73</title>
<path fill="none" stroke="black" d="M336.5,-3671.7C336.5,-3663.98 336.5,-3654.71 336.5,-3646.11"/>
<polygon fill="black" stroke="black" points="340,-3646.1 336.5,-3636.1 333,-3646.1 340,-3646.1"/>
</g>
<!-- 7 -->
<g id="node8" class="node">
<title>7</title>
<ellipse fill="none" stroke="black" cx="693.5" cy="-3330" rx="232.06" ry="18"/>
<text text-anchor="middle" x="693.5" y="-3326.3" font-family="Times,serif" font-size="14.00">attention.self.value.bias: Tensor[(768,), float32]</text>
</g>
<!-- 80 -->
<g id="node55" class="node">
<title>80</title>
<polygon fill="none" stroke="black" points="729,-3276 658,-3276 658,-3240 729,-3240 729,-3276"/>
<text text-anchor="middle" x="693.5" y="-3254.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 7&#45;&gt;80 -->
<g id="edge45" class="edge">
<title>7&#45;&gt;80</title>
<path fill="none" stroke="black" d="M693.5,-3311.7C693.5,-3303.98 693.5,-3294.71 693.5,-3286.11"/>
<polygon fill="black" stroke="black" points="697,-3286.1 693.5,-3276.1 690,-3286.1 697,-3286.1"/>
</g>
<!-- 8 -->
<g id="node9" class="node">
<title>8</title>
<ellipse fill="none" stroke="black" cx="1578.5" cy="-2106" rx="282.15" ry="18"/>
<text text-anchor="middle" x="1578.5" y="-2102.3" font-family="Times,serif" font-size="14.00">attention.output.dense.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 96 -->
<g id="node66" class="node">
<title>96</title>
<polygon fill="none" stroke="black" points="1675,-2052 1482,-2052 1482,-2016 1675,-2016 1675,-2052"/>
<text text-anchor="middle" x="1578.5" y="-2030.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 8&#45;&gt;96 -->
<g id="edge57" class="edge">
<title>8&#45;&gt;96</title>
<path fill="none" stroke="black" d="M1578.5,-2087.7C1578.5,-2079.98 1578.5,-2070.71 1578.5,-2062.11"/>
<polygon fill="black" stroke="black" points="1582,-2062.1 1578.5,-2052.1 1575,-2062.1 1582,-2062.1"/>
</g>
<!-- 9 -->
<g id="node10" class="node">
<title>9</title>
<ellipse fill="none" stroke="black" cx="2542.5" cy="-1746" rx="248.86" ry="18"/>
<text text-anchor="middle" x="2542.5" y="-1742.3" font-family="Times,serif" font-size="14.00">attention.output.dense.bias: Tensor[(768,), float32]</text>
</g>
<!-- 103 -->
<g id="node71" class="node">
<title>103</title>
<polygon fill="none" stroke="black" points="2578,-1692 2507,-1692 2507,-1656 2578,-1656 2578,-1692"/>
<text text-anchor="middle" x="2542.5" y="-1670.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 9&#45;&gt;103 -->
<g id="edge64" class="edge">
<title>9&#45;&gt;103</title>
<path fill="none" stroke="black" d="M2542.5,-1727.7C2542.5,-1719.98 2542.5,-1710.71 2542.5,-1702.11"/>
<polygon fill="black" stroke="black" points="2546,-1702.1 2542.5,-1692.1 2539,-1702.1 2546,-1702.1"/>
</g>
<!-- 10 -->
<g id="node11" class="node">
<title>10</title>
<ellipse fill="none" stroke="black" cx="1913.5" cy="-1458" rx="286.75" ry="18"/>
<text text-anchor="middle" x="1913.5" y="-1454.3" font-family="Times,serif" font-size="14.00">attention.output.LayerNorm.weight: Tensor[(768,), float32]</text>
</g>
<!-- 107 -->
<g id="node75" class="node">
<title>107</title>
<polygon fill="none" stroke="black" points="2739.5,-1404 2261.5,-1404 2261.5,-1368 2739.5,-1368 2739.5,-1404"/>
<text text-anchor="middle" x="2500.5" y="-1382.3" font-family="Times,serif" font-size="14.00">nn.layer_norm(·, ·, ·| axis=&#45;1, epsilon=1e&#45;12, center=1, scale=1)</text>
</g>
<!-- 10&#45;&gt;107 -->
<g id="edge70" class="edge">
<title>10&#45;&gt;107</title>
<path fill="none" stroke="black" d="M2040.51,-1441.85C2130.7,-1431.1 2252.23,-1416.61 2347.72,-1405.22"/>
<polygon fill="black" stroke="black" points="2348.24,-1408.68 2357.76,-1404.02 2347.41,-1401.73 2348.24,-1408.68"/>
</g>
<!-- 11 -->
<g id="node12" class="node">
<title>11</title>
<ellipse fill="none" stroke="black" cx="2492.5" cy="-1458" rx="274.05" ry="18"/>
<text text-anchor="middle" x="2492.5" y="-1454.3" font-family="Times,serif" font-size="14.00">attention.output.LayerNorm.bias: Tensor[(768,), float32]</text>
</g>
<!-- 11&#45;&gt;107 -->
<g id="edge71" class="edge">
<title>11&#45;&gt;107</title>
<path fill="none" stroke="black" d="M2494.48,-1439.7C2495.36,-1431.98 2496.42,-1422.71 2497.4,-1414.11"/>
<polygon fill="black" stroke="black" points="2500.89,-1414.44 2498.55,-1404.1 2493.93,-1413.64 2500.89,-1414.44"/>
</g>
<!-- 12 -->
<g id="node13" class="node">
<title>12</title>
<ellipse fill="none" stroke="black" cx="3120.5" cy="-1530" rx="271.85" ry="18"/>
<text text-anchor="middle" x="3120.5" y="-1526.3" font-family="Times,serif" font-size="14.00">intermediate.dense.weight: Tensor[(3072, 768), float32]</text>
</g>
<!-- 110 -->
<g id="node77" class="node">
<title>110</title>
<polygon fill="none" stroke="black" points="3214,-1476 3021,-1476 3021,-1440 3214,-1440 3214,-1476"/>
<text text-anchor="middle" x="3117.5" y="-1454.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 12&#45;&gt;110 -->
<g id="edge73" class="edge">
<title>12&#45;&gt;110</title>
<path fill="none" stroke="black" d="M3119.76,-1511.7C3119.43,-1503.98 3119.03,-1494.71 3118.66,-1486.11"/>
<polygon fill="black" stroke="black" points="3122.16,-1485.95 3118.23,-1476.1 3115.16,-1486.25 3122.16,-1485.95"/>
</g>
<!-- 13 -->
<g id="node14" class="node">
<title>13</title>
<ellipse fill="none" stroke="black" cx="3424.5" cy="-1170" rx="238.56" ry="18"/>
<text text-anchor="middle" x="3424.5" y="-1166.3" font-family="Times,serif" font-size="14.00">intermediate.dense.bias: Tensor[(3072,), float32]</text>
</g>
<!-- 117 -->
<g id="node82" class="node">
<title>117</title>
<polygon fill="none" stroke="black" points="3179,-1116 3108,-1116 3108,-1080 3179,-1080 3179,-1116"/>
<text text-anchor="middle" x="3143.5" y="-1094.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 13&#45;&gt;117 -->
<g id="edge80" class="edge">
<title>13&#45;&gt;117</title>
<path fill="none" stroke="black" d="M3358.97,-1152.68C3306.7,-1139.65 3234.84,-1121.75 3188.77,-1110.28"/>
<polygon fill="black" stroke="black" points="3189.62,-1106.88 3179.07,-1107.86 3187.93,-1113.67 3189.62,-1106.88"/>
</g>
<!-- 14 -->
<g id="node15" class="node">
<title>14</title>
<ellipse fill="none" stroke="black" cx="2770.5" cy="-882" rx="242.36" ry="18"/>
<text text-anchor="middle" x="2770.5" y="-878.3" font-family="Times,serif" font-size="14.00">output.dense.weight: Tensor[(768, 3072), float32]</text>
</g>
<!-- 129 -->
<g id="node89" class="node">
<title>129</title>
<polygon fill="none" stroke="black" points="2862,-828 2669,-828 2669,-792 2862,-792 2862,-828"/>
<text text-anchor="middle" x="2765.5" y="-806.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 14&#45;&gt;129 -->
<g id="edge88" class="edge">
<title>14&#45;&gt;129</title>
<path fill="none" stroke="black" d="M2769.26,-863.7C2768.71,-855.98 2768.05,-846.71 2767.44,-838.11"/>
<polygon fill="black" stroke="black" points="2770.93,-837.83 2766.72,-828.1 2763.94,-838.33 2770.93,-837.83"/>
</g>
<!-- 15 -->
<g id="node16" class="node">
<title>15</title>
<ellipse fill="none" stroke="black" cx="3203.5" cy="-522" rx="203.36" ry="18"/>
<text text-anchor="middle" x="3203.5" y="-518.3" font-family="Times,serif" font-size="14.00">output.dense.bias: Tensor[(768,), float32]</text>
</g>
<!-- 136 -->
<g id="node94" class="node">
<title>136</title>
<polygon fill="none" stroke="black" points="2796,-468 2725,-468 2725,-432 2796,-432 2796,-468"/>
<text text-anchor="middle" x="2760.5" y="-446.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 15&#45;&gt;136 -->
<g id="edge95" class="edge">
<title>15&#45;&gt;136</title>
<path fill="none" stroke="black" d="M3108.74,-506.03C3015.47,-491.29 2877.01,-469.41 2806.05,-458.2"/>
<polygon fill="black" stroke="black" points="2806.51,-454.73 2796.09,-456.62 2805.42,-461.64 2806.51,-454.73"/>
</g>
<!-- 16 -->
<g id="node17" class="node">
<title>16</title>
<ellipse fill="none" stroke="black" cx="1788.5" cy="-234" rx="241.26" ry="18"/>
<text text-anchor="middle" x="1788.5" y="-230.3" font-family="Times,serif" font-size="14.00">output.LayerNorm.weight: Tensor[(768,), float32]</text>
</g>
<!-- 140 -->
<g id="node98" class="node">
<title>140</title>
<polygon fill="none" stroke="black" points="2515.5,-180 2037.5,-180 2037.5,-144 2515.5,-144 2515.5,-180"/>
<text text-anchor="middle" x="2276.5" y="-158.3" font-family="Times,serif" font-size="14.00">nn.layer_norm(·, ·, ·| axis=&#45;1, epsilon=1e&#45;12, center=1, scale=1)</text>
</g>
<!-- 16&#45;&gt;140 -->
<g id="edge101" class="edge">
<title>16&#45;&gt;140</title>
<path fill="none" stroke="black" d="M1894.39,-217.81C1968.85,-207.13 2068.91,-192.78 2147.95,-181.44"/>
<polygon fill="black" stroke="black" points="2148.57,-184.89 2157.97,-180 2147.58,-177.96 2148.57,-184.89"/>
</g>
<!-- 17 -->
<g id="node18" class="node">
<title>17</title>
<ellipse fill="none" stroke="black" cx="2276.5" cy="-234" rx="228.56" ry="18"/>
<text text-anchor="middle" x="2276.5" y="-230.3" font-family="Times,serif" font-size="14.00">output.LayerNorm.bias: Tensor[(768,), float32]</text>
</g>
<!-- 17&#45;&gt;140 -->
<g id="edge102" class="edge">
<title>17&#45;&gt;140</title>
<path fill="none" stroke="black" d="M2276.5,-215.7C2276.5,-207.98 2276.5,-198.71 2276.5,-190.11"/>
<polygon fill="black" stroke="black" points="2280,-190.1 2276.5,-180.1 2273,-190.1 2280,-190.1"/>
</g>
<!-- 34 -->
<g id="node23" class="node">
<title>34</title>
<polygon fill="none" stroke="black" points="1250,-3420 1081,-3420 1081,-3384 1250,-3384 1250,-3420"/>
<text text-anchor="middle" x="1165.5" y="-3398.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 29&#45;&gt;34 -->
<g id="edge5" class="edge">
<title>29&#45;&gt;34</title>
<path fill="none" stroke="black" d="M1656.57,-3455.97C1532.74,-3442.61 1364.53,-3424.47 1260.59,-3413.26"/>
<polygon fill="black" stroke="black" points="1260.74,-3409.75 1250.42,-3412.16 1259.99,-3416.71 1260.74,-3409.75"/>
</g>
<!-- 32 -->
<g id="node21" class="node">
<title>32</title>
<polygon fill="none" stroke="black" points="1231,-3564 770,-3564 770,-3528 1231,-3528 1231,-3564"/>
<text text-anchor="middle" x="1000.5" y="-3542.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 30&#45;&gt;32 -->
<g id="edge3" class="edge">
<title>30&#45;&gt;32</title>
<path fill="none" stroke="black" d="M1000.5,-3599.7C1000.5,-3591.98 1000.5,-3582.71 1000.5,-3574.11"/>
<polygon fill="black" stroke="black" points="1004,-3574.1 1000.5,-3564.1 997,-3574.1 1004,-3574.1"/>
</g>
<!-- 33 -->
<g id="node22" class="node">
<title>33</title>
<polygon fill="none" stroke="black" points="1106,-3492 895,-3492 895,-3456 1106,-3456 1106,-3492"/>
<text text-anchor="middle" x="1000.5" y="-3470.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 32&#45;&gt;33 -->
<g id="edge4" class="edge">
<title>32&#45;&gt;33</title>
<path fill="none" stroke="black" d="M1000.5,-3527.7C1000.5,-3519.98 1000.5,-3510.71 1000.5,-3502.11"/>
<polygon fill="black" stroke="black" points="1004,-3502.1 1000.5,-3492.1 997,-3502.1 1004,-3502.1"/>
</g>
<!-- 33&#45;&gt;34 -->
<g id="edge6" class="edge">
<title>33&#45;&gt;34</title>
<path fill="none" stroke="black" d="M1040.86,-3455.88C1063.52,-3446.26 1091.97,-3434.19 1115.97,-3424.01"/>
<polygon fill="black" stroke="black" points="1117.43,-3427.2 1125.27,-3420.07 1114.69,-3420.75 1117.43,-3427.2"/>
</g>
<!-- 36 -->
<g id="node24" class="node">
<title>36</title>
<polygon fill="none" stroke="black" points="1387,-3348 944,-3348 944,-3312 1387,-3312 1387,-3348"/>
<text text-anchor="middle" x="1165.5" y="-3326.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 34&#45;&gt;36 -->
<g id="edge7" class="edge">
<title>34&#45;&gt;36</title>
<path fill="none" stroke="black" d="M1165.5,-3383.7C1165.5,-3375.98 1165.5,-3366.71 1165.5,-3358.11"/>
<polygon fill="black" stroke="black" points="1169,-3358.1 1165.5,-3348.1 1162,-3358.1 1169,-3358.1"/>
</g>
<!-- 36&#45;&gt;37 -->
<g id="edge8" class="edge">
<title>36&#45;&gt;37</title>
<path fill="none" stroke="black" d="M1280.36,-3311.97C1380.49,-3297.12 1520.55,-3276.34 1591.86,-3265.77"/>
<polygon fill="black" stroke="black" points="1592.49,-3269.21 1601.87,-3264.28 1591.47,-3262.29 1592.49,-3269.21"/>
</g>
<!-- 39 -->
<g id="node26" class="node">
<title>39</title>
<polygon fill="none" stroke="black" points="1861,-3204 1482,-3204 1482,-3168 1861,-3168 1861,-3204"/>
<text text-anchor="middle" x="1671.5" y="-3182.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 14 12 64]| newshape=..., reverse=0)</text>
</g>
<!-- 37&#45;&gt;39 -->
<g id="edge10" class="edge">
<title>37&#45;&gt;39</title>
<path fill="none" stroke="black" d="M1645.9,-3239.7C1649.77,-3231.73 1654.45,-3222.1 1658.74,-3213.26"/>
<polygon fill="black" stroke="black" points="1661.97,-3214.63 1663.19,-3204.1 1655.67,-3211.57 1661.97,-3214.63"/>
</g>
<!-- 40 -->
<g id="node27" class="node">
<title>40</title>
<polygon fill="none" stroke="black" points="1787,-3132 1558,-3132 1558,-3096 1787,-3096 1787,-3132"/>
<text text-anchor="middle" x="1672.5" y="-3110.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 39&#45;&gt;40 -->
<g id="edge11" class="edge">
<title>39&#45;&gt;40</title>
<path fill="none" stroke="black" d="M1671.75,-3167.7C1671.86,-3159.98 1671.99,-3150.71 1672.11,-3142.11"/>
<polygon fill="black" stroke="black" points="1675.61,-3142.15 1672.26,-3132.1 1668.61,-3142.05 1675.61,-3142.15"/>
</g>
<!-- 42 -->
<g id="node28" class="node">
<title>42</title>
<polygon fill="none" stroke="black" points="1884,-2916 1463,-2916 1463,-2880 1884,-2880 1884,-2916"/>
<text text-anchor="middle" x="1673.5" y="-2894.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 14 64]| newshape=[&#45;1, 14, 64], reverse=0)</text>
</g>
<!-- 40&#45;&gt;42 -->
<g id="edge12" class="edge">
<title>40&#45;&gt;42</title>
<path fill="none" stroke="black" d="M1672.58,-3095.85C1672.75,-3058.83 1673.16,-2971.18 1673.37,-2926.39"/>
<polygon fill="black" stroke="black" points="1676.87,-2926.25 1673.42,-2916.23 1669.87,-2926.22 1676.87,-2926.25"/>
</g>
<!-- 60 -->
<g id="node41" class="node">
<title>60</title>
<polygon fill="none" stroke="black" points="2117,-2844 1948,-2844 1948,-2808 2117,-2808 2117,-2844"/>
<text text-anchor="middle" x="2032.5" y="-2822.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 42&#45;&gt;60 -->
<g id="edge27" class="edge">
<title>42&#45;&gt;60</title>
<path fill="none" stroke="black" d="M1760.86,-2879.97C1814.4,-2869.53 1882.94,-2856.16 1937.77,-2845.47"/>
<polygon fill="black" stroke="black" points="1938.7,-2848.86 1947.85,-2843.51 1937.36,-2841.98 1938.7,-2848.86"/>
</g>
<!-- 49 -->
<g id="node33" class="node">
<title>49</title>
<polygon fill="none" stroke="black" points="2655,-3420 2486,-3420 2486,-3384 2655,-3384 2655,-3420"/>
<text text-anchor="middle" x="2570.5" y="-3398.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 44&#45;&gt;49 -->
<g id="edge17" class="edge">
<title>44&#45;&gt;49</title>
<path fill="none" stroke="black" d="M2351.07,-3455.97C2392.99,-3445.85 2446.29,-3432.98 2489.87,-3422.46"/>
<polygon fill="black" stroke="black" points="2490.96,-3425.8 2499.86,-3420.05 2489.32,-3418.99 2490.96,-3425.8"/>
</g>
<!-- 47 -->
<g id="node31" class="node">
<title>47</title>
<polygon fill="none" stroke="black" points="2859,-3564 2398,-3564 2398,-3528 2859,-3528 2859,-3564"/>
<text text-anchor="middle" x="2628.5" y="-3542.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 45&#45;&gt;47 -->
<g id="edge15" class="edge">
<title>45&#45;&gt;47</title>
<path fill="none" stroke="black" d="M2628.5,-3599.7C2628.5,-3591.98 2628.5,-3582.71 2628.5,-3574.11"/>
<polygon fill="black" stroke="black" points="2632,-3574.1 2628.5,-3564.1 2625,-3574.1 2632,-3574.1"/>
</g>
<!-- 48 -->
<g id="node32" class="node">
<title>48</title>
<polygon fill="none" stroke="black" points="2734,-3492 2523,-3492 2523,-3456 2734,-3456 2734,-3492"/>
<text text-anchor="middle" x="2628.5" y="-3470.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 47&#45;&gt;48 -->
<g id="edge16" class="edge">
<title>47&#45;&gt;48</title>
<path fill="none" stroke="black" d="M2628.5,-3527.7C2628.5,-3519.98 2628.5,-3510.71 2628.5,-3502.11"/>
<polygon fill="black" stroke="black" points="2632,-3502.1 2628.5,-3492.1 2625,-3502.1 2632,-3502.1"/>
</g>
<!-- 48&#45;&gt;49 -->
<g id="edge18" class="edge">
<title>48&#45;&gt;49</title>
<path fill="none" stroke="black" d="M2614.16,-3455.7C2607.21,-3447.3 2598.73,-3437.07 2591.1,-3427.86"/>
<polygon fill="black" stroke="black" points="2593.75,-3425.57 2584.67,-3420.1 2588.36,-3430.04 2593.75,-3425.57"/>
</g>
<!-- 51 -->
<g id="node34" class="node">
<title>51</title>
<polygon fill="none" stroke="black" points="2792,-3348 2349,-3348 2349,-3312 2792,-3312 2792,-3348"/>
<text text-anchor="middle" x="2570.5" y="-3326.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 49&#45;&gt;51 -->
<g id="edge19" class="edge">
<title>49&#45;&gt;51</title>
<path fill="none" stroke="black" d="M2570.5,-3383.7C2570.5,-3375.98 2570.5,-3366.71 2570.5,-3358.11"/>
<polygon fill="black" stroke="black" points="2574,-3358.1 2570.5,-3348.1 2567,-3358.1 2574,-3358.1"/>
</g>
<!-- 51&#45;&gt;52 -->
<g id="edge20" class="edge">
<title>51&#45;&gt;52</title>
<path fill="none" stroke="black" d="M2458.32,-3311.97C2361.1,-3297.2 2225.35,-3276.59 2155.32,-3265.96"/>
<polygon fill="black" stroke="black" points="2155.48,-3262.44 2145.07,-3264.4 2154.43,-3269.36 2155.48,-3262.44"/>
</g>
<!-- 54 -->
<g id="node36" class="node">
<title>54</title>
<polygon fill="none" stroke="black" points="2296,-3204 1917,-3204 1917,-3168 2296,-3168 2296,-3204"/>
<text text-anchor="middle" x="2106.5" y="-3182.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 14 12 64]| newshape=..., reverse=0)</text>
</g>
<!-- 52&#45;&gt;54 -->
<g id="edge22" class="edge">
<title>52&#45;&gt;54</title>
<path fill="none" stroke="black" d="M2108.76,-3239.7C2108.43,-3231.98 2108.03,-3222.71 2107.66,-3214.11"/>
<polygon fill="black" stroke="black" points="2111.16,-3213.95 2107.23,-3204.1 2104.16,-3214.25 2111.16,-3213.95"/>
</g>
<!-- 55 -->
<g id="node37" class="node">
<title>55</title>
<polygon fill="none" stroke="black" points="2210,-3132 1981,-3132 1981,-3096 2210,-3096 2210,-3132"/>
<text text-anchor="middle" x="2095.5" y="-3110.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 54&#45;&gt;55 -->
<g id="edge23" class="edge">
<title>54&#45;&gt;55</title>
<path fill="none" stroke="black" d="M2103.78,-3167.7C2102.57,-3159.98 2101.11,-3150.71 2099.76,-3142.11"/>
<polygon fill="black" stroke="black" points="2103.2,-3141.44 2098.19,-3132.1 2096.28,-3142.53 2103.2,-3141.44"/>
</g>
<!-- 56 -->
<g id="node38" class="node">
<title>56</title>
<polygon fill="none" stroke="black" points="2205,-3060 1976,-3060 1976,-3024 2205,-3024 2205,-3060"/>
<text text-anchor="middle" x="2090.5" y="-3038.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 1, 3, 2])</text>
</g>
<!-- 55&#45;&gt;56 -->
<g id="edge24" class="edge">
<title>55&#45;&gt;56</title>
<path fill="none" stroke="black" d="M2094.26,-3095.7C2093.71,-3087.98 2093.05,-3078.71 2092.44,-3070.11"/>
<polygon fill="black" stroke="black" points="2095.93,-3069.83 2091.72,-3060.1 2088.94,-3070.33 2095.93,-3069.83"/>
</g>
<!-- 58 -->
<g id="node39" class="node">
<title>58</title>
<polygon fill="none" stroke="black" points="2300,-2988 1879,-2988 1879,-2952 2300,-2952 2300,-2988"/>
<text text-anchor="middle" x="2089.5" y="-2966.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 64 14]| newshape=[&#45;1, 64, 14], reverse=0)</text>
</g>
<!-- 56&#45;&gt;58 -->
<g id="edge25" class="edge">
<title>56&#45;&gt;58</title>
<path fill="none" stroke="black" d="M2090.25,-3023.7C2090.14,-3015.98 2090.01,-3006.71 2089.89,-2998.11"/>
<polygon fill="black" stroke="black" points="2093.39,-2998.05 2089.74,-2988.1 2086.39,-2998.15 2093.39,-2998.05"/>
</g>
<!-- 59 -->
<g id="node40" class="node">
<title>59</title>
<polygon fill="none" stroke="black" points="2160,-2916 1949,-2916 1949,-2880 2160,-2880 2160,-2916"/>
<text text-anchor="middle" x="2054.5" y="-2894.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 58&#45;&gt;59 -->
<g id="edge26" class="edge">
<title>58&#45;&gt;59</title>
<path fill="none" stroke="black" d="M2080.85,-2951.7C2076.86,-2943.73 2072.05,-2934.1 2067.63,-2925.26"/>
<polygon fill="black" stroke="black" points="2070.65,-2923.48 2063.05,-2916.1 2064.39,-2926.61 2070.65,-2923.48"/>
</g>
<!-- 59&#45;&gt;60 -->
<g id="edge28" class="edge">
<title>59&#45;&gt;60</title>
<path fill="none" stroke="black" d="M2049.06,-2879.7C2046.61,-2871.9 2043.66,-2862.51 2040.93,-2853.83"/>
<polygon fill="black" stroke="black" points="2044.21,-2852.59 2037.88,-2844.1 2037.54,-2854.69 2044.21,-2852.59"/>
</g>
<!-- 62 -->
<g id="node42" class="node">
<title>62</title>
<polygon fill="none" stroke="black" points="2222,-2772 1843,-2772 1843,-2736 2222,-2736 2222,-2772"/>
<text text-anchor="middle" x="2032.5" y="-2750.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 12 14 14]| newshape=..., reverse=0)</text>
</g>
<!-- 60&#45;&gt;62 -->
<g id="edge29" class="edge">
<title>60&#45;&gt;62</title>
<path fill="none" stroke="black" d="M2032.5,-2807.7C2032.5,-2799.98 2032.5,-2790.71 2032.5,-2782.11"/>
<polygon fill="black" stroke="black" points="2036,-2782.1 2032.5,-2772.1 2029,-2782.1 2036,-2782.1"/>
</g>
<!-- 64 -->
<g id="node43" class="node">
<title>64</title>
<polygon fill="none" stroke="black" points="2086,-2700 1979,-2700 1979,-2664 2086,-2664 2086,-2700"/>
<text text-anchor="middle" x="2032.5" y="-2678.3" font-family="Times,serif" font-size="14.00">divide(·, 8.0)</text>
</g>
<!-- 62&#45;&gt;64 -->
<g id="edge30" class="edge">
<title>62&#45;&gt;64</title>
<path fill="none" stroke="black" d="M2032.5,-2735.7C2032.5,-2727.98 2032.5,-2718.71 2032.5,-2710.11"/>
<polygon fill="black" stroke="black" points="2036,-2710.1 2032.5,-2700.1 2029,-2710.1 2036,-2710.1"/>
</g>
<!-- 64&#45;&gt;65 -->
<g id="edge31" class="edge">
<title>64&#45;&gt;65</title>
<path fill="none" stroke="black" d="M2021.62,-2663.7C2016.51,-2655.56 2010.3,-2645.69 2004.66,-2636.7"/>
<polygon fill="black" stroke="black" points="2007.54,-2634.71 1999.25,-2628.1 2001.61,-2638.43 2007.54,-2634.71"/>
</g>
<!-- 66 -->
<g id="node45" class="node">
<title>66</title>
<polygon fill="none" stroke="black" points="2075.5,-2556 1901.5,-2556 1901.5,-2520 2075.5,-2520 2075.5,-2556"/>
<text text-anchor="middle" x="1988.5" y="-2534.3" font-family="Times,serif" font-size="14.00">nn.softmax(·| axis=&#45;1)</text>
</g>
<!-- 65&#45;&gt;66 -->
<g id="edge33" class="edge">
<title>65&#45;&gt;66</title>
<path fill="none" stroke="black" d="M1988.5,-2591.7C1988.5,-2583.98 1988.5,-2574.71 1988.5,-2566.11"/>
<polygon fill="black" stroke="black" points="1992,-2566.1 1988.5,-2556.1 1985,-2566.1 1992,-2566.1"/>
</g>
<!-- 67 -->
<g id="node46" class="node">
<title>67</title>
<polygon fill="none" stroke="black" points="2080,-2484 1897,-2484 1897,-2448 2080,-2448 2080,-2484"/>
<text text-anchor="middle" x="1988.5" y="-2462.3" font-family="Times,serif" font-size="14.00">nn.dropout(·| rate=0.1)</text>
</g>
<!-- 66&#45;&gt;67 -->
<g id="edge34" class="edge">
<title>66&#45;&gt;67</title>
<path fill="none" stroke="black" d="M1988.5,-2519.7C1988.5,-2511.98 1988.5,-2502.71 1988.5,-2494.11"/>
<polygon fill="black" stroke="black" points="1992,-2494.1 1988.5,-2484.1 1985,-2494.1 1992,-2494.1"/>
</g>
<!-- 68 -->
<g id="node47" class="node">
<title>68</title>
<polygon fill="none" stroke="black" points="2072.5,-2412 1904.5,-2412 1904.5,-2376 2072.5,-2376 2072.5,-2412"/>
<text text-anchor="middle" x="1988.5" y="-2390.3" font-family="Times,serif" font-size="14.00">TupleGetItem(idx=0)</text>
</g>
<!-- 67&#45;&gt;68 -->
<g id="edge35" class="edge">
<title>67&#45;&gt;68</title>
<path fill="none" stroke="black" d="M1988.5,-2447.7C1988.5,-2439.98 1988.5,-2430.71 1988.5,-2422.11"/>
<polygon fill="black" stroke="black" points="1992,-2422.1 1988.5,-2412.1 1985,-2422.1 1992,-2422.1"/>
</g>
<!-- 70 -->
<g id="node48" class="node">
<title>70</title>
<polygon fill="none" stroke="black" points="2199,-2340 1778,-2340 1778,-2304 2199,-2304 2199,-2340"/>
<text text-anchor="middle" x="1988.5" y="-2318.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 14 14]| newshape=[&#45;1, 14, 14], reverse=0)</text>
</g>
<!-- 68&#45;&gt;70 -->
<g id="edge36" class="edge">
<title>68&#45;&gt;70</title>
<path fill="none" stroke="black" d="M1988.5,-2375.7C1988.5,-2367.98 1988.5,-2358.71 1988.5,-2350.11"/>
<polygon fill="black" stroke="black" points="1992,-2350.1 1988.5,-2340.1 1985,-2350.1 1992,-2350.1"/>
</g>
<!-- 87 -->
<g id="node60" class="node">
<title>87</title>
<polygon fill="none" stroke="black" points="2073,-2268 1904,-2268 1904,-2232 2073,-2232 2073,-2268"/>
<text text-anchor="middle" x="1988.5" y="-2246.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 70&#45;&gt;87 -->
<g id="edge50" class="edge">
<title>70&#45;&gt;87</title>
<path fill="none" stroke="black" d="M1988.5,-2303.7C1988.5,-2295.98 1988.5,-2286.71 1988.5,-2278.11"/>
<polygon fill="black" stroke="black" points="1992,-2278.1 1988.5,-2268.1 1985,-2278.1 1992,-2278.1"/>
</g>
<!-- 77 -->
<g id="node53" class="node">
<title>77</title>
<polygon fill="none" stroke="black" points="421,-3420 252,-3420 252,-3384 421,-3384 421,-3420"/>
<text text-anchor="middle" x="336.5" y="-3398.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 72&#45;&gt;77 -->
<g id="edge41" class="edge">
<title>72&#45;&gt;77</title>
<path fill="none" stroke="black" d="M1124.36,-3456.63C1121.39,-3456.42 1118.43,-3456.21 1115.5,-3456 868.89,-3438.61 578.3,-3419.1 431.55,-3409.32"/>
<polygon fill="black" stroke="black" points="431.45,-3405.8 421.23,-3408.63 430.98,-3412.79 431.45,-3405.8"/>
</g>
<!-- 75 -->
<g id="node51" class="node">
<title>75</title>
<polygon fill="none" stroke="black" points="567,-3564 106,-3564 106,-3528 567,-3528 567,-3564"/>
<text text-anchor="middle" x="336.5" y="-3542.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 73&#45;&gt;75 -->
<g id="edge39" class="edge">
<title>73&#45;&gt;75</title>
<path fill="none" stroke="black" d="M336.5,-3599.7C336.5,-3591.98 336.5,-3582.71 336.5,-3574.11"/>
<polygon fill="black" stroke="black" points="340,-3574.1 336.5,-3564.1 333,-3574.1 340,-3574.1"/>
</g>
<!-- 76 -->
<g id="node52" class="node">
<title>76</title>
<polygon fill="none" stroke="black" points="442,-3492 231,-3492 231,-3456 442,-3456 442,-3492"/>
<text text-anchor="middle" x="336.5" y="-3470.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 75&#45;&gt;76 -->
<g id="edge40" class="edge">
<title>75&#45;&gt;76</title>
<path fill="none" stroke="black" d="M336.5,-3527.7C336.5,-3519.98 336.5,-3510.71 336.5,-3502.11"/>
<polygon fill="black" stroke="black" points="340,-3502.1 336.5,-3492.1 333,-3502.1 340,-3502.1"/>
</g>
<!-- 76&#45;&gt;77 -->
<g id="edge42" class="edge">
<title>76&#45;&gt;77</title>
<path fill="none" stroke="black" d="M336.5,-3455.7C336.5,-3447.98 336.5,-3438.71 336.5,-3430.11"/>
<polygon fill="black" stroke="black" points="340,-3430.1 336.5,-3420.1 333,-3430.1 340,-3430.1"/>
</g>
<!-- 79 -->
<g id="node54" class="node">
<title>79</title>
<polygon fill="none" stroke="black" points="443,-3348 0,-3348 0,-3312 443,-3312 443,-3348"/>
<text text-anchor="middle" x="221.5" y="-3326.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 77&#45;&gt;79 -->
<g id="edge43" class="edge">
<title>77&#45;&gt;79</title>
<path fill="none" stroke="black" d="M308.37,-3383.88C293.33,-3374.72 274.63,-3363.34 258.44,-3353.48"/>
<polygon fill="black" stroke="black" points="260.1,-3350.4 249.74,-3348.19 256.46,-3356.38 260.1,-3350.4"/>
</g>
<!-- 79&#45;&gt;80 -->
<g id="edge44" class="edge">
<title>79&#45;&gt;80</title>
<path fill="none" stroke="black" d="M336.36,-3311.97C436.49,-3297.12 576.55,-3276.34 647.86,-3265.77"/>
<polygon fill="black" stroke="black" points="648.49,-3269.21 657.87,-3264.28 647.47,-3262.29 648.49,-3269.21"/>
</g>
<!-- 82 -->
<g id="node56" class="node">
<title>82</title>
<polygon fill="none" stroke="black" points="1156,-3204 777,-3204 777,-3168 1156,-3168 1156,-3204"/>
<text text-anchor="middle" x="966.5" y="-3182.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 14 12 64]| newshape=..., reverse=0)</text>
</g>
<!-- 80&#45;&gt;82 -->
<g id="edge46" class="edge">
<title>80&#45;&gt;82</title>
<path fill="none" stroke="black" d="M729.19,-3247.85C769.86,-3237.42 837.49,-3220.08 890.18,-3206.57"/>
<polygon fill="black" stroke="black" points="891.35,-3209.88 900.17,-3204.01 889.61,-3203.1 891.35,-3209.88"/>
</g>
<!-- 83 -->
<g id="node57" class="node">
<title>83</title>
<polygon fill="none" stroke="black" points="1293,-3132 1064,-3132 1064,-3096 1293,-3096 1293,-3132"/>
<text text-anchor="middle" x="1178.5" y="-3110.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 82&#45;&gt;83 -->
<g id="edge47" class="edge">
<title>82&#45;&gt;83</title>
<path fill="none" stroke="black" d="M1018.09,-3167.97C1047.97,-3158.1 1085.76,-3145.62 1117.16,-3135.25"/>
<polygon fill="black" stroke="black" points="1118.28,-3138.57 1126.68,-3132.11 1116.09,-3131.92 1118.28,-3138.57"/>
</g>
<!-- 85 -->
<g id="node58" class="node">
<title>85</title>
<polygon fill="none" stroke="black" points="1499,-3060 1078,-3060 1078,-3024 1499,-3024 1499,-3060"/>
<text text-anchor="middle" x="1288.5" y="-3038.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 14 64]| newshape=[&#45;1, 14, 64], reverse=0)</text>
</g>
<!-- 83&#45;&gt;85 -->
<g id="edge48" class="edge">
<title>83&#45;&gt;85</title>
<path fill="none" stroke="black" d="M1205.41,-3095.88C1219.66,-3086.81 1237.35,-3075.55 1252.74,-3065.76"/>
<polygon fill="black" stroke="black" points="1254.93,-3068.51 1261.49,-3060.19 1251.17,-3062.61 1254.93,-3068.51"/>
</g>
<!-- 86 -->
<g id="node59" class="node">
<title>86</title>
<polygon fill="none" stroke="black" points="1558,-2844 1347,-2844 1347,-2808 1558,-2808 1558,-2844"/>
<text text-anchor="middle" x="1452.5" y="-2822.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 85&#45;&gt;86 -->
<g id="edge49" class="edge">
<title>85&#45;&gt;86</title>
<path fill="none" stroke="black" d="M1301.64,-3023.85C1330.43,-2986.28 1399.18,-2896.58 1433.01,-2852.43"/>
<polygon fill="black" stroke="black" points="1435.99,-2854.3 1439.29,-2844.23 1430.43,-2850.04 1435.99,-2854.3"/>
</g>
<!-- 86&#45;&gt;87 -->
<g id="edge51" class="edge">
<title>86&#45;&gt;87</title>
<path fill="none" stroke="black" d="M1453.69,-2807.96C1455.43,-2781.31 1458.5,-2728.15 1458.5,-2683 1458.5,-2683 1458.5,-2683 1458.5,-2393 1458.5,-2303.95 1741.16,-2269.24 1893.63,-2256.98"/>
<polygon fill="black" stroke="black" points="1894.24,-2260.45 1903.93,-2256.18 1893.69,-2253.47 1894.24,-2260.45"/>
</g>
<!-- 89 -->
<g id="node61" class="node">
<title>89</title>
<polygon fill="none" stroke="black" points="2212,-2196 1833,-2196 1833,-2160 2212,-2160 2212,-2196"/>
<text text-anchor="middle" x="2022.5" y="-2174.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 12 14 64]| newshape=..., reverse=0)</text>
</g>
<!-- 87&#45;&gt;89 -->
<g id="edge52" class="edge">
<title>87&#45;&gt;89</title>
<path fill="none" stroke="black" d="M1996.9,-2231.7C2000.77,-2223.73 2005.45,-2214.1 2009.74,-2205.26"/>
<polygon fill="black" stroke="black" points="2012.97,-2206.63 2014.19,-2196.1 2006.67,-2203.57 2012.97,-2206.63"/>
</g>
<!-- 90 -->
<g id="node62" class="node">
<title>90</title>
<polygon fill="none" stroke="black" points="2146,-2124 1917,-2124 1917,-2088 2146,-2088 2146,-2124"/>
<text text-anchor="middle" x="2031.5" y="-2102.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 89&#45;&gt;90 -->
<g id="edge53" class="edge">
<title>89&#45;&gt;90</title>
<path fill="none" stroke="black" d="M2024.72,-2159.7C2025.72,-2151.98 2026.91,-2142.71 2028.01,-2134.11"/>
<polygon fill="black" stroke="black" points="2031.5,-2134.47 2029.3,-2124.1 2024.55,-2133.58 2031.5,-2134.47"/>
</g>
<!-- 91 -->
<g id="node63" class="node">
<title>91</title>
<polygon fill="none" stroke="black" points="2081,-2052 2016,-2052 2016,-2016 2081,-2016 2081,-2052"/>
<text text-anchor="middle" x="2048.5" y="-2030.3" font-family="Times,serif" font-size="14.00">copy(·)</text>
</g>
<!-- 90&#45;&gt;91 -->
<g id="edge54" class="edge">
<title>90&#45;&gt;91</title>
<path fill="none" stroke="black" d="M2035.7,-2087.7C2037.6,-2079.9 2039.88,-2070.51 2041.98,-2061.83"/>
<polygon fill="black" stroke="black" points="2045.39,-2062.65 2044.35,-2052.1 2038.58,-2061 2045.39,-2062.65"/>
</g>
<!-- 93 -->
<g id="node64" class="node">
<title>93</title>
<polygon fill="none" stroke="black" points="2270,-1980 1827,-1980 1827,-1944 2270,-1944 2270,-1980"/>
<text text-anchor="middle" x="2048.5" y="-1958.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 91&#45;&gt;93 -->
<g id="edge55" class="edge">
<title>91&#45;&gt;93</title>
<path fill="none" stroke="black" d="M2048.5,-2015.7C2048.5,-2007.98 2048.5,-1998.71 2048.5,-1990.11"/>
<polygon fill="black" stroke="black" points="2052,-1990.1 2048.5,-1980.1 2045,-1990.1 2052,-1990.1"/>
</g>
<!-- 95 -->
<g id="node65" class="node">
<title>95</title>
<polygon fill="none" stroke="black" points="2272.5,-1908 1824.5,-1908 1824.5,-1872 2272.5,-1872 2272.5,-1908"/>
<text text-anchor="middle" x="2048.5" y="-1886.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 93&#45;&gt;95 -->
<g id="edge56" class="edge">
<title>93&#45;&gt;95</title>
<path fill="none" stroke="black" d="M2048.5,-1943.7C2048.5,-1935.98 2048.5,-1926.71 2048.5,-1918.11"/>
<polygon fill="black" stroke="black" points="2052,-1918.1 2048.5,-1908.1 2045,-1918.1 2052,-1918.1"/>
</g>
<!-- 100 -->
<g id="node69" class="node">
<title>100</title>
<polygon fill="none" stroke="black" points="2133,-1836 1964,-1836 1964,-1800 2133,-1800 2133,-1836"/>
<text text-anchor="middle" x="2048.5" y="-1814.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 95&#45;&gt;100 -->
<g id="edge60" class="edge">
<title>95&#45;&gt;100</title>
<path fill="none" stroke="black" d="M2048.5,-1871.7C2048.5,-1863.98 2048.5,-1854.71 2048.5,-1846.11"/>
<polygon fill="black" stroke="black" points="2052,-1846.1 2048.5,-1836.1 2045,-1846.1 2052,-1846.1"/>
</g>
<!-- 98 -->
<g id="node67" class="node">
<title>98</title>
<polygon fill="none" stroke="black" points="1809,-1980 1348,-1980 1348,-1944 1809,-1944 1809,-1980"/>
<text text-anchor="middle" x="1578.5" y="-1958.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 96&#45;&gt;98 -->
<g id="edge58" class="edge">
<title>96&#45;&gt;98</title>
<path fill="none" stroke="black" d="M1578.5,-2015.7C1578.5,-2007.98 1578.5,-1998.71 1578.5,-1990.11"/>
<polygon fill="black" stroke="black" points="1582,-1990.1 1578.5,-1980.1 1575,-1990.1 1582,-1990.1"/>
</g>
<!-- 99 -->
<g id="node68" class="node">
<title>99</title>
<polygon fill="none" stroke="black" points="1745,-1908 1534,-1908 1534,-1872 1745,-1872 1745,-1908"/>
<text text-anchor="middle" x="1639.5" y="-1886.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 98&#45;&gt;99 -->
<g id="edge59" class="edge">
<title>98&#45;&gt;99</title>
<path fill="none" stroke="black" d="M1593.58,-1943.7C1600.89,-1935.3 1609.81,-1925.07 1617.84,-1915.86"/>
<polygon fill="black" stroke="black" points="1620.66,-1917.94 1624.59,-1908.1 1615.39,-1913.34 1620.66,-1917.94"/>
</g>
<!-- 99&#45;&gt;100 -->
<g id="edge61" class="edge">
<title>99&#45;&gt;100</title>
<path fill="none" stroke="black" d="M1739.03,-1871.97C1804.34,-1860.79 1889.26,-1846.25 1953.61,-1835.24"/>
<polygon fill="black" stroke="black" points="1954.49,-1838.64 1963.76,-1833.5 1953.31,-1831.74 1954.49,-1838.64"/>
</g>
<!-- 102 -->
<g id="node70" class="node">
<title>102</title>
<polygon fill="none" stroke="black" points="2274,-1764 1831,-1764 1831,-1728 2274,-1728 2274,-1764"/>
<text text-anchor="middle" x="2052.5" y="-1742.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 100&#45;&gt;102 -->
<g id="edge62" class="edge">
<title>100&#45;&gt;102</title>
<path fill="none" stroke="black" d="M2049.49,-1799.7C2049.93,-1791.98 2050.46,-1782.71 2050.95,-1774.11"/>
<polygon fill="black" stroke="black" points="2054.45,-1774.29 2051.52,-1764.1 2047.46,-1773.89 2054.45,-1774.29"/>
</g>
<!-- 102&#45;&gt;103 -->
<g id="edge63" class="edge">
<title>102&#45;&gt;103</title>
<path fill="none" stroke="black" d="M2171.74,-1727.97C2276.37,-1713.02 2422.98,-1692.07 2496.55,-1681.56"/>
<polygon fill="black" stroke="black" points="2497.46,-1684.97 2506.87,-1680.09 2496.47,-1678.04 2497.46,-1684.97"/>
</g>
<!-- 104 -->
<g id="node72" class="node">
<title>104</title>
<polygon fill="none" stroke="black" points="2714,-1620 2531,-1620 2531,-1584 2714,-1584 2714,-1620"/>
<text text-anchor="middle" x="2622.5" y="-1598.3" font-family="Times,serif" font-size="14.00">nn.dropout(·| rate=0.1)</text>
</g>
<!-- 103&#45;&gt;104 -->
<g id="edge65" class="edge">
<title>103&#45;&gt;104</title>
<path fill="none" stroke="black" d="M2562.28,-1655.7C2572.25,-1646.97 2584.51,-1636.24 2595.36,-1626.75"/>
<polygon fill="black" stroke="black" points="2597.73,-1629.32 2602.95,-1620.1 2593.12,-1624.06 2597.73,-1629.32"/>
</g>
<!-- 105 -->
<g id="node73" class="node">
<title>105</title>
<polygon fill="none" stroke="black" points="2788.5,-1548 2620.5,-1548 2620.5,-1512 2788.5,-1512 2788.5,-1548"/>
<text text-anchor="middle" x="2704.5" y="-1526.3" font-family="Times,serif" font-size="14.00">TupleGetItem(idx=0)</text>
</g>
<!-- 104&#45;&gt;105 -->
<g id="edge66" class="edge">
<title>104&#45;&gt;105</title>
<path fill="none" stroke="black" d="M2642.77,-1583.7C2653,-1574.97 2665.56,-1564.24 2676.68,-1554.75"/>
<polygon fill="black" stroke="black" points="2679.13,-1557.26 2684.46,-1548.1 2674.59,-1551.94 2679.13,-1557.26"/>
</g>
<!-- 105&#45;&gt;106 -->
<g id="edge67" class="edge">
<title>105&#45;&gt;106</title>
<path fill="none" stroke="black" d="M2732.88,-1511.88C2748.04,-1502.72 2766.91,-1491.34 2783.24,-1481.48"/>
<polygon fill="black" stroke="black" points="2785.26,-1484.35 2792.01,-1476.19 2781.64,-1478.36 2785.26,-1484.35"/>
</g>
<!-- 106&#45;&gt;107 -->
<g id="edge69" class="edge">
<title>106&#45;&gt;107</title>
<path fill="none" stroke="black" d="M2784.82,-1442.89C2781.69,-1441.85 2778.55,-1440.86 2775.5,-1440 2725.82,-1425.94 2670.04,-1414.43 2621.77,-1405.76"/>
<polygon fill="black" stroke="black" points="2622.36,-1402.31 2611.91,-1404.01 2621.14,-1409.21 2622.36,-1402.31"/>
</g>
<!-- 109 -->
<g id="node76" class="node">
<title>109</title>
<polygon fill="none" stroke="black" points="2980.5,-1332 2532.5,-1332 2532.5,-1296 2980.5,-1296 2980.5,-1332"/>
<text text-anchor="middle" x="2756.5" y="-1310.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 107&#45;&gt;109 -->
<g id="edge72" class="edge">
<title>107&#45;&gt;109</title>
<path fill="none" stroke="black" d="M2562.8,-1367.97C2599.34,-1357.97 2645.69,-1345.3 2683.89,-1334.85"/>
<polygon fill="black" stroke="black" points="2685.2,-1338.12 2693.92,-1332.11 2683.35,-1331.37 2685.2,-1338.12"/>
</g>
<!-- 139 -->
<g id="node97" class="node">
<title>139</title>
<polygon fill="none" stroke="black" points="2594,-252 2523,-252 2523,-216 2594,-216 2594,-252"/>
<text text-anchor="middle" x="2558.5" y="-230.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 107&#45;&gt;139 -->
<g id="edge99" class="edge">
<title>107&#45;&gt;139</title>
<path fill="none" stroke="black" d="M2500.5,-1367.95C2500.5,-1341.29 2500.5,-1288.11 2500.5,-1243 2500.5,-1243 2500.5,-1243 2500.5,-377 2500.5,-336.14 2506.91,-325.33 2523.5,-288 2527.69,-278.58 2533.49,-268.97 2539.16,-260.56"/>
<polygon fill="black" stroke="black" points="2542.08,-262.5 2544.96,-252.3 2536.35,-258.48 2542.08,-262.5"/>
</g>
<!-- 114 -->
<g id="node80" class="node">
<title>114</title>
<polygon fill="none" stroke="black" points="3017,-1260 2848,-1260 2848,-1224 3017,-1224 3017,-1260"/>
<text text-anchor="middle" x="2932.5" y="-1238.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 109&#45;&gt;114 -->
<g id="edge76" class="edge">
<title>109&#45;&gt;114</title>
<path fill="none" stroke="black" d="M2799.55,-1295.88C2823.83,-1286.22 2854.34,-1274.09 2880.01,-1263.88"/>
<polygon fill="black" stroke="black" points="2881.59,-1267.02 2889.59,-1260.07 2879,-1260.51 2881.59,-1267.02"/>
</g>
<!-- 112 -->
<g id="node78" class="node">
<title>112</title>
<polygon fill="none" stroke="black" points="3314,-1404 2917,-1404 2917,-1368 3314,-1368 3314,-1404"/>
<text text-anchor="middle" x="3115.5" y="-1382.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#45;1 &#160;768 3072]| newshape=..., reverse=0)</text>
</g>
<!-- 110&#45;&gt;112 -->
<g id="edge74" class="edge">
<title>110&#45;&gt;112</title>
<path fill="none" stroke="black" d="M3117.01,-1439.7C3116.79,-1431.98 3116.52,-1422.71 3116.27,-1414.11"/>
<polygon fill="black" stroke="black" points="3119.77,-1414 3115.99,-1404.1 3112.78,-1414.2 3119.77,-1414"/>
</g>
<!-- 113 -->
<g id="node79" class="node">
<title>113</title>
<polygon fill="none" stroke="black" points="3214,-1332 3003,-1332 3003,-1296 3214,-1296 3214,-1332"/>
<text text-anchor="middle" x="3108.5" y="-1310.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 112&#45;&gt;113 -->
<g id="edge75" class="edge">
<title>112&#45;&gt;113</title>
<path fill="none" stroke="black" d="M3113.77,-1367.7C3113,-1359.98 3112.07,-1350.71 3111.21,-1342.11"/>
<polygon fill="black" stroke="black" points="3114.69,-1341.71 3110.21,-1332.1 3107.72,-1342.4 3114.69,-1341.71"/>
</g>
<!-- 113&#45;&gt;114 -->
<g id="edge77" class="edge">
<title>113&#45;&gt;114</title>
<path fill="none" stroke="black" d="M3065.45,-1295.88C3041.17,-1286.22 3010.66,-1274.09 2984.99,-1263.88"/>
<polygon fill="black" stroke="black" points="2986,-1260.51 2975.41,-1260.07 2983.41,-1267.02 2986,-1260.51"/>
</g>
<!-- 116 -->
<g id="node81" class="node">
<title>116</title>
<polygon fill="none" stroke="black" points="3167.5,-1188 2697.5,-1188 2697.5,-1152 3167.5,-1152 3167.5,-1188"/>
<text text-anchor="middle" x="2932.5" y="-1166.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#160;1 &#160;&#160;14 3072]| newshape=[1, 14, 3072], reverse=0)</text>
</g>
<!-- 114&#45;&gt;116 -->
<g id="edge78" class="edge">
<title>114&#45;&gt;116</title>
<path fill="none" stroke="black" d="M2932.5,-1223.7C2932.5,-1215.98 2932.5,-1206.71 2932.5,-1198.11"/>
<polygon fill="black" stroke="black" points="2936,-1198.1 2932.5,-1188.1 2929,-1198.1 2936,-1198.1"/>
</g>
<!-- 116&#45;&gt;117 -->
<g id="edge79" class="edge">
<title>116&#45;&gt;117</title>
<path fill="none" stroke="black" d="M2983.85,-1151.97C3018.82,-1140.36 3064.69,-1125.15 3098.28,-1114"/>
<polygon fill="black" stroke="black" points="3099.46,-1117.3 3107.85,-1110.83 3097.25,-1110.65 3099.46,-1117.3"/>
</g>
<!-- 121 -->
<g id="node83" class="node">
<title>121</title>
<polygon fill="none" stroke="black" points="3296,-1044 3111,-1044 3111,-1008 3296,-1008 3296,-1044"/>
<text text-anchor="middle" x="3203.5" y="-1022.3" font-family="Times,serif" font-size="14.00">multiply(·, 0.70710677)</text>
</g>
<!-- 117&#45;&gt;121 -->
<g id="edge81" class="edge">
<title>117&#45;&gt;121</title>
<path fill="none" stroke="black" d="M3158.33,-1079.7C3165.52,-1071.3 3174.3,-1061.07 3182.19,-1051.86"/>
<polygon fill="black" stroke="black" points="3184.99,-1053.97 3188.84,-1044.1 3179.67,-1049.42 3184.99,-1053.97"/>
</g>
<!-- 126 -->
<g id="node87" class="node">
<title>126</title>
<polygon fill="none" stroke="black" points="3175.5,-756 3071.5,-756 3071.5,-720 3175.5,-720 3175.5,-756"/>
<text text-anchor="middle" x="3123.5" y="-734.3" font-family="Times,serif" font-size="14.00">multiply(·, ·)</text>
</g>
<!-- 117&#45;&gt;126 -->
<g id="edge85" class="edge">
<title>117&#45;&gt;126</title>
<path fill="none" stroke="black" d="M3126.88,-1079.7C3118.38,-1069.93 3108.57,-1057.09 3102.5,-1044 3085.48,-1007.31 3083.5,-995.45 3083.5,-955 3083.5,-955 3083.5,-955 3083.5,-881 3083.5,-839.48 3099.54,-793.61 3111.3,-765.54"/>
<polygon fill="black" stroke="black" points="3114.65,-766.62 3115.41,-756.05 3108.23,-763.84 3114.65,-766.62"/>
</g>
<!-- 122 -->
<g id="node84" class="node">
<title>122</title>
<polygon fill="none" stroke="black" points="3222.5,-972 3168.5,-972 3168.5,-936 3222.5,-936 3222.5,-972"/>
<text text-anchor="middle" x="3195.5" y="-950.3" font-family="Times,serif" font-size="14.00">erf(·)</text>
</g>
<!-- 121&#45;&gt;122 -->
<g id="edge82" class="edge">
<title>121&#45;&gt;122</title>
<path fill="none" stroke="black" d="M3201.52,-1007.7C3200.64,-999.98 3199.58,-990.71 3198.6,-982.11"/>
<polygon fill="black" stroke="black" points="3202.07,-981.64 3197.45,-972.1 3195.11,-982.44 3202.07,-981.64"/>
</g>
<!-- 124 -->
<g id="node85" class="node">
<title>124</title>
<polygon fill="none" stroke="black" points="3248.5,-900 3126.5,-900 3126.5,-864 3248.5,-864 3248.5,-900"/>
<text text-anchor="middle" x="3187.5" y="-878.3" font-family="Times,serif" font-size="14.00">multiply(·, 0.5)</text>
</g>
<!-- 122&#45;&gt;124 -->
<g id="edge83" class="edge">
<title>122&#45;&gt;124</title>
<path fill="none" stroke="black" d="M3193.52,-935.7C3192.64,-927.98 3191.58,-918.71 3190.6,-910.11"/>
<polygon fill="black" stroke="black" points="3194.07,-909.64 3189.45,-900.1 3187.11,-910.44 3194.07,-909.64"/>
</g>
<!-- 125 -->
<g id="node86" class="node">
<title>125</title>
<polygon fill="none" stroke="black" points="3209,-828 3120,-828 3120,-792 3209,-792 3209,-828"/>
<text text-anchor="middle" x="3164.5" y="-806.3" font-family="Times,serif" font-size="14.00">add(0.5, ·)</text>
</g>
<!-- 124&#45;&gt;125 -->
<g id="edge84" class="edge">
<title>124&#45;&gt;125</title>
<path fill="none" stroke="black" d="M3181.81,-863.7C3179.25,-855.9 3176.17,-846.51 3173.32,-837.83"/>
<polygon fill="black" stroke="black" points="3176.57,-836.51 3170.12,-828.1 3169.92,-838.7 3176.57,-836.51"/>
</g>
<!-- 125&#45;&gt;126 -->
<g id="edge86" class="edge">
<title>125&#45;&gt;126</title>
<path fill="none" stroke="black" d="M3154.37,-791.7C3149.65,-783.64 3143.94,-773.89 3138.72,-764.98"/>
<polygon fill="black" stroke="black" points="3141.59,-762.96 3133.52,-756.1 3135.55,-766.5 3141.59,-762.96"/>
</g>
<!-- 128 -->
<g id="node88" class="node">
<title>128</title>
<polygon fill="none" stroke="black" points="3361,-684 2886,-684 2886,-648 3361,-648 3361,-684"/>
<text text-anchor="middle" x="3123.5" y="-662.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#45;1 &#160;&#160;14 3072]| newshape=[&#45;1, 14, 3072], reverse=0)</text>
</g>
<!-- 126&#45;&gt;128 -->
<g id="edge87" class="edge">
<title>126&#45;&gt;128</title>
<path fill="none" stroke="black" d="M3123.5,-719.7C3123.5,-711.98 3123.5,-702.71 3123.5,-694.11"/>
<polygon fill="black" stroke="black" points="3127,-694.1 3123.5,-684.1 3120,-694.1 3127,-694.1"/>
</g>
<!-- 133 -->
<g id="node92" class="node">
<title>133</title>
<polygon fill="none" stroke="black" points="2847,-612 2678,-612 2678,-576 2847,-576 2847,-612"/>
<text text-anchor="middle" x="2762.5" y="-590.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 128&#45;&gt;133 -->
<g id="edge91" class="edge">
<title>128&#45;&gt;133</title>
<path fill="none" stroke="black" d="M3035.65,-647.97C2981.62,-637.49 2912.39,-624.07 2857.15,-613.35"/>
<polygon fill="black" stroke="black" points="2857.49,-609.85 2847.01,-611.39 2856.16,-616.73 2857.49,-609.85"/>
</g>
<!-- 131 -->
<g id="node90" class="node">
<title>131</title>
<polygon fill="none" stroke="black" points="2963,-756 2566,-756 2566,-720 2963,-720 2963,-756"/>
<text text-anchor="middle" x="2764.5" y="-734.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#45;1 3072 &#160;768]| newshape=..., reverse=0)</text>
</g>
<!-- 129&#45;&gt;131 -->
<g id="edge89" class="edge">
<title>129&#45;&gt;131</title>
<path fill="none" stroke="black" d="M2765.25,-791.7C2765.14,-783.98 2765.01,-774.71 2764.89,-766.11"/>
<polygon fill="black" stroke="black" points="2768.39,-766.05 2764.74,-756.1 2761.39,-766.15 2768.39,-766.05"/>
</g>
<!-- 132 -->
<g id="node91" class="node">
<title>132</title>
<polygon fill="none" stroke="black" points="2868,-684 2657,-684 2657,-648 2868,-648 2868,-684"/>
<text text-anchor="middle" x="2762.5" y="-662.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 131&#45;&gt;132 -->
<g id="edge90" class="edge">
<title>131&#45;&gt;132</title>
<path fill="none" stroke="black" d="M2764.01,-719.7C2763.79,-711.98 2763.52,-702.71 2763.27,-694.11"/>
<polygon fill="black" stroke="black" points="2766.77,-694 2762.99,-684.1 2759.78,-694.2 2766.77,-694"/>
</g>
<!-- 132&#45;&gt;133 -->
<g id="edge92" class="edge">
<title>132&#45;&gt;133</title>
<path fill="none" stroke="black" d="M2762.5,-647.7C2762.5,-639.98 2762.5,-630.71 2762.5,-622.11"/>
<polygon fill="black" stroke="black" points="2766,-622.1 2762.5,-612.1 2759,-622.1 2766,-622.1"/>
</g>
<!-- 135 -->
<g id="node93" class="node">
<title>135</title>
<polygon fill="none" stroke="black" points="2982,-540 2539,-540 2539,-504 2982,-504 2982,-540"/>
<text text-anchor="middle" x="2760.5" y="-518.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 133&#45;&gt;135 -->
<g id="edge93" class="edge">
<title>133&#45;&gt;135</title>
<path fill="none" stroke="black" d="M2762.01,-575.7C2761.79,-567.98 2761.52,-558.71 2761.27,-550.11"/>
<polygon fill="black" stroke="black" points="2764.77,-550 2760.99,-540.1 2757.78,-550.2 2764.77,-550"/>
</g>
<!-- 135&#45;&gt;136 -->
<g id="edge94" class="edge">
<title>135&#45;&gt;136</title>
<path fill="none" stroke="black" d="M2760.5,-503.7C2760.5,-495.98 2760.5,-486.71 2760.5,-478.11"/>
<polygon fill="black" stroke="black" points="2764,-478.1 2760.5,-468.1 2757,-478.1 2764,-478.1"/>
</g>
<!-- 137 -->
<g id="node95" class="node">
<title>137</title>
<polygon fill="none" stroke="black" points="2777,-396 2594,-396 2594,-360 2777,-360 2777,-396"/>
<text text-anchor="middle" x="2685.5" y="-374.3" font-family="Times,serif" font-size="14.00">nn.dropout(·| rate=0.1)</text>
</g>
<!-- 136&#45;&gt;137 -->
<g id="edge96" class="edge">
<title>136&#45;&gt;137</title>
<path fill="none" stroke="black" d="M2741.96,-431.7C2732.7,-423.05 2721.34,-412.45 2711.24,-403.03"/>
<polygon fill="black" stroke="black" points="2713.52,-400.37 2703.83,-396.1 2708.75,-405.49 2713.52,-400.37"/>
</g>
<!-- 138 -->
<g id="node96" class="node">
<title>138</title>
<polygon fill="none" stroke="black" points="2700.5,-324 2532.5,-324 2532.5,-288 2700.5,-288 2700.5,-324"/>
<text text-anchor="middle" x="2616.5" y="-302.3" font-family="Times,serif" font-size="14.00">TupleGetItem(idx=0)</text>
</g>
<!-- 137&#45;&gt;138 -->
<g id="edge97" class="edge">
<title>137&#45;&gt;138</title>
<path fill="none" stroke="black" d="M2668.44,-359.7C2660,-351.14 2649.68,-340.66 2640.46,-331.3"/>
<polygon fill="black" stroke="black" points="2642.87,-328.77 2633.36,-324.1 2637.89,-333.68 2642.87,-328.77"/>
</g>
<!-- 138&#45;&gt;139 -->
<g id="edge98" class="edge">
<title>138&#45;&gt;139</title>
<path fill="none" stroke="black" d="M2602.16,-287.7C2595.21,-279.3 2586.73,-269.07 2579.1,-259.86"/>
<polygon fill="black" stroke="black" points="2581.75,-257.57 2572.67,-252.1 2576.36,-262.04 2581.75,-257.57"/>
</g>
<!-- 139&#45;&gt;140 -->
<g id="edge100" class="edge">
<title>139&#45;&gt;140</title>
<path fill="none" stroke="black" d="M2522.63,-218.64C2519.9,-217.7 2517.16,-216.81 2514.5,-216 2470.24,-202.6 2420.42,-190.99 2377.83,-182.08"/>
<polygon fill="black" stroke="black" points="2378.52,-178.64 2368.02,-180.04 2377.1,-185.5 2378.52,-178.64"/>
</g>
<!-- 141 -->
<g id="node99" class="node">
<title>141</title>
<polygon fill="none" stroke="black" points="2319.5,-108 2233.5,-108 2233.5,-72 2319.5,-72 2319.5,-108"/>
<text text-anchor="middle" x="2276.5" y="-86.3" font-family="Times,serif" font-size="14.00">Tuple[...])</text>
</g>
<!-- 140&#45;&gt;141 -->
<g id="edge103" class="edge">
<title>140&#45;&gt;141</title>
<path fill="none" stroke="black" d="M2276.5,-143.7C2276.5,-135.98 2276.5,-126.71 2276.5,-118.11"/>
<polygon fill="black" stroke="black" points="2280,-118.1 2276.5,-108.1 2273,-118.1 2280,-118.1"/>
</g>
<!-- 142 -->
<g id="node100" class="node">
<title>142</title>
<polygon fill="none" stroke="black" points="2316.5,-36 2236.5,-36 2236.5,0 2316.5,0 2316.5,-36"/>
<text text-anchor="middle" x="2276.5" y="-14.3" font-family="Times,serif" font-size="14.00">Function</text>
</g>
<!-- 141&#45;&gt;142 -->
<g id="edge104" class="edge">
<title>141&#45;&gt;142</title>
<path fill="none" stroke="black" d="M2276.5,-71.7C2276.5,-63.98 2276.5,-54.71 2276.5,-46.11"/>
<polygon fill="black" stroke="black" points="2280,-46.1 2276.5,-36.1 2273,-46.1 2280,-46.1"/>
</g>
</g>
</svg>