blob: 10a213df2be6e7419dacb96db0c6cddda057cc4d [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.43.0 (0)
-->
<!-- Title: %3 Pages: 1 -->
<svg width="3199pt" height="4364pt"
viewBox="0.00 0.00 3198.78 4364.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 4360)">
<title>%3</title>
<polygon fill="white" stroke="transparent" points="-4,4 -4,-4360 3194.78,-4360 3194.78,4 -4,4"/>
<!-- 0 -->
<g id="node1" class="node">
<title>0</title>
<ellipse fill="none" stroke="black" cx="1272" cy="-4194" rx="170.87" ry="18"/>
<text text-anchor="middle" x="1272" y="-4190.3" font-family="Times,serif" font-size="14.00">input: Tensor[(1, 14, 768), float32]</text>
</g>
<!-- 31 -->
<g id="node22" class="node">
<title>31</title>
<polygon fill="none" stroke="black" points="2024,-4140 1576,-4140 1576,-4104 2024,-4104 2024,-4140"/>
<text text-anchor="middle" x="1800" y="-4118.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 0&#45;&gt;31 -->
<g id="edge1" class="edge">
<title>0&#45;&gt;31</title>
<path fill="none" stroke="black" d="M1373.8,-4179.5C1455.55,-4168.67 1571.16,-4153.34 1661.35,-4141.38"/>
<polygon fill="black" stroke="black" points="1661.93,-4144.84 1671.39,-4140.05 1661.01,-4137.9 1661.93,-4144.84"/>
</g>
<!-- 94 -->
<g id="node74" class="node">
<title>94</title>
<polygon fill="none" stroke="black" points="1618.5,-2340 1547.5,-2340 1547.5,-2304 1618.5,-2304 1618.5,-2340"/>
<text text-anchor="middle" x="1583" y="-2318.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 0&#45;&gt;94 -->
<g id="edge67" class="edge">
<title>0&#45;&gt;94</title>
<path fill="none" stroke="black" d="M1113.6,-4187.23C932.27,-4176.19 660,-4144.12 660,-4051 660,-4051 660,-4051 660,-2609 660,-2562.03 678.56,-2547 717,-2520 853.57,-2424.05 1378.5,-2349.5 1537.26,-2328.77"/>
<polygon fill="black" stroke="black" points="1537.71,-2332.24 1547.18,-2327.48 1536.81,-2325.3 1537.71,-2332.24"/>
</g>
<!-- 1 -->
<g id="node2" class="node">
<title>1</title>
<ellipse fill="none" stroke="black" cx="2293" cy="-3402" rx="217.96" ry="18"/>
<text text-anchor="middle" x="2293" y="-3398.3" font-family="Times,serif" font-size="14.00">attention_mask: Tensor[(1, 1, 1, 14), float32]</text>
</g>
<!-- 61 -->
<g id="node45" class="node">
<title>61</title>
<polygon fill="none" stroke="black" points="2333.5,-3348 2262.5,-3348 2262.5,-3312 2333.5,-3312 2333.5,-3348"/>
<text text-anchor="middle" x="2298" y="-3326.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 1&#45;&gt;61 -->
<g id="edge30" class="edge">
<title>1&#45;&gt;61</title>
<path fill="none" stroke="black" d="M2294.24,-3383.7C2294.79,-3375.98 2295.45,-3366.71 2296.06,-3358.11"/>
<polygon fill="black" stroke="black" points="2299.56,-3358.33 2296.78,-3348.1 2292.57,-3357.83 2299.56,-3358.33"/>
</g>
<!-- 2 -->
<g id="node3" class="node">
<title>2</title>
<ellipse fill="none" stroke="black" cx="2273" cy="-4266" rx="265.65" ry="18"/>
<text text-anchor="middle" x="2273" y="-4262.3" font-family="Times,serif" font-size="14.00">attention.self.query.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 32 -->
<g id="node23" class="node">
<title>32</title>
<polygon fill="none" stroke="black" points="2369.5,-4212 2176.5,-4212 2176.5,-4176 2369.5,-4176 2369.5,-4212"/>
<text text-anchor="middle" x="2273" y="-4190.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 2&#45;&gt;32 -->
<g id="edge2" class="edge">
<title>2&#45;&gt;32</title>
<path fill="none" stroke="black" d="M2273,-4247.7C2273,-4239.98 2273,-4230.71 2273,-4222.11"/>
<polygon fill="black" stroke="black" points="2276.5,-4222.1 2273,-4212.1 2269.5,-4222.1 2276.5,-4222.1"/>
</g>
<!-- 3 -->
<g id="node4" class="node">
<title>3</title>
<ellipse fill="none" stroke="black" cx="2089" cy="-3906" rx="232.86" ry="18"/>
<text text-anchor="middle" x="2089" y="-3902.3" font-family="Times,serif" font-size="14.00">attention.self.query.bias: Tensor[(768,), float32]</text>
</g>
<!-- 39 -->
<g id="node28" class="node">
<title>39</title>
<polygon fill="none" stroke="black" points="2550.5,-3852 2479.5,-3852 2479.5,-3816 2550.5,-3816 2550.5,-3852"/>
<text text-anchor="middle" x="2515" y="-3830.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 3&#45;&gt;39 -->
<g id="edge9" class="edge">
<title>3&#45;&gt;39</title>
<path fill="none" stroke="black" d="M2183.28,-3889.51C2272.09,-3874.91 2401.37,-3853.67 2469.33,-3842.5"/>
<polygon fill="black" stroke="black" points="2470,-3845.94 2479.3,-3840.87 2468.87,-3839.03 2470,-3845.94"/>
</g>
<!-- 4 -->
<g id="node5" class="node">
<title>4</title>
<ellipse fill="none" stroke="black" cx="2936" cy="-4338" rx="254.55" ry="18"/>
<text text-anchor="middle" x="2936" y="-4334.3" font-family="Times,serif" font-size="14.00">attention.self.key.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 45 -->
<g id="node32" class="node">
<title>45</title>
<polygon fill="none" stroke="black" points="3032.5,-4284 2839.5,-4284 2839.5,-4248 3032.5,-4248 3032.5,-4284"/>
<text text-anchor="middle" x="2936" y="-4262.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 4&#45;&gt;45 -->
<g id="edge13" class="edge">
<title>4&#45;&gt;45</title>
<path fill="none" stroke="black" d="M2936,-4319.7C2936,-4311.98 2936,-4302.71 2936,-4294.11"/>
<polygon fill="black" stroke="black" points="2939.5,-4294.1 2936,-4284.1 2932.5,-4294.1 2939.5,-4294.1"/>
</g>
<!-- 5 -->
<g id="node6" class="node">
<title>5</title>
<ellipse fill="none" stroke="black" cx="2847" cy="-3978" rx="221.76" ry="18"/>
<text text-anchor="middle" x="2847" y="-3974.3" font-family="Times,serif" font-size="14.00">attention.self.key.bias: Tensor[(768,), float32]</text>
</g>
<!-- 50 -->
<g id="node37" class="node">
<title>50</title>
<polygon fill="none" stroke="black" points="2882.5,-3924 2811.5,-3924 2811.5,-3888 2882.5,-3888 2882.5,-3924"/>
<text text-anchor="middle" x="2847" y="-3902.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 5&#45;&gt;50 -->
<g id="edge20" class="edge">
<title>5&#45;&gt;50</title>
<path fill="none" stroke="black" d="M2847,-3959.7C2847,-3951.98 2847,-3942.71 2847,-3934.11"/>
<polygon fill="black" stroke="black" points="2850.5,-3934.1 2847,-3924.1 2843.5,-3934.1 2850.5,-3934.1"/>
</g>
<!-- 6 -->
<g id="node7" class="node">
<title>6</title>
<ellipse fill="none" stroke="black" cx="2121" cy="-3834" rx="265.35" ry="18"/>
<text text-anchor="middle" x="2121" y="-3830.3" font-family="Times,serif" font-size="14.00">attention.self.value.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 68 -->
<g id="node50" class="node">
<title>68</title>
<polygon fill="none" stroke="black" points="2208.5,-3780 2015.5,-3780 2015.5,-3744 2208.5,-3744 2208.5,-3780"/>
<text text-anchor="middle" x="2112" y="-3758.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 6&#45;&gt;68 -->
<g id="edge36" class="edge">
<title>6&#45;&gt;68</title>
<path fill="none" stroke="black" d="M2118.78,-3815.7C2117.78,-3807.98 2116.59,-3798.71 2115.49,-3790.11"/>
<polygon fill="black" stroke="black" points="2118.95,-3789.58 2114.2,-3780.1 2112,-3790.47 2118.95,-3789.58"/>
</g>
<!-- 7 -->
<g id="node8" class="node">
<title>7</title>
<ellipse fill="none" stroke="black" cx="1077" cy="-3474" rx="232.06" ry="18"/>
<text text-anchor="middle" x="1077" y="-3470.3" font-family="Times,serif" font-size="14.00">attention.self.value.bias: Tensor[(768,), float32]</text>
</g>
<!-- 73 -->
<g id="node55" class="node">
<title>73</title>
<polygon fill="none" stroke="black" points="1112.5,-3420 1041.5,-3420 1041.5,-3384 1112.5,-3384 1112.5,-3420"/>
<text text-anchor="middle" x="1077" y="-3398.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 7&#45;&gt;73 -->
<g id="edge43" class="edge">
<title>7&#45;&gt;73</title>
<path fill="none" stroke="black" d="M1077,-3455.7C1077,-3447.98 1077,-3438.71 1077,-3430.11"/>
<polygon fill="black" stroke="black" points="1080.5,-3430.1 1077,-3420.1 1073.5,-3430.1 1080.5,-3430.1"/>
</g>
<!-- 8 -->
<g id="node9" class="node">
<title>8</title>
<ellipse fill="none" stroke="black" cx="1920" cy="-2898" rx="282.15" ry="18"/>
<text text-anchor="middle" x="1920" y="-2894.3" font-family="Times,serif" font-size="14.00">attention.output.dense.weight: Tensor[(768, 768), float32]</text>
</g>
<!-- 85 -->
<g id="node66" class="node">
<title>85</title>
<polygon fill="none" stroke="black" points="2041.5,-2844 1848.5,-2844 1848.5,-2808 2041.5,-2808 2041.5,-2844"/>
<text text-anchor="middle" x="1945" y="-2822.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 8&#45;&gt;85 -->
<g id="edge55" class="edge">
<title>8&#45;&gt;85</title>
<path fill="none" stroke="black" d="M1926.18,-2879.7C1929,-2871.81 1932.39,-2862.3 1935.52,-2853.55"/>
<polygon fill="black" stroke="black" points="1938.82,-2854.7 1938.89,-2844.1 1932.23,-2852.34 1938.82,-2854.7"/>
</g>
<!-- 9 -->
<g id="node10" class="node">
<title>9</title>
<ellipse fill="none" stroke="black" cx="975" cy="-2538" rx="248.86" ry="18"/>
<text text-anchor="middle" x="975" y="-2534.3" font-family="Times,serif" font-size="14.00">attention.output.dense.bias: Tensor[(768,), float32]</text>
</g>
<!-- 90 -->
<g id="node71" class="node">
<title>90</title>
<polygon fill="none" stroke="black" points="1537.5,-2484 1466.5,-2484 1466.5,-2448 1537.5,-2448 1537.5,-2484"/>
<text text-anchor="middle" x="1502" y="-2462.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 9&#45;&gt;90 -->
<g id="edge62" class="edge">
<title>9&#45;&gt;90</title>
<path fill="none" stroke="black" d="M1088.38,-2521.94C1203.03,-2506.71 1374.57,-2483.93 1455.97,-2473.11"/>
<polygon fill="black" stroke="black" points="1456.92,-2476.52 1466.37,-2471.73 1456,-2469.58 1456.92,-2476.52"/>
</g>
<!-- 10 -->
<g id="node11" class="node">
<title>10</title>
<ellipse fill="none" stroke="black" cx="829" cy="-1962" rx="286.75" ry="18"/>
<text text-anchor="middle" x="829" y="-1958.3" font-family="Times,serif" font-size="14.00">attention.output.LayerNorm.weight: Tensor[(768,), float32]</text>
</g>
<!-- 105 -->
<g id="node81" class="node">
<title>105</title>
<polygon fill="none" stroke="black" points="943,-1908 839,-1908 839,-1872 943,-1872 943,-1908"/>
<text text-anchor="middle" x="891" y="-1886.3" font-family="Times,serif" font-size="14.00">multiply(·, ·)</text>
</g>
<!-- 10&#45;&gt;105 -->
<g id="edge78" class="edge">
<title>10&#45;&gt;105</title>
<path fill="none" stroke="black" d="M844.33,-1943.7C851.76,-1935.3 860.82,-1925.07 868.98,-1915.86"/>
<polygon fill="black" stroke="black" points="871.84,-1917.91 875.85,-1908.1 866.6,-1913.27 871.84,-1917.91"/>
</g>
<!-- 11 -->
<g id="node12" class="node">
<title>11</title>
<ellipse fill="none" stroke="black" cx="547" cy="-1890" rx="274.05" ry="18"/>
<text text-anchor="middle" x="547" y="-1886.3" font-family="Times,serif" font-size="14.00">attention.output.LayerNorm.bias: Tensor[(768,), float32]</text>
</g>
<!-- 106 -->
<g id="node82" class="node">
<title>106</title>
<polygon fill="none" stroke="black" points="706.5,-1836 635.5,-1836 635.5,-1800 706.5,-1800 706.5,-1836"/>
<text text-anchor="middle" x="671" y="-1814.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 11&#45;&gt;106 -->
<g id="edge80" class="edge">
<title>11&#45;&gt;106</title>
<path fill="none" stroke="black" d="M577.02,-1872.05C593.5,-1862.75 614.13,-1851.11 631.86,-1841.1"/>
<polygon fill="black" stroke="black" points="633.84,-1844 640.83,-1836.03 630.4,-1837.9 633.84,-1844"/>
</g>
<!-- 12 -->
<g id="node13" class="node">
<title>12</title>
<ellipse fill="none" stroke="black" cx="1405" cy="-1962" rx="271.85" ry="18"/>
<text text-anchor="middle" x="1405" y="-1958.3" font-family="Times,serif" font-size="14.00">intermediate.dense.weight: Tensor[(3072, 768), float32]</text>
</g>
<!-- 108 -->
<g id="node84" class="node">
<title>108</title>
<polygon fill="none" stroke="black" points="1501.5,-1908 1308.5,-1908 1308.5,-1872 1501.5,-1872 1501.5,-1908"/>
<text text-anchor="middle" x="1405" y="-1886.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 12&#45;&gt;108 -->
<g id="edge82" class="edge">
<title>12&#45;&gt;108</title>
<path fill="none" stroke="black" d="M1405,-1943.7C1405,-1935.98 1405,-1926.71 1405,-1918.11"/>
<polygon fill="black" stroke="black" points="1408.5,-1918.1 1405,-1908.1 1401.5,-1918.1 1408.5,-1918.1"/>
</g>
<!-- 13 -->
<g id="node14" class="node">
<title>13</title>
<ellipse fill="none" stroke="black" cx="1488" cy="-1602" rx="238.56" ry="18"/>
<text text-anchor="middle" x="1488" y="-1598.3" font-family="Times,serif" font-size="14.00">intermediate.dense.bias: Tensor[(3072,), float32]</text>
</g>
<!-- 115 -->
<g id="node89" class="node">
<title>115</title>
<polygon fill="none" stroke="black" points="1072.5,-1548 1001.5,-1548 1001.5,-1512 1072.5,-1512 1072.5,-1548"/>
<text text-anchor="middle" x="1037" y="-1526.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 13&#45;&gt;115 -->
<g id="edge89" class="edge">
<title>13&#45;&gt;115</title>
<path fill="none" stroke="black" d="M1388.75,-1585.59C1293.58,-1570.82 1154.14,-1549.18 1082.78,-1538.11"/>
<polygon fill="black" stroke="black" points="1083.18,-1534.63 1072.76,-1536.55 1082.1,-1541.54 1083.18,-1534.63"/>
</g>
<!-- 14 -->
<g id="node15" class="node">
<title>14</title>
<ellipse fill="none" stroke="black" cx="1447" cy="-1314" rx="242.36" ry="18"/>
<text text-anchor="middle" x="1447" y="-1310.3" font-family="Times,serif" font-size="14.00">output.dense.weight: Tensor[(768, 3072), float32]</text>
</g>
<!-- 127 -->
<g id="node96" class="node">
<title>127</title>
<polygon fill="none" stroke="black" points="1565.5,-1260 1372.5,-1260 1372.5,-1224 1565.5,-1224 1565.5,-1260"/>
<text text-anchor="middle" x="1469" y="-1238.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[1, 0])</text>
</g>
<!-- 14&#45;&gt;127 -->
<g id="edge97" class="edge">
<title>14&#45;&gt;127</title>
<path fill="none" stroke="black" d="M1452.44,-1295.7C1454.89,-1287.9 1457.84,-1278.51 1460.57,-1269.83"/>
<polygon fill="black" stroke="black" points="1463.96,-1270.69 1463.62,-1260.1 1457.29,-1268.59 1463.96,-1270.69"/>
</g>
<!-- 15 -->
<g id="node16" class="node">
<title>15</title>
<ellipse fill="none" stroke="black" cx="634" cy="-954" rx="203.36" ry="18"/>
<text text-anchor="middle" x="634" y="-950.3" font-family="Times,serif" font-size="14.00">output.dense.bias: Tensor[(768,), float32]</text>
</g>
<!-- 133 -->
<g id="node101" class="node">
<title>133</title>
<polygon fill="none" stroke="black" points="891.5,-900 820.5,-900 820.5,-864 891.5,-864 891.5,-900"/>
<text text-anchor="middle" x="856" y="-878.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 15&#45;&gt;133 -->
<g id="edge104" class="edge">
<title>15&#45;&gt;133</title>
<path fill="none" stroke="black" d="M688.74,-936.57C722.6,-926.3 766.91,-912.66 806,-900 807.44,-899.53 808.91,-899.05 810.39,-898.57"/>
<polygon fill="black" stroke="black" points="811.87,-901.76 820.26,-895.29 809.67,-895.12 811.87,-901.76"/>
</g>
<!-- 16 -->
<g id="node17" class="node">
<title>16</title>
<ellipse fill="none" stroke="black" cx="440" cy="-378" rx="241.26" ry="18"/>
<text text-anchor="middle" x="440" y="-374.3" font-family="Times,serif" font-size="14.00">output.LayerNorm.weight: Tensor[(768,), float32]</text>
</g>
<!-- 145 -->
<g id="node111" class="node">
<title>145</title>
<polygon fill="none" stroke="black" points="850,-324 746,-324 746,-288 850,-288 850,-324"/>
<text text-anchor="middle" x="798" y="-302.3" font-family="Times,serif" font-size="14.00">multiply(·, ·)</text>
</g>
<!-- 16&#45;&gt;145 -->
<g id="edge120" class="edge">
<title>16&#45;&gt;145</title>
<path fill="none" stroke="black" d="M521.68,-361.03C586.74,-348.31 676.45,-330.77 735.78,-319.17"/>
<polygon fill="black" stroke="black" points="736.62,-322.57 745.76,-317.21 735.27,-315.7 736.62,-322.57"/>
</g>
<!-- 17 -->
<g id="node18" class="node">
<title>17</title>
<ellipse fill="none" stroke="black" cx="1178" cy="-306" rx="228.56" ry="18"/>
<text text-anchor="middle" x="1178" y="-302.3" font-family="Times,serif" font-size="14.00">output.LayerNorm.bias: Tensor[(768,), float32]</text>
</g>
<!-- 146 -->
<g id="node112" class="node">
<title>146</title>
<polygon fill="none" stroke="black" points="1213.5,-252 1142.5,-252 1142.5,-216 1213.5,-216 1213.5,-252"/>
<text text-anchor="middle" x="1178" y="-230.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 17&#45;&gt;146 -->
<g id="edge122" class="edge">
<title>17&#45;&gt;146</title>
<path fill="none" stroke="black" d="M1178,-287.7C1178,-279.98 1178,-270.71 1178,-262.11"/>
<polygon fill="black" stroke="black" points="1181.5,-262.1 1178,-252.1 1174.5,-262.1 1181.5,-262.1"/>
</g>
<!-- 18 -->
<g id="node19" class="node">
<title>18</title>
<ellipse fill="none" stroke="black" cx="1539" cy="-3330" rx="204.16" ry="18"/>
<text text-anchor="middle" x="1539" y="-3326.3" font-family="Times,serif" font-size="14.00">dropout:0: Tensor[(1, 12, 14, 14), float32]</text>
</g>
<!-- 64 -->
<g id="node47" class="node">
<title>64</title>
<polygon fill="none" stroke="black" points="1627,-3276 1451,-3276 1451,-3240 1627,-3240 1627,-3276"/>
<text text-anchor="middle" x="1539" y="-3254.3" font-family="Times,serif" font-size="14.00">multiply(·, 1.1111112)</text>
</g>
<!-- 18&#45;&gt;64 -->
<g id="edge32" class="edge">
<title>18&#45;&gt;64</title>
<path fill="none" stroke="black" d="M1539,-3311.7C1539,-3303.98 1539,-3294.71 1539,-3286.11"/>
<polygon fill="black" stroke="black" points="1542.5,-3286.1 1539,-3276.1 1535.5,-3286.1 1542.5,-3286.1"/>
</g>
<!-- 19 -->
<g id="node20" class="node">
<title>19</title>
<ellipse fill="none" stroke="black" cx="1934" cy="-2538" rx="192.27" ry="18"/>
<text text-anchor="middle" x="1934" y="-2534.3" font-family="Times,serif" font-size="14.00">dropout:1: Tensor[(1, 14, 768), float32]</text>
</g>
<!-- 92 -->
<g id="node72" class="node">
<title>92</title>
<polygon fill="none" stroke="black" points="2022,-2484 1846,-2484 1846,-2448 2022,-2448 2022,-2484"/>
<text text-anchor="middle" x="1934" y="-2462.3" font-family="Times,serif" font-size="14.00">multiply(·, 1.1111112)</text>
</g>
<!-- 19&#45;&gt;92 -->
<g id="edge63" class="edge">
<title>19&#45;&gt;92</title>
<path fill="none" stroke="black" d="M1934,-2519.7C1934,-2511.98 1934,-2502.71 1934,-2494.11"/>
<polygon fill="black" stroke="black" points="1937.5,-2494.1 1934,-2484.1 1930.5,-2494.1 1937.5,-2494.1"/>
</g>
<!-- 20 -->
<g id="node21" class="node">
<title>20</title>
<ellipse fill="none" stroke="black" cx="220" cy="-954" rx="192.27" ry="18"/>
<text text-anchor="middle" x="220" y="-950.3" font-family="Times,serif" font-size="14.00">dropout:2: Tensor[(1, 14, 768), float32]</text>
</g>
<!-- 135 -->
<g id="node102" class="node">
<title>135</title>
<polygon fill="none" stroke="black" points="797,-900 621,-900 621,-864 797,-864 797,-900"/>
<text text-anchor="middle" x="709" y="-878.3" font-family="Times,serif" font-size="14.00">multiply(·, 1.1111112)</text>
</g>
<!-- 20&#45;&gt;135 -->
<g id="edge105" class="edge">
<title>20&#45;&gt;135</title>
<path fill="none" stroke="black" d="M320.43,-938.62C405.44,-926.45 526.56,-909.12 610.9,-897.04"/>
<polygon fill="black" stroke="black" points="611.6,-900.48 621,-895.6 610.6,-893.55 611.6,-900.48"/>
</g>
<!-- 36 -->
<g id="node26" class="node">
<title>36</title>
<polygon fill="none" stroke="black" points="2146.5,-3996 1977.5,-3996 1977.5,-3960 2146.5,-3960 2146.5,-3996"/>
<text text-anchor="middle" x="2062" y="-3974.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 31&#45;&gt;36 -->
<g id="edge5" class="edge">
<title>31&#45;&gt;36</title>
<path fill="none" stroke="black" d="M1831.6,-4103.87C1879.11,-4078.12 1968.92,-4029.45 2021.28,-4001.07"/>
<polygon fill="black" stroke="black" points="2023.16,-4004.03 2030.28,-3996.19 2019.83,-3997.88 2023.16,-4004.03"/>
</g>
<!-- 48 -->
<g id="node35" class="node">
<title>48</title>
<polygon fill="none" stroke="black" points="2503.5,-4068 2334.5,-4068 2334.5,-4032 2503.5,-4032 2503.5,-4068"/>
<text text-anchor="middle" x="2419" y="-4046.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 31&#45;&gt;48 -->
<g id="edge16" class="edge">
<title>31&#45;&gt;48</title>
<path fill="none" stroke="black" d="M2007.19,-4104C2101.38,-4095.31 2214.42,-4083.54 2324.45,-4068.16"/>
<polygon fill="black" stroke="black" points="2324.99,-4071.62 2334.4,-4066.76 2324.01,-4064.69 2324.99,-4071.62"/>
</g>
<!-- 71 -->
<g id="node53" class="node">
<title>71</title>
<polygon fill="none" stroke="black" points="1800.5,-3564 1631.5,-3564 1631.5,-3528 1800.5,-3528 1800.5,-3564"/>
<text text-anchor="middle" x="1716" y="-3542.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 31&#45;&gt;71 -->
<g id="edge39" class="edge">
<title>31&#45;&gt;71</title>
<path fill="none" stroke="black" d="M1788.4,-4103.93C1771.99,-4078.09 1744,-4026.97 1744,-3979 1744,-3979 1744,-3979 1744,-3689 1744,-3648.27 1732.77,-3602.19 1724.54,-3573.86"/>
<polygon fill="black" stroke="black" points="1727.89,-3572.85 1721.67,-3564.27 1721.18,-3574.86 1727.89,-3572.85"/>
</g>
<!-- 148 -->
<g id="node114" class="node">
<title>148</title>
<polygon fill="none" stroke="black" points="1893,-324 1807,-324 1807,-288 1893,-288 1893,-324"/>
<text text-anchor="middle" x="1850" y="-302.3" font-family="Times,serif" font-size="14.00">Tuple[...])</text>
</g>
<!-- 31&#45;&gt;148 -->
<g id="edge135" class="edge">
<title>31&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1805.67,-4103.73C1813.85,-4077.21 1828,-4024.73 1828,-3979 1828,-3979 1828,-3979 1828,-3185 1828,-3144.55 1822.17,-3127.93 1847,-3096 1957.79,-2953.53 2100.21,-3058.47 2211,-2916 2235.83,-2884.07 2230,-2867.45 2230,-2827 2230,-2827 2230,-2827 2230,-449 2230,-401.3 2210.14,-385.78 2170,-360 2126.81,-332.26 1982.84,-317.15 1903.54,-310.75"/>
<polygon fill="black" stroke="black" points="1903.51,-307.24 1893.26,-309.94 1902.96,-314.22 1903.51,-307.24"/>
</g>
<!-- 34 -->
<g id="node24" class="node">
<title>34</title>
<polygon fill="none" stroke="black" points="2503.5,-4140 2042.5,-4140 2042.5,-4104 2503.5,-4104 2503.5,-4140"/>
<text text-anchor="middle" x="2273" y="-4118.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 32&#45;&gt;34 -->
<g id="edge3" class="edge">
<title>32&#45;&gt;34</title>
<path fill="none" stroke="black" d="M2273,-4175.7C2273,-4167.98 2273,-4158.71 2273,-4150.11"/>
<polygon fill="black" stroke="black" points="2276.5,-4150.1 2273,-4140.1 2269.5,-4150.1 2276.5,-4150.1"/>
</g>
<!-- 35 -->
<g id="node25" class="node">
<title>35</title>
<polygon fill="none" stroke="black" points="2316.5,-4068 2105.5,-4068 2105.5,-4032 2316.5,-4032 2316.5,-4068"/>
<text text-anchor="middle" x="2211" y="-4046.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 34&#45;&gt;35 -->
<g id="edge4" class="edge">
<title>34&#45;&gt;35</title>
<path fill="none" stroke="black" d="M2257.67,-4103.7C2250.24,-4095.3 2241.18,-4085.07 2233.02,-4075.86"/>
<polygon fill="black" stroke="black" points="2235.4,-4073.27 2226.15,-4068.1 2230.16,-4077.91 2235.4,-4073.27"/>
</g>
<!-- 35&#45;&gt;36 -->
<g id="edge6" class="edge">
<title>35&#45;&gt;36</title>
<path fill="none" stroke="black" d="M2174.55,-4031.88C2154.36,-4022.39 2129.08,-4010.51 2107.59,-4000.42"/>
<polygon fill="black" stroke="black" points="2108.87,-3997.15 2098.33,-3996.07 2105.89,-4003.49 2108.87,-3997.15"/>
</g>
<!-- 35&#45;&gt;148 -->
<g id="edge136" class="edge">
<title>35&#45;&gt;148</title>
<path fill="none" stroke="black" d="M2316.54,-4032.98C2319.73,-4032.63 2322.89,-4032.3 2326,-4032 2409.26,-4023.87 3014.24,-4050.15 3078,-3996 3108.83,-3969.82 3097,-3947.45 3097,-3907 3097,-3907 3097,-3907 3097,-449 3097,-326.96 2141.89,-309.77 1903.44,-307.38"/>
<polygon fill="black" stroke="black" points="1903.32,-303.88 1893.29,-307.28 1903.25,-310.88 1903.32,-303.88"/>
</g>
<!-- 38 -->
<g id="node27" class="node">
<title>38</title>
<polygon fill="none" stroke="black" points="2782.5,-3924 2339.5,-3924 2339.5,-3888 2782.5,-3888 2782.5,-3924"/>
<text text-anchor="middle" x="2561" y="-3902.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 36&#45;&gt;38 -->
<g id="edge7" class="edge">
<title>36&#45;&gt;38</title>
<path fill="none" stroke="black" d="M2146.62,-3961.34C2149.44,-3960.88 2152.24,-3960.43 2155,-3960 2237.84,-3947.03 2330.53,-3934.76 2406.41,-3925.3"/>
<polygon fill="black" stroke="black" points="2407.04,-3928.75 2416.53,-3924.04 2406.18,-3921.8 2407.04,-3928.75"/>
</g>
<!-- 38&#45;&gt;39 -->
<g id="edge8" class="edge">
<title>38&#45;&gt;39</title>
<path fill="none" stroke="black" d="M2549.63,-3887.7C2544.28,-3879.56 2537.8,-3869.69 2531.89,-3860.7"/>
<polygon fill="black" stroke="black" points="2534.66,-3858.54 2526.24,-3852.1 2528.81,-3862.38 2534.66,-3858.54"/>
</g>
<!-- 41 -->
<g id="node29" class="node">
<title>41</title>
<polygon fill="none" stroke="black" points="2704.5,-3780 2325.5,-3780 2325.5,-3744 2704.5,-3744 2704.5,-3780"/>
<text text-anchor="middle" x="2515" y="-3758.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 14 12 64]| newshape=..., reverse=0)</text>
</g>
<!-- 39&#45;&gt;41 -->
<g id="edge10" class="edge">
<title>39&#45;&gt;41</title>
<path fill="none" stroke="black" d="M2515,-3815.7C2515,-3807.98 2515,-3798.71 2515,-3790.11"/>
<polygon fill="black" stroke="black" points="2518.5,-3790.1 2515,-3780.1 2511.5,-3790.1 2518.5,-3790.1"/>
</g>
<!-- 42 -->
<g id="node30" class="node">
<title>42</title>
<polygon fill="none" stroke="black" points="2629.5,-3708 2400.5,-3708 2400.5,-3672 2629.5,-3672 2629.5,-3708"/>
<text text-anchor="middle" x="2515" y="-3686.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 41&#45;&gt;42 -->
<g id="edge11" class="edge">
<title>41&#45;&gt;42</title>
<path fill="none" stroke="black" d="M2515,-3743.7C2515,-3735.98 2515,-3726.71 2515,-3718.11"/>
<polygon fill="black" stroke="black" points="2518.5,-3718.1 2515,-3708.1 2511.5,-3718.1 2518.5,-3718.1"/>
</g>
<!-- 44 -->
<g id="node31" class="node">
<title>44</title>
<polygon fill="none" stroke="black" points="2743.5,-3636 2322.5,-3636 2322.5,-3600 2743.5,-3600 2743.5,-3636"/>
<text text-anchor="middle" x="2533" y="-3614.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 14 64]| newshape=[&#45;1, 14, 64], reverse=0)</text>
</g>
<!-- 42&#45;&gt;44 -->
<g id="edge12" class="edge">
<title>42&#45;&gt;44</title>
<path fill="none" stroke="black" d="M2519.45,-3671.7C2521.45,-3663.9 2523.87,-3654.51 2526.1,-3645.83"/>
<polygon fill="black" stroke="black" points="2529.5,-3646.66 2528.6,-3636.1 2522.72,-3644.92 2529.5,-3646.66"/>
</g>
<!-- 56 -->
<g id="node42" class="node">
<title>56</title>
<polygon fill="none" stroke="black" points="2843.5,-3564 2674.5,-3564 2674.5,-3528 2843.5,-3528 2843.5,-3564"/>
<text text-anchor="middle" x="2759" y="-3542.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 44&#45;&gt;56 -->
<g id="edge25" class="edge">
<title>44&#45;&gt;56</title>
<path fill="none" stroke="black" d="M2588,-3599.97C2619.99,-3590.06 2660.48,-3577.51 2694.04,-3567.12"/>
<polygon fill="black" stroke="black" points="2695.24,-3570.41 2703.76,-3564.11 2693.17,-3563.73 2695.24,-3570.41"/>
</g>
<!-- 44&#45;&gt;148 -->
<g id="edge134" class="edge">
<title>44&#45;&gt;148</title>
<path fill="none" stroke="black" d="M2554.19,-3599.97C2580.12,-3577.91 2623.33,-3536.92 2646,-3492 2664.18,-3455.97 2664,-3443.36 2664,-3403 2664,-3403 2664,-3403 2664,-449 2664,-371.52 2084.2,-323.68 1903.27,-310.63"/>
<polygon fill="black" stroke="black" points="1903.41,-307.13 1893.19,-309.91 1902.91,-314.12 1903.41,-307.13"/>
</g>
<!-- 46 -->
<g id="node33" class="node">
<title>46</title>
<polygon fill="none" stroke="black" points="3166.5,-4212 2705.5,-4212 2705.5,-4176 3166.5,-4176 3166.5,-4212"/>
<text text-anchor="middle" x="2936" y="-4190.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 45&#45;&gt;46 -->
<g id="edge14" class="edge">
<title>45&#45;&gt;46</title>
<path fill="none" stroke="black" d="M2936,-4247.7C2936,-4239.98 2936,-4230.71 2936,-4222.11"/>
<polygon fill="black" stroke="black" points="2939.5,-4222.1 2936,-4212.1 2932.5,-4222.1 2939.5,-4222.1"/>
</g>
<!-- 47 -->
<g id="node34" class="node">
<title>47</title>
<polygon fill="none" stroke="black" points="3041.5,-4140 2830.5,-4140 2830.5,-4104 3041.5,-4104 3041.5,-4140"/>
<text text-anchor="middle" x="2936" y="-4118.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 46&#45;&gt;47 -->
<g id="edge15" class="edge">
<title>46&#45;&gt;47</title>
<path fill="none" stroke="black" d="M2936,-4175.7C2936,-4167.98 2936,-4158.71 2936,-4150.11"/>
<polygon fill="black" stroke="black" points="2939.5,-4150.1 2936,-4140.1 2932.5,-4150.1 2939.5,-4150.1"/>
</g>
<!-- 47&#45;&gt;48 -->
<g id="edge17" class="edge">
<title>47&#45;&gt;48</title>
<path fill="none" stroke="black" d="M2830.44,-4106.71C2737.45,-4094.12 2603.23,-4075.94 2513.66,-4063.82"/>
<polygon fill="black" stroke="black" points="2513.89,-4060.32 2503.51,-4062.44 2512.95,-4067.25 2513.89,-4060.32"/>
</g>
<!-- 47&#45;&gt;148 -->
<g id="edge138" class="edge">
<title>47&#45;&gt;148</title>
<path fill="none" stroke="black" d="M3001.16,-4103.99C3059.2,-4084.43 3135,-4046.06 3135,-3979 3135,-3979 3135,-3979 3135,-449 3135,-411.17 3304.15,-444.18 2948,-360 2746.02,-312.26 2094.68,-307.38 1903.33,-307"/>
<polygon fill="black" stroke="black" points="1903.25,-303.5 1893.24,-306.98 1903.24,-310.5 1903.25,-303.5"/>
</g>
<!-- 49 -->
<g id="node36" class="node">
<title>49</title>
<polygon fill="none" stroke="black" points="2607.5,-3996 2164.5,-3996 2164.5,-3960 2607.5,-3960 2607.5,-3996"/>
<text text-anchor="middle" x="2386" y="-3974.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 48&#45;&gt;49 -->
<g id="edge18" class="edge">
<title>48&#45;&gt;49</title>
<path fill="none" stroke="black" d="M2410.84,-4031.7C2407.09,-4023.73 2402.55,-4014.1 2398.38,-4005.26"/>
<polygon fill="black" stroke="black" points="2401.49,-4003.66 2394.06,-3996.1 2395.16,-4006.64 2401.49,-4003.66"/>
</g>
<!-- 49&#45;&gt;50 -->
<g id="edge19" class="edge">
<title>49&#45;&gt;50</title>
<path fill="none" stroke="black" d="M2567.52,-3959.98C2638.06,-3951.63 2719.25,-3939.87 2792,-3924 2795.17,-3923.31 2798.43,-3922.52 2801.69,-3921.66"/>
<polygon fill="black" stroke="black" points="2802.74,-3925.01 2811.44,-3918.96 2800.86,-3918.26 2802.74,-3925.01"/>
</g>
<!-- 51 -->
<g id="node38" class="node">
<title>51</title>
<polygon fill="none" stroke="black" points="3041.5,-3852 2662.5,-3852 2662.5,-3816 3041.5,-3816 3041.5,-3852"/>
<text text-anchor="middle" x="2852" y="-3830.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 14 12 64]| newshape=..., reverse=0)</text>
</g>
<!-- 50&#45;&gt;51 -->
<g id="edge21" class="edge">
<title>50&#45;&gt;51</title>
<path fill="none" stroke="black" d="M2848.24,-3887.7C2848.79,-3879.98 2849.45,-3870.71 2850.06,-3862.11"/>
<polygon fill="black" stroke="black" points="2853.56,-3862.33 2850.78,-3852.1 2846.57,-3861.83 2853.56,-3862.33"/>
</g>
<!-- 52 -->
<g id="node39" class="node">
<title>52</title>
<polygon fill="none" stroke="black" points="2969.5,-3780 2740.5,-3780 2740.5,-3744 2969.5,-3744 2969.5,-3780"/>
<text text-anchor="middle" x="2855" y="-3758.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 3, 1])</text>
</g>
<!-- 51&#45;&gt;52 -->
<g id="edge22" class="edge">
<title>51&#45;&gt;52</title>
<path fill="none" stroke="black" d="M2852.74,-3815.7C2853.07,-3807.98 2853.47,-3798.71 2853.84,-3790.11"/>
<polygon fill="black" stroke="black" points="2857.34,-3790.25 2854.27,-3780.1 2850.34,-3789.95 2857.34,-3790.25"/>
</g>
<!-- 54 -->
<g id="node40" class="node">
<title>54</title>
<polygon fill="none" stroke="black" points="3068.5,-3708 2647.5,-3708 2647.5,-3672 3068.5,-3672 3068.5,-3708"/>
<text text-anchor="middle" x="2858" y="-3686.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 64 14]| newshape=[&#45;1, 64, 14], reverse=0)</text>
</g>
<!-- 52&#45;&gt;54 -->
<g id="edge23" class="edge">
<title>52&#45;&gt;54</title>
<path fill="none" stroke="black" d="M2855.74,-3743.7C2856.07,-3735.98 2856.47,-3726.71 2856.84,-3718.11"/>
<polygon fill="black" stroke="black" points="2860.34,-3718.25 2857.27,-3708.1 2853.34,-3717.95 2860.34,-3718.25"/>
</g>
<!-- 55 -->
<g id="node41" class="node">
<title>55</title>
<polygon fill="none" stroke="black" points="2972.5,-3636 2761.5,-3636 2761.5,-3600 2972.5,-3600 2972.5,-3636"/>
<text text-anchor="middle" x="2867" y="-3614.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 54&#45;&gt;55 -->
<g id="edge24" class="edge">
<title>54&#45;&gt;55</title>
<path fill="none" stroke="black" d="M2860.22,-3671.7C2861.22,-3663.98 2862.41,-3654.71 2863.51,-3646.11"/>
<polygon fill="black" stroke="black" points="2867,-3646.47 2864.8,-3636.1 2860.05,-3645.58 2867,-3646.47"/>
</g>
<!-- 55&#45;&gt;56 -->
<g id="edge26" class="edge">
<title>55&#45;&gt;56</title>
<path fill="none" stroke="black" d="M2840.58,-3599.88C2826.59,-3590.81 2809.22,-3579.55 2794.11,-3569.76"/>
<polygon fill="black" stroke="black" points="2795.82,-3566.69 2785.52,-3564.19 2792.01,-3572.57 2795.82,-3566.69"/>
</g>
<!-- 55&#45;&gt;148 -->
<g id="edge137" class="edge">
<title>55&#45;&gt;148</title>
<path fill="none" stroke="black" d="M2867.79,-3599.96C2868.96,-3573.3 2871,-3520.13 2871,-3475 2871,-3475 2871,-3475 2871,-449 2871,-350.33 2113.35,-316.28 1903.38,-308.74"/>
<polygon fill="black" stroke="black" points="1903.34,-305.24 1893.22,-308.38 1903.09,-312.23 1903.34,-305.24"/>
</g>
<!-- 58 -->
<g id="node43" class="node">
<title>58</title>
<polygon fill="none" stroke="black" points="2636.5,-3492 2257.5,-3492 2257.5,-3456 2636.5,-3456 2636.5,-3492"/>
<text text-anchor="middle" x="2447" y="-3470.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 12 14 14]| newshape=..., reverse=0)</text>
</g>
<!-- 56&#45;&gt;58 -->
<g id="edge27" class="edge">
<title>56&#45;&gt;58</title>
<path fill="none" stroke="black" d="M2683.08,-3527.97C2637.69,-3517.78 2579.9,-3504.82 2532.86,-3494.26"/>
<polygon fill="black" stroke="black" points="2533.52,-3490.82 2522.99,-3492.05 2531.99,-3497.65 2533.52,-3490.82"/>
</g>
<!-- 60 -->
<g id="node44" class="node">
<title>60</title>
<polygon fill="none" stroke="black" points="2635.5,-3420 2528.5,-3420 2528.5,-3384 2635.5,-3384 2635.5,-3420"/>
<text text-anchor="middle" x="2582" y="-3398.3" font-family="Times,serif" font-size="14.00">divide(·, 8.0)</text>
</g>
<!-- 58&#45;&gt;60 -->
<g id="edge28" class="edge">
<title>58&#45;&gt;60</title>
<path fill="none" stroke="black" d="M2480.02,-3455.88C2498.08,-3446.51 2520.65,-3434.81 2539.95,-3424.8"/>
<polygon fill="black" stroke="black" points="2541.58,-3427.9 2548.85,-3420.19 2538.36,-3421.69 2541.58,-3427.9"/>
</g>
<!-- 60&#45;&gt;61 -->
<g id="edge29" class="edge">
<title>60&#45;&gt;61</title>
<path fill="none" stroke="black" d="M2528.26,-3386.15C2525.47,-3385.41 2522.7,-3384.69 2520,-3384 2459.29,-3368.43 2388.84,-3351.87 2343.88,-3341.48"/>
<polygon fill="black" stroke="black" points="2344.38,-3338.01 2333.85,-3339.17 2342.81,-3344.83 2344.38,-3338.01"/>
</g>
<!-- 62 -->
<g id="node46" class="node">
<title>62</title>
<polygon fill="none" stroke="black" points="2385,-3276 2211,-3276 2211,-3240 2385,-3240 2385,-3276"/>
<text text-anchor="middle" x="2298" y="-3254.3" font-family="Times,serif" font-size="14.00">nn.softmax(·| axis=&#45;1)</text>
</g>
<!-- 61&#45;&gt;62 -->
<g id="edge31" class="edge">
<title>61&#45;&gt;62</title>
<path fill="none" stroke="black" d="M2298,-3311.7C2298,-3303.98 2298,-3294.71 2298,-3286.11"/>
<polygon fill="black" stroke="black" points="2301.5,-3286.1 2298,-3276.1 2294.5,-3286.1 2301.5,-3286.1"/>
</g>
<!-- 65 -->
<g id="node48" class="node">
<title>65</title>
<polygon fill="none" stroke="black" points="2119,-3204 2015,-3204 2015,-3168 2119,-3168 2119,-3204"/>
<text text-anchor="middle" x="2067" y="-3182.3" font-family="Times,serif" font-size="14.00">multiply(·, ·)</text>
</g>
<!-- 62&#45;&gt;65 -->
<g id="edge33" class="edge">
<title>62&#45;&gt;65</title>
<path fill="none" stroke="black" d="M2241.79,-3239.97C2207.61,-3229.61 2163.92,-3216.37 2128.78,-3205.72"/>
<polygon fill="black" stroke="black" points="2129.69,-3202.34 2119.11,-3202.79 2127.66,-3209.04 2129.69,-3202.34"/>
</g>
<!-- 62&#45;&gt;148 -->
<g id="edge133" class="edge">
<title>62&#45;&gt;148</title>
<path fill="none" stroke="black" d="M2307.53,-3239.66C2321,-3213.47 2344,-3161.9 2344,-3115 2344,-3115 2344,-3115 2344,-449 2344,-408.03 2351.07,-386.7 2320,-360 2288.87,-333.25 2019.61,-315.95 1903.22,-309.66"/>
<polygon fill="black" stroke="black" points="1903.29,-306.16 1893.11,-309.12 1902.91,-313.15 1903.29,-306.16"/>
</g>
<!-- 64&#45;&gt;65 -->
<g id="edge34" class="edge">
<title>64&#45;&gt;65</title>
<path fill="none" stroke="black" d="M1627.08,-3245.32C1733.84,-3231.17 1911.04,-3207.68 2004.64,-3195.27"/>
<polygon fill="black" stroke="black" points="2005.35,-3198.7 2014.81,-3193.92 2004.43,-3191.76 2005.35,-3198.7"/>
</g>
<!-- 64&#45;&gt;148 -->
<g id="edge139" class="edge">
<title>64&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1450.63,-3250.1C1365.97,-3238.08 1251,-3205.17 1251,-3115 1251,-3115 1251,-3115 1251,-2753 1251,-2649.06 1199.16,-2595.11 1271,-2520 1313.6,-2475.46 1487.77,-2504.21 1546,-2484 1783.01,-2401.74 2021,-2429.88 2021,-2179 2021,-2179 2021,-2179 2021,-449 2021,-386.65 1952.87,-346.43 1902.62,-325.25"/>
<polygon fill="black" stroke="black" points="1903.73,-321.93 1893.15,-321.39 1901.09,-328.41 1903.73,-321.93"/>
</g>
<!-- 67 -->
<g id="node49" class="node">
<title>67</title>
<polygon fill="none" stroke="black" points="2277.5,-3132 1856.5,-3132 1856.5,-3096 2277.5,-3096 2277.5,-3132"/>
<text text-anchor="middle" x="2067" y="-3110.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 14 14]| newshape=[&#45;1, 14, 14], reverse=0)</text>
</g>
<!-- 65&#45;&gt;67 -->
<g id="edge35" class="edge">
<title>65&#45;&gt;67</title>
<path fill="none" stroke="black" d="M2067,-3167.7C2067,-3159.98 2067,-3150.71 2067,-3142.11"/>
<polygon fill="black" stroke="black" points="2070.5,-3142.1 2067,-3132.1 2063.5,-3142.1 2070.5,-3142.1"/>
</g>
<!-- 78 -->
<g id="node60" class="node">
<title>78</title>
<polygon fill="none" stroke="black" points="1585.5,-3060 1416.5,-3060 1416.5,-3024 1585.5,-3024 1585.5,-3060"/>
<text text-anchor="middle" x="1501" y="-3038.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 67&#45;&gt;78 -->
<g id="edge48" class="edge">
<title>67&#45;&gt;78</title>
<path fill="none" stroke="black" d="M1929.26,-3095.97C1825.81,-3083.17 1686.83,-3065.98 1595.64,-3054.7"/>
<polygon fill="black" stroke="black" points="1595.95,-3051.22 1585.59,-3053.46 1595.09,-3058.16 1595.95,-3051.22"/>
</g>
<!-- 67&#45;&gt;148 -->
<g id="edge132" class="edge">
<title>67&#45;&gt;148</title>
<path fill="none" stroke="black" d="M2133.37,-3096C2191.92,-3076.53 2268,-3038.29 2268,-2971 2268,-2971 2268,-2971 2268,-449 2268,-408.03 2274.79,-387.03 2244,-360 2194.4,-316.45 1998.95,-308.39 1903.3,-307.1"/>
<polygon fill="black" stroke="black" points="1903.3,-303.6 1893.26,-306.98 1903.22,-310.6 1903.3,-303.6"/>
</g>
<!-- 69 -->
<g id="node51" class="node">
<title>69</title>
<polygon fill="none" stroke="black" points="2325.5,-3708 1864.5,-3708 1864.5,-3672 2325.5,-3672 2325.5,-3708"/>
<text text-anchor="middle" x="2095" y="-3686.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 68&#45;&gt;69 -->
<g id="edge37" class="edge">
<title>68&#45;&gt;69</title>
<path fill="none" stroke="black" d="M2107.8,-3743.7C2105.9,-3735.9 2103.62,-3726.51 2101.52,-3717.83"/>
<polygon fill="black" stroke="black" points="2104.92,-3717 2099.15,-3708.1 2098.11,-3718.65 2104.92,-3717"/>
</g>
<!-- 70 -->
<g id="node52" class="node">
<title>70</title>
<polygon fill="none" stroke="black" points="2152.5,-3636 1941.5,-3636 1941.5,-3600 2152.5,-3600 2152.5,-3636"/>
<text text-anchor="middle" x="2047" y="-3614.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 69&#45;&gt;70 -->
<g id="edge38" class="edge">
<title>69&#45;&gt;70</title>
<path fill="none" stroke="black" d="M2083.13,-3671.7C2077.5,-3663.47 2070.65,-3653.48 2064.43,-3644.42"/>
<polygon fill="black" stroke="black" points="2067.27,-3642.37 2058.73,-3636.1 2061.5,-3646.33 2067.27,-3642.37"/>
</g>
<!-- 70&#45;&gt;71 -->
<g id="edge40" class="edge">
<title>70&#45;&gt;71</title>
<path fill="none" stroke="black" d="M1966.45,-3599.97C1918.1,-3589.74 1856.48,-3576.71 1806.46,-3566.13"/>
<polygon fill="black" stroke="black" points="1807.13,-3562.69 1796.62,-3564.05 1805.68,-3569.54 1807.13,-3562.69"/>
</g>
<!-- 70&#45;&gt;148 -->
<g id="edge141" class="edge">
<title>70&#45;&gt;148</title>
<path fill="none" stroke="black" d="M2044.85,-3599.76C2040.62,-3559.91 2034.18,-3459.12 2066,-3384 2138.28,-3213.35 2306,-3228.33 2306,-3043 2306,-3043 2306,-3043 2306,-449 2306,-406.16 2303.07,-385.97 2269,-360 2212.68,-317.07 2002.8,-308.75 1903.16,-307.25"/>
<polygon fill="black" stroke="black" points="1903.05,-303.75 1893.01,-307.12 1902.96,-310.75 1903.05,-303.75"/>
</g>
<!-- 72 -->
<g id="node54" class="node">
<title>72</title>
<polygon fill="none" stroke="black" points="1776.5,-3492 1333.5,-3492 1333.5,-3456 1776.5,-3456 1776.5,-3492"/>
<text text-anchor="middle" x="1555" y="-3470.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 71&#45;&gt;72 -->
<g id="edge41" class="edge">
<title>71&#45;&gt;72</title>
<path fill="none" stroke="black" d="M1676.61,-3527.88C1654.61,-3518.31 1626.99,-3506.3 1603.64,-3496.15"/>
<polygon fill="black" stroke="black" points="1604.82,-3492.85 1594.26,-3492.07 1602.03,-3499.27 1604.82,-3492.85"/>
</g>
<!-- 72&#45;&gt;73 -->
<g id="edge42" class="edge">
<title>72&#45;&gt;73</title>
<path fill="none" stroke="black" d="M1438.68,-3455.97C1337.01,-3441.08 1194.7,-3420.24 1122.66,-3409.69"/>
<polygon fill="black" stroke="black" points="1122.95,-3406.19 1112.55,-3408.21 1121.94,-3413.12 1122.95,-3406.19"/>
</g>
<!-- 74 -->
<g id="node56" class="node">
<title>74</title>
<polygon fill="none" stroke="black" points="1254.5,-3348 875.5,-3348 875.5,-3312 1254.5,-3312 1254.5,-3348"/>
<text text-anchor="middle" x="1065" y="-3326.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 14 12 64]| newshape=..., reverse=0)</text>
</g>
<!-- 73&#45;&gt;74 -->
<g id="edge44" class="edge">
<title>73&#45;&gt;74</title>
<path fill="none" stroke="black" d="M1074.03,-3383.7C1072.71,-3375.98 1071.12,-3366.71 1069.65,-3358.11"/>
<polygon fill="black" stroke="black" points="1073.07,-3357.37 1067.93,-3348.1 1066.17,-3358.55 1073.07,-3357.37"/>
</g>
<!-- 75 -->
<g id="node57" class="node">
<title>75</title>
<polygon fill="none" stroke="black" points="1132.5,-3276 903.5,-3276 903.5,-3240 1132.5,-3240 1132.5,-3276"/>
<text text-anchor="middle" x="1018" y="-3254.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 74&#45;&gt;75 -->
<g id="edge45" class="edge">
<title>74&#45;&gt;75</title>
<path fill="none" stroke="black" d="M1053.38,-3311.7C1047.86,-3303.47 1041.15,-3293.48 1035.07,-3284.42"/>
<polygon fill="black" stroke="black" points="1037.96,-3282.46 1029.48,-3276.1 1032.15,-3286.36 1037.96,-3282.46"/>
</g>
<!-- 76 -->
<g id="node58" class="node">
<title>76</title>
<polygon fill="none" stroke="black" points="1222.5,-3204 801.5,-3204 801.5,-3168 1222.5,-3168 1222.5,-3204"/>
<text text-anchor="middle" x="1012" y="-3182.3" font-family="Times,serif" font-size="14.00">reshape(·, [&#45;1 14 64]| newshape=[&#45;1, 14, 64], reverse=0)</text>
</g>
<!-- 75&#45;&gt;76 -->
<g id="edge46" class="edge">
<title>75&#45;&gt;76</title>
<path fill="none" stroke="black" d="M1016.52,-3239.7C1015.86,-3231.98 1015.06,-3222.71 1014.32,-3214.11"/>
<polygon fill="black" stroke="black" points="1017.81,-3213.77 1013.47,-3204.1 1010.83,-3214.37 1017.81,-3213.77"/>
</g>
<!-- 77 -->
<g id="node59" class="node">
<title>77</title>
<polygon fill="none" stroke="black" points="1117.5,-3132 906.5,-3132 906.5,-3096 1117.5,-3096 1117.5,-3132"/>
<text text-anchor="middle" x="1012" y="-3110.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 76&#45;&gt;77 -->
<g id="edge47" class="edge">
<title>76&#45;&gt;77</title>
<path fill="none" stroke="black" d="M1012,-3167.7C1012,-3159.98 1012,-3150.71 1012,-3142.11"/>
<polygon fill="black" stroke="black" points="1015.5,-3142.1 1012,-3132.1 1008.5,-3142.1 1015.5,-3142.1"/>
</g>
<!-- 77&#45;&gt;78 -->
<g id="edge49" class="edge">
<title>77&#45;&gt;78</title>
<path fill="none" stroke="black" d="M1117.5,-3097.9C1203.43,-3085.6 1323.67,-3068.39 1406.49,-3056.53"/>
<polygon fill="black" stroke="black" points="1406.99,-3059.99 1416.39,-3055.11 1406,-3053.06 1406.99,-3059.99"/>
</g>
<!-- 77&#45;&gt;148 -->
<g id="edge140" class="edge">
<title>77&#45;&gt;148</title>
<path fill="none" stroke="black" d="M906.13,-3107.7C814.97,-3096.75 698,-3064.48 698,-2971 698,-2971 698,-2971 698,-2465 698,-1975.83 1372.56,-2355.55 1686,-1980 1797.61,-1846.28 1793,-1777.17 1793,-1603 1793,-1603 1793,-1603 1793,-449 1793,-405.8 1816,-360.19 1832.78,-332.66"/>
<polygon fill="black" stroke="black" points="1835.83,-334.37 1838.19,-324.04 1829.91,-330.65 1835.83,-334.37"/>
</g>
<!-- 80 -->
<g id="node61" class="node">
<title>80</title>
<polygon fill="none" stroke="black" points="1690.5,-2988 1311.5,-2988 1311.5,-2952 1690.5,-2952 1690.5,-2988"/>
<text text-anchor="middle" x="1501" y="-2966.3" font-family="Times,serif" font-size="14.00">reshape(·, [ 1 12 14 64]| newshape=..., reverse=0)</text>
</g>
<!-- 78&#45;&gt;80 -->
<g id="edge50" class="edge">
<title>78&#45;&gt;80</title>
<path fill="none" stroke="black" d="M1501,-3023.7C1501,-3015.98 1501,-3006.71 1501,-2998.11"/>
<polygon fill="black" stroke="black" points="1504.5,-2998.1 1501,-2988.1 1497.5,-2998.1 1504.5,-2998.1"/>
</g>
<!-- 81 -->
<g id="node62" class="node">
<title>81</title>
<polygon fill="none" stroke="black" points="1615.5,-2916 1386.5,-2916 1386.5,-2880 1615.5,-2880 1615.5,-2916"/>
<text text-anchor="middle" x="1501" y="-2894.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1, 3])</text>
</g>
<!-- 80&#45;&gt;81 -->
<g id="edge51" class="edge">
<title>80&#45;&gt;81</title>
<path fill="none" stroke="black" d="M1501,-2951.7C1501,-2943.98 1501,-2934.71 1501,-2926.11"/>
<polygon fill="black" stroke="black" points="1504.5,-2926.1 1501,-2916.1 1497.5,-2926.1 1504.5,-2926.1"/>
</g>
<!-- 82 -->
<g id="node63" class="node">
<title>82</title>
<polygon fill="none" stroke="black" points="1533.5,-2844 1468.5,-2844 1468.5,-2808 1533.5,-2808 1533.5,-2844"/>
<text text-anchor="middle" x="1501" y="-2822.3" font-family="Times,serif" font-size="14.00">copy(·)</text>
</g>
<!-- 81&#45;&gt;82 -->
<g id="edge52" class="edge">
<title>81&#45;&gt;82</title>
<path fill="none" stroke="black" d="M1501,-2879.7C1501,-2871.98 1501,-2862.71 1501,-2854.11"/>
<polygon fill="black" stroke="black" points="1504.5,-2854.1 1501,-2844.1 1497.5,-2854.1 1504.5,-2854.1"/>
</g>
<!-- 83 -->
<g id="node64" class="node">
<title>83</title>
<polygon fill="none" stroke="black" points="1722.5,-2772 1279.5,-2772 1279.5,-2736 1722.5,-2736 1722.5,-2772"/>
<text text-anchor="middle" x="1501" y="-2750.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 82&#45;&gt;83 -->
<g id="edge53" class="edge">
<title>82&#45;&gt;83</title>
<path fill="none" stroke="black" d="M1501,-2807.7C1501,-2799.98 1501,-2790.71 1501,-2782.11"/>
<polygon fill="black" stroke="black" points="1504.5,-2782.1 1501,-2772.1 1497.5,-2782.1 1504.5,-2782.1"/>
</g>
<!-- 84 -->
<g id="node65" class="node">
<title>84</title>
<polygon fill="none" stroke="black" points="1964,-2700 1516,-2700 1516,-2664 1964,-2664 1964,-2700"/>
<text text-anchor="middle" x="1740" y="-2678.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 83&#45;&gt;84 -->
<g id="edge54" class="edge">
<title>83&#45;&gt;84</title>
<path fill="none" stroke="black" d="M1559.16,-2735.97C1593.13,-2726.01 1636.18,-2713.41 1671.76,-2702.99"/>
<polygon fill="black" stroke="black" points="1672.97,-2706.28 1681.58,-2700.11 1671,-2699.56 1672.97,-2706.28"/>
</g>
<!-- 88 -->
<g id="node69" class="node">
<title>88</title>
<polygon fill="none" stroke="black" points="2164.5,-2628 1995.5,-2628 1995.5,-2592 2164.5,-2592 2164.5,-2628"/>
<text text-anchor="middle" x="2080" y="-2606.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 84&#45;&gt;88 -->
<g id="edge58" class="edge">
<title>84&#45;&gt;88</title>
<path fill="none" stroke="black" d="M1822.74,-2663.97C1872.4,-2653.74 1935.7,-2640.71 1987.08,-2630.13"/>
<polygon fill="black" stroke="black" points="1988.1,-2633.49 1997.19,-2628.05 1986.69,-2626.64 1988.1,-2633.49"/>
</g>
<!-- 84&#45;&gt;148 -->
<g id="edge131" class="edge">
<title>84&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1782.63,-2663.96C1831.3,-2644.87 1913.58,-2613.78 1986,-2592 2051.24,-2572.38 2087.47,-2604.81 2135,-2556 2163.22,-2527.02 2154,-2507.45 2154,-2467 2154,-2467 2154,-2467 2154,-449 2154,-339.71 1990.71,-314.34 1903.48,-308.59"/>
<polygon fill="black" stroke="black" points="1903.42,-305.08 1893.23,-307.98 1903.01,-312.07 1903.42,-305.08"/>
</g>
<!-- 86 -->
<g id="node67" class="node">
<title>86</title>
<polygon fill="none" stroke="black" points="2201.5,-2772 1740.5,-2772 1740.5,-2736 2201.5,-2736 2201.5,-2772"/>
<text text-anchor="middle" x="1971" y="-2750.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 768 768]| newshape=[&#45;1, 768, 768], reverse=0)</text>
</g>
<!-- 85&#45;&gt;86 -->
<g id="edge56" class="edge">
<title>85&#45;&gt;86</title>
<path fill="none" stroke="black" d="M1951.43,-2807.7C1954.36,-2799.81 1957.89,-2790.3 1961.14,-2781.55"/>
<polygon fill="black" stroke="black" points="1964.45,-2782.7 1964.65,-2772.1 1957.88,-2780.26 1964.45,-2782.7"/>
</g>
<!-- 87 -->
<g id="node68" class="node">
<title>87</title>
<polygon fill="none" stroke="black" points="2193.5,-2700 1982.5,-2700 1982.5,-2664 2193.5,-2664 2193.5,-2700"/>
<text text-anchor="middle" x="2088" y="-2678.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 86&#45;&gt;87 -->
<g id="edge57" class="edge">
<title>86&#45;&gt;87</title>
<path fill="none" stroke="black" d="M1999.62,-2735.88C2014.92,-2726.72 2033.94,-2715.34 2050.42,-2705.48"/>
<polygon fill="black" stroke="black" points="2052.48,-2708.33 2059.27,-2700.19 2048.89,-2702.32 2052.48,-2708.33"/>
</g>
<!-- 87&#45;&gt;88 -->
<g id="edge59" class="edge">
<title>87&#45;&gt;88</title>
<path fill="none" stroke="black" d="M2086.02,-2663.7C2085.14,-2655.98 2084.08,-2646.71 2083.1,-2638.11"/>
<polygon fill="black" stroke="black" points="2086.57,-2637.64 2081.95,-2628.1 2079.61,-2638.44 2086.57,-2637.64"/>
</g>
<!-- 87&#45;&gt;148 -->
<g id="edge142" class="edge">
<title>87&#45;&gt;148</title>
<path fill="none" stroke="black" d="M2129.12,-2663.99C2145.09,-2655.43 2162.11,-2643.52 2173,-2628 2196.23,-2594.89 2192,-2579.45 2192,-2539 2192,-2539 2192,-2539 2192,-449 2192,-402.03 2173.97,-386.23 2135,-360 2097.89,-335.02 1974.91,-319.18 1903.2,-311.83"/>
<polygon fill="black" stroke="black" points="1903.3,-308.32 1893,-310.8 1902.6,-315.28 1903.3,-308.32"/>
</g>
<!-- 89 -->
<g id="node70" class="node">
<title>89</title>
<polygon fill="none" stroke="black" points="1723.5,-2556 1280.5,-2556 1280.5,-2520 1723.5,-2520 1723.5,-2556"/>
<text text-anchor="middle" x="1502" y="-2534.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 88&#45;&gt;89 -->
<g id="edge60" class="edge">
<title>88&#45;&gt;89</title>
<path fill="none" stroke="black" d="M1995.33,-2598.75C1905.56,-2587.87 1761.92,-2570.48 1652.73,-2557.25"/>
<polygon fill="black" stroke="black" points="1652.96,-2553.76 1642.61,-2556.03 1652.11,-2560.71 1652.96,-2553.76"/>
</g>
<!-- 89&#45;&gt;90 -->
<g id="edge61" class="edge">
<title>89&#45;&gt;90</title>
<path fill="none" stroke="black" d="M1502,-2519.7C1502,-2511.98 1502,-2502.71 1502,-2494.11"/>
<polygon fill="black" stroke="black" points="1505.5,-2494.1 1502,-2484.1 1498.5,-2494.1 1505.5,-2494.1"/>
</g>
<!-- 93 -->
<g id="node73" class="node">
<title>93</title>
<polygon fill="none" stroke="black" points="1635,-2412 1531,-2412 1531,-2376 1635,-2376 1635,-2412"/>
<text text-anchor="middle" x="1583" y="-2390.3" font-family="Times,serif" font-size="14.00">multiply(·, ·)</text>
</g>
<!-- 90&#45;&gt;93 -->
<g id="edge64" class="edge">
<title>90&#45;&gt;93</title>
<path fill="none" stroke="black" d="M1522.02,-2447.7C1532.13,-2438.97 1544.54,-2428.24 1555.52,-2418.75"/>
<polygon fill="black" stroke="black" points="1557.93,-2421.29 1563.21,-2412.1 1553.35,-2415.99 1557.93,-2421.29"/>
</g>
<!-- 92&#45;&gt;93 -->
<g id="edge65" class="edge">
<title>92&#45;&gt;93</title>
<path fill="none" stroke="black" d="M1848.58,-2447.97C1785.7,-2435.42 1701.64,-2418.66 1645.08,-2407.38"/>
<polygon fill="black" stroke="black" points="1645.69,-2403.93 1635.2,-2405.41 1644.32,-2410.8 1645.69,-2403.93"/>
</g>
<!-- 92&#45;&gt;148 -->
<g id="edge143" class="edge">
<title>92&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1964.9,-2447.82C2001.91,-2424.69 2059,-2379.82 2059,-2323 2059,-2323 2059,-2323 2059,-449 2059,-408.03 2063.74,-389.19 2035,-360 2000.54,-324.99 1944.63,-312.71 1903.28,-308.61"/>
<polygon fill="black" stroke="black" points="1903.37,-305.1 1893.11,-307.73 1902.77,-312.07 1903.37,-305.1"/>
</g>
<!-- 93&#45;&gt;94 -->
<g id="edge66" class="edge">
<title>93&#45;&gt;94</title>
<path fill="none" stroke="black" d="M1583,-2375.7C1583,-2367.98 1583,-2358.71 1583,-2350.11"/>
<polygon fill="black" stroke="black" points="1586.5,-2350.1 1583,-2340.1 1579.5,-2350.1 1586.5,-2350.1"/>
</g>
<!-- 96 -->
<g id="node75" class="node">
<title>96</title>
<polygon fill="none" stroke="black" points="1746.5,-2268 1419.5,-2268 1419.5,-2232 1746.5,-2232 1746.5,-2268"/>
<text text-anchor="middle" x="1583" y="-2246.3" font-family="Times,serif" font-size="14.00">mean(·| axis=[&#45;1], keepdims=1, exclude=0)</text>
</g>
<!-- 94&#45;&gt;96 -->
<g id="edge68" class="edge">
<title>94&#45;&gt;96</title>
<path fill="none" stroke="black" d="M1583,-2303.7C1583,-2295.98 1583,-2286.71 1583,-2278.11"/>
<polygon fill="black" stroke="black" points="1586.5,-2278.1 1583,-2268.1 1579.5,-2278.1 1586.5,-2278.1"/>
</g>
<!-- 97 -->
<g id="node76" class="node">
<title>97</title>
<polygon fill="none" stroke="black" points="1536.5,-2196 1431.5,-2196 1431.5,-2160 1536.5,-2160 1536.5,-2196"/>
<text text-anchor="middle" x="1484" y="-2174.3" font-family="Times,serif" font-size="14.00">subtract(·, ·)</text>
</g>
<!-- 94&#45;&gt;97 -->
<g id="edge69" class="edge">
<title>94&#45;&gt;97</title>
<path fill="none" stroke="black" d="M1547.21,-2321.57C1506.46,-2320.05 1441.47,-2310.56 1410,-2268 1400.49,-2255.14 1402.69,-2246.23 1410,-2232 1416.27,-2219.79 1426.81,-2209.77 1437.99,-2201.85"/>
<polygon fill="black" stroke="black" points="1440.16,-2204.61 1446.61,-2196.2 1436.33,-2198.75 1440.16,-2204.61"/>
</g>
<!-- 100 -->
<g id="node77" class="node">
<title>100</title>
<polygon fill="none" stroke="black" points="1955,-2196 1593,-2196 1593,-2160 1955,-2160 1955,-2196"/>
<text text-anchor="middle" x="1774" y="-2174.3" font-family="Times,serif" font-size="14.00">variance(·, ·| axis=[&#45;1], keepdims=1, exclude=0)</text>
</g>
<!-- 94&#45;&gt;100 -->
<g id="edge71" class="edge">
<title>94&#45;&gt;100</title>
<path fill="none" stroke="black" d="M1618.84,-2320.14C1658.06,-2317.19 1719.98,-2306.27 1755,-2268 1770.21,-2251.38 1774.37,-2225.66 1775.07,-2206.14"/>
<polygon fill="black" stroke="black" points="1778.57,-2206.05 1775.15,-2196.03 1771.57,-2206 1778.57,-2206.05"/>
</g>
<!-- 94&#45;&gt;148 -->
<g id="edge130" class="edge">
<title>94&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1618.85,-2316.13C1717.03,-2301.81 1983,-2255.91 1983,-2179 1983,-2179 1983,-2179 1983,-449 1983,-395.21 1931.54,-353.4 1892.64,-329.47"/>
<polygon fill="black" stroke="black" points="1893.94,-326.18 1883.56,-324.07 1890.36,-332.19 1893.94,-326.18"/>
</g>
<!-- 96&#45;&gt;97 -->
<g id="edge70" class="edge">
<title>96&#45;&gt;97</title>
<path fill="none" stroke="black" d="M1558.78,-2231.88C1546.08,-2222.89 1530.34,-2211.76 1516.58,-2202.03"/>
<polygon fill="black" stroke="black" points="1518.5,-2199.11 1508.31,-2196.19 1514.46,-2204.82 1518.5,-2199.11"/>
</g>
<!-- 96&#45;&gt;100 -->
<g id="edge72" class="edge">
<title>96&#45;&gt;100</title>
<path fill="none" stroke="black" d="M1629.72,-2231.88C1656.3,-2222.14 1689.76,-2209.87 1717.77,-2199.61"/>
<polygon fill="black" stroke="black" points="1719.24,-2202.8 1727.43,-2196.07 1716.83,-2196.22 1719.24,-2202.8"/>
</g>
<!-- 96&#45;&gt;148 -->
<g id="edge144" class="edge">
<title>96&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1580.09,-2231.74C1577.71,-2213.41 1575.9,-2183.85 1584,-2160 1616.16,-2065.31 1667.09,-2065.47 1719,-1980 1778.64,-1881.79 1831,-1861.9 1831,-1747 1831,-1747 1831,-1747 1831,-449 1831,-408.86 1838.55,-362.9 1844.13,-334.4"/>
<polygon fill="black" stroke="black" points="1847.61,-334.88 1846.16,-324.39 1840.74,-333.5 1847.61,-334.88"/>
</g>
<!-- 104 -->
<g id="node80" class="node">
<title>104</title>
<polygon fill="none" stroke="black" points="1860.5,-1980 1771.5,-1980 1771.5,-1944 1860.5,-1944 1860.5,-1980"/>
<text text-anchor="middle" x="1816" y="-1958.3" font-family="Times,serif" font-size="14.00">divide(·, ·)</text>
</g>
<!-- 97&#45;&gt;104 -->
<g id="edge75" class="edge">
<title>97&#45;&gt;104</title>
<path fill="none" stroke="black" d="M1510.61,-2159.85C1570.1,-2121.5 1713.89,-2028.82 1780.74,-1985.73"/>
<polygon fill="black" stroke="black" points="1782.76,-1988.59 1789.27,-1980.23 1778.96,-1982.71 1782.76,-1988.59"/>
</g>
<!-- 101 -->
<g id="node78" class="node">
<title>101</title>
<polygon fill="none" stroke="black" points="1819.5,-2124 1758.5,-2124 1758.5,-2088 1819.5,-2088 1819.5,-2124"/>
<text text-anchor="middle" x="1789" y="-2102.3" font-family="Times,serif" font-size="14.00">sqrt(·)</text>
</g>
<!-- 100&#45;&gt;101 -->
<g id="edge73" class="edge">
<title>100&#45;&gt;101</title>
<path fill="none" stroke="black" d="M1777.71,-2159.7C1779.36,-2151.98 1781.35,-2142.71 1783.19,-2134.11"/>
<polygon fill="black" stroke="black" points="1786.66,-2134.62 1785.33,-2124.1 1779.82,-2133.15 1786.66,-2134.62"/>
</g>
<!-- 100&#45;&gt;148 -->
<g id="edge146" class="edge">
<title>100&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1823.92,-2159.87C1874.39,-2139.01 1945,-2098.48 1945,-2035 1945,-2035 1945,-2035 1945,-449 1945,-401.01 1907.06,-356.96 1879.13,-331.04"/>
<polygon fill="black" stroke="black" points="1881.33,-328.31 1871.56,-324.22 1876.65,-333.52 1881.33,-328.31"/>
</g>
<!-- 103 -->
<g id="node79" class="node">
<title>103</title>
<polygon fill="none" stroke="black" points="1869.5,-2052 1762.5,-2052 1762.5,-2016 1869.5,-2016 1869.5,-2052"/>
<text text-anchor="middle" x="1816" y="-2030.3" font-family="Times,serif" font-size="14.00">add(·, 1e&#45;12)</text>
</g>
<!-- 101&#45;&gt;103 -->
<g id="edge74" class="edge">
<title>101&#45;&gt;103</title>
<path fill="none" stroke="black" d="M1795.67,-2087.7C1798.71,-2079.81 1802.38,-2070.3 1805.76,-2061.55"/>
<polygon fill="black" stroke="black" points="1809.07,-2062.69 1809.4,-2052.1 1802.54,-2060.17 1809.07,-2062.69"/>
</g>
<!-- 103&#45;&gt;104 -->
<g id="edge76" class="edge">
<title>103&#45;&gt;104</title>
<path fill="none" stroke="black" d="M1816,-2015.7C1816,-2007.98 1816,-1998.71 1816,-1990.11"/>
<polygon fill="black" stroke="black" points="1819.5,-1990.1 1816,-1980.1 1812.5,-1990.1 1819.5,-1990.1"/>
</g>
<!-- 103&#45;&gt;148 -->
<g id="edge145" class="edge">
<title>103&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1836.65,-2015.58C1863.5,-1990.87 1907,-1942.83 1907,-1891 1907,-1891 1907,-1891 1907,-449 1907,-405.8 1884,-360.19 1867.22,-332.66"/>
<polygon fill="black" stroke="black" points="1870.09,-330.65 1861.81,-324.04 1864.17,-334.37 1870.09,-330.65"/>
</g>
<!-- 104&#45;&gt;105 -->
<g id="edge77" class="edge">
<title>104&#45;&gt;105</title>
<path fill="none" stroke="black" d="M1771.26,-1954.44C1746.23,-1950.94 1714.43,-1946.8 1686,-1944 1413.65,-1917.14 1087.43,-1900.14 953.27,-1893.8"/>
<polygon fill="black" stroke="black" points="953.34,-1890.3 943.18,-1893.33 953.01,-1897.3 953.34,-1890.3"/>
</g>
<!-- 104&#45;&gt;148 -->
<g id="edge129" class="edge">
<title>104&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1826.98,-1943.84C1842.51,-1917.89 1869,-1866.63 1869,-1819 1869,-1819 1869,-1819 1869,-449 1869,-408.86 1861.45,-362.9 1855.87,-334.4"/>
<polygon fill="black" stroke="black" points="1859.26,-333.5 1853.84,-324.39 1852.39,-334.88 1859.26,-333.5"/>
</g>
<!-- 105&#45;&gt;106 -->
<g id="edge79" class="edge">
<title>105&#45;&gt;106</title>
<path fill="none" stroke="black" d="M838.86,-1872.41C801.58,-1860.55 751.9,-1844.74 716.28,-1833.41"/>
<polygon fill="black" stroke="black" points="717.21,-1830.03 706.62,-1830.33 715.09,-1836.7 717.21,-1830.03"/>
</g>
<!-- 107 -->
<g id="node83" class="node">
<title>107</title>
<polygon fill="none" stroke="black" points="957,-1764 509,-1764 509,-1728 957,-1728 957,-1764"/>
<text text-anchor="middle" x="733" y="-1742.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#45;1 &#160;14 768]| newshape=[&#45;1, 14, 768], reverse=0)</text>
</g>
<!-- 106&#45;&gt;107 -->
<g id="edge81" class="edge">
<title>106&#45;&gt;107</title>
<path fill="none" stroke="black" d="M686.33,-1799.7C693.76,-1791.3 702.82,-1781.07 710.98,-1771.86"/>
<polygon fill="black" stroke="black" points="713.84,-1773.91 717.85,-1764.1 708.6,-1769.27 713.84,-1773.91"/>
</g>
<!-- 137 -->
<g id="node104" class="node">
<title>137</title>
<polygon fill="none" stroke="black" points="901.5,-756 830.5,-756 830.5,-720 901.5,-720 901.5,-756"/>
<text text-anchor="middle" x="866" y="-734.3" font-family="Times,serif" font-size="14.00">add(·, ·)</text>
</g>
<!-- 106&#45;&gt;137 -->
<g id="edge109" class="edge">
<title>106&#45;&gt;137</title>
<path fill="none" stroke="black" d="M635.4,-1817.14C496.68,-1816.73 0,-1806.13 0,-1675 0,-1675 0,-1675 0,-881 0,-797.07 641.66,-752.42 820.32,-741.61"/>
<polygon fill="black" stroke="black" points="820.66,-745.1 830.44,-741 820.25,-738.11 820.66,-745.1"/>
</g>
<!-- 112 -->
<g id="node87" class="node">
<title>112</title>
<polygon fill="none" stroke="black" points="1080.5,-1692 911.5,-1692 911.5,-1656 1080.5,-1656 1080.5,-1692"/>
<text text-anchor="middle" x="996" y="-1670.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 107&#45;&gt;112 -->
<g id="edge85" class="edge">
<title>107&#45;&gt;112</title>
<path fill="none" stroke="black" d="M797,-1727.97C834.71,-1717.93 882.57,-1705.19 921.91,-1694.72"/>
<polygon fill="black" stroke="black" points="922.95,-1698.07 931.71,-1692.11 921.15,-1691.3 922.95,-1698.07"/>
</g>
<!-- 107&#45;&gt;148 -->
<g id="edge128" class="edge">
<title>107&#45;&gt;148</title>
<path fill="none" stroke="black" d="M731.56,-1727.82C724.8,-1637.07 703.77,-1207.36 884,-936 1032.93,-711.77 1327,-792.18 1327,-523 1327,-523 1327,-523 1327,-449 1327,-406.5 1328.22,-385.8 1362,-360 1396.02,-334.02 1677.17,-316.18 1796.63,-309.7"/>
<polygon fill="black" stroke="black" points="1796.85,-313.19 1806.65,-309.16 1796.48,-306.2 1796.85,-313.19"/>
</g>
<!-- 110 -->
<g id="node85" class="node">
<title>110</title>
<polygon fill="none" stroke="black" points="1603.5,-1836 1206.5,-1836 1206.5,-1800 1603.5,-1800 1603.5,-1836"/>
<text text-anchor="middle" x="1405" y="-1814.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#45;1 &#160;768 3072]| newshape=..., reverse=0)</text>
</g>
<!-- 108&#45;&gt;110 -->
<g id="edge83" class="edge">
<title>108&#45;&gt;110</title>
<path fill="none" stroke="black" d="M1405,-1871.7C1405,-1863.98 1405,-1854.71 1405,-1846.11"/>
<polygon fill="black" stroke="black" points="1408.5,-1846.1 1405,-1836.1 1401.5,-1846.1 1408.5,-1846.1"/>
</g>
<!-- 111 -->
<g id="node86" class="node">
<title>111</title>
<polygon fill="none" stroke="black" points="1510.5,-1764 1299.5,-1764 1299.5,-1728 1510.5,-1728 1510.5,-1764"/>
<text text-anchor="middle" x="1405" y="-1742.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 110&#45;&gt;111 -->
<g id="edge84" class="edge">
<title>110&#45;&gt;111</title>
<path fill="none" stroke="black" d="M1405,-1799.7C1405,-1791.98 1405,-1782.71 1405,-1774.11"/>
<polygon fill="black" stroke="black" points="1408.5,-1774.1 1405,-1764.1 1401.5,-1774.1 1408.5,-1774.1"/>
</g>
<!-- 111&#45;&gt;112 -->
<g id="edge86" class="edge">
<title>111&#45;&gt;112</title>
<path fill="none" stroke="black" d="M1305.47,-1727.97C1240.16,-1716.79 1155.24,-1702.25 1090.89,-1691.24"/>
<polygon fill="black" stroke="black" points="1091.19,-1687.74 1080.74,-1689.5 1090.01,-1694.64 1091.19,-1687.74"/>
</g>
<!-- 111&#45;&gt;148 -->
<g id="edge147" class="edge">
<title>111&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1490.62,-1727.99C1594.24,-1704.89 1755,-1659.89 1755,-1603 1755,-1603 1755,-1603 1755,-449 1755,-408.03 1755.99,-393.89 1779,-360 1786.92,-348.34 1798.27,-338.23 1809.57,-330.07"/>
<polygon fill="black" stroke="black" points="1811.87,-332.74 1818.15,-324.2 1807.92,-326.96 1811.87,-332.74"/>
</g>
<!-- 114 -->
<g id="node88" class="node">
<title>114</title>
<polygon fill="none" stroke="black" points="1231,-1620 761,-1620 761,-1584 1231,-1584 1231,-1620"/>
<text text-anchor="middle" x="996" y="-1598.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#160;1 &#160;&#160;14 3072]| newshape=[1, 14, 3072], reverse=0)</text>
</g>
<!-- 112&#45;&gt;114 -->
<g id="edge87" class="edge">
<title>112&#45;&gt;114</title>
<path fill="none" stroke="black" d="M996,-1655.7C996,-1647.98 996,-1638.71 996,-1630.11"/>
<polygon fill="black" stroke="black" points="999.5,-1630.1 996,-1620.1 992.5,-1630.1 999.5,-1630.1"/>
</g>
<!-- 114&#45;&gt;115 -->
<g id="edge88" class="edge">
<title>114&#45;&gt;115</title>
<path fill="none" stroke="black" d="M1006.13,-1583.7C1010.85,-1575.64 1016.56,-1565.89 1021.78,-1556.98"/>
<polygon fill="black" stroke="black" points="1024.95,-1558.5 1026.98,-1548.1 1018.91,-1554.96 1024.95,-1558.5"/>
</g>
<!-- 119 -->
<g id="node90" class="node">
<title>119</title>
<polygon fill="none" stroke="black" points="1249.5,-1476 1064.5,-1476 1064.5,-1440 1249.5,-1440 1249.5,-1476"/>
<text text-anchor="middle" x="1157" y="-1454.3" font-family="Times,serif" font-size="14.00">multiply(·, 0.70710677)</text>
</g>
<!-- 115&#45;&gt;119 -->
<g id="edge90" class="edge">
<title>115&#45;&gt;119</title>
<path fill="none" stroke="black" d="M1066.36,-1511.88C1082.05,-1502.72 1101.56,-1491.34 1118.45,-1481.48"/>
<polygon fill="black" stroke="black" points="1120.66,-1484.25 1127.53,-1476.19 1117.13,-1478.21 1120.66,-1484.25"/>
</g>
<!-- 124 -->
<g id="node94" class="node">
<title>124</title>
<polygon fill="none" stroke="black" points="1178,-1188 1074,-1188 1074,-1152 1178,-1152 1178,-1188"/>
<text text-anchor="middle" x="1126" y="-1166.3" font-family="Times,serif" font-size="14.00">multiply(·, ·)</text>
</g>
<!-- 115&#45;&gt;124 -->
<g id="edge94" class="edge">
<title>115&#45;&gt;124</title>
<path fill="none" stroke="black" d="M1029.31,-1511.91C1018.21,-1485.62 999,-1433.45 999,-1387 999,-1387 999,-1387 999,-1313 999,-1270.97 1003.81,-1256.05 1031,-1224 1041.93,-1211.12 1056.81,-1200.77 1071.56,-1192.76"/>
<polygon fill="black" stroke="black" points="1073.39,-1195.75 1080.69,-1188.06 1070.19,-1189.52 1073.39,-1195.75"/>
</g>
<!-- 115&#45;&gt;148 -->
<g id="edge127" class="edge">
<title>115&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1036.43,-1511.69C1034.9,-1453.51 1033.15,-1266.29 1072,-1224 1108.23,-1184.56 1139.86,-1213.42 1187,-1188 1209.24,-1176.01 1208.45,-1163.39 1231,-1152 1288.73,-1122.83 1327.65,-1163.08 1372,-1116 1421.96,-1062.96 1403,-1027.87 1403,-955 1403,-955 1403,-955 1403,-449 1403,-394.3 1439.41,-385.11 1488,-360 1540.74,-332.75 1709.42,-317.04 1796.64,-310.54"/>
<polygon fill="black" stroke="black" points="1797.15,-314.02 1806.86,-309.8 1796.64,-307.04 1797.15,-314.02"/>
</g>
<!-- 120 -->
<g id="node91" class="node">
<title>120</title>
<polygon fill="none" stroke="black" points="1169,-1404 1115,-1404 1115,-1368 1169,-1368 1169,-1404"/>
<text text-anchor="middle" x="1142" y="-1382.3" font-family="Times,serif" font-size="14.00">erf(·)</text>
</g>
<!-- 119&#45;&gt;120 -->
<g id="edge91" class="edge">
<title>119&#45;&gt;120</title>
<path fill="none" stroke="black" d="M1153.29,-1439.7C1151.64,-1431.98 1149.65,-1422.71 1147.81,-1414.11"/>
<polygon fill="black" stroke="black" points="1151.18,-1413.15 1145.67,-1404.1 1144.34,-1414.62 1151.18,-1413.15"/>
</g>
<!-- 119&#45;&gt;148 -->
<g id="edge149" class="edge">
<title>119&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1249.79,-1452.53C1408.93,-1442.31 1717,-1410.22 1717,-1315 1717,-1315 1717,-1315 1717,-449 1717,-408.03 1714.9,-391.58 1741,-360 1755.47,-342.5 1777.11,-330.34 1797.3,-322.12"/>
<polygon fill="black" stroke="black" points="1798.66,-325.35 1806.75,-318.51 1796.16,-318.81 1798.66,-325.35"/>
</g>
<!-- 122 -->
<g id="node92" class="node">
<title>122</title>
<polygon fill="none" stroke="black" points="1187,-1332 1065,-1332 1065,-1296 1187,-1296 1187,-1332"/>
<text text-anchor="middle" x="1126" y="-1310.3" font-family="Times,serif" font-size="14.00">multiply(·, 0.5)</text>
</g>
<!-- 120&#45;&gt;122 -->
<g id="edge92" class="edge">
<title>120&#45;&gt;122</title>
<path fill="none" stroke="black" d="M1138.04,-1367.7C1136.28,-1359.98 1134.16,-1350.71 1132.2,-1342.11"/>
<polygon fill="black" stroke="black" points="1135.55,-1341.07 1129.91,-1332.1 1128.73,-1342.63 1135.55,-1341.07"/>
</g>
<!-- 123 -->
<g id="node93" class="node">
<title>123</title>
<polygon fill="none" stroke="black" points="1170.5,-1260 1081.5,-1260 1081.5,-1224 1170.5,-1224 1170.5,-1260"/>
<text text-anchor="middle" x="1126" y="-1238.3" font-family="Times,serif" font-size="14.00">add(0.5, ·)</text>
</g>
<!-- 122&#45;&gt;123 -->
<g id="edge93" class="edge">
<title>122&#45;&gt;123</title>
<path fill="none" stroke="black" d="M1126,-1295.7C1126,-1287.98 1126,-1278.71 1126,-1270.11"/>
<polygon fill="black" stroke="black" points="1129.5,-1270.1 1126,-1260.1 1122.5,-1270.1 1129.5,-1270.1"/>
</g>
<!-- 123&#45;&gt;124 -->
<g id="edge95" class="edge">
<title>123&#45;&gt;124</title>
<path fill="none" stroke="black" d="M1126,-1223.7C1126,-1215.98 1126,-1206.71 1126,-1198.11"/>
<polygon fill="black" stroke="black" points="1129.5,-1198.1 1126,-1188.1 1122.5,-1198.1 1129.5,-1198.1"/>
</g>
<!-- 123&#45;&gt;148 -->
<g id="edge148" class="edge">
<title>123&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1163.11,-1223.82C1183.51,-1213.99 1209.06,-1201.05 1231,-1188 1255.16,-1173.63 1257.63,-1164.1 1283,-1152 1334.41,-1127.47 1367.17,-1157.67 1406,-1116 1455.92,-1062.42 1441,-1028.23 1441,-955 1441,-955 1441,-955 1441,-449 1441,-374.56 1685.83,-330.31 1796.64,-314.09"/>
<polygon fill="black" stroke="black" points="1797.22,-317.54 1806.62,-312.65 1796.22,-310.61 1797.22,-317.54"/>
</g>
<!-- 126 -->
<g id="node95" class="node">
<title>126</title>
<polygon fill="none" stroke="black" points="1363.5,-1116 888.5,-1116 888.5,-1080 1363.5,-1080 1363.5,-1116"/>
<text text-anchor="middle" x="1126" y="-1094.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#45;1 &#160;&#160;14 3072]| newshape=[&#45;1, 14, 3072], reverse=0)</text>
</g>
<!-- 124&#45;&gt;126 -->
<g id="edge96" class="edge">
<title>124&#45;&gt;126</title>
<path fill="none" stroke="black" d="M1126,-1151.7C1126,-1143.98 1126,-1134.71 1126,-1126.11"/>
<polygon fill="black" stroke="black" points="1129.5,-1126.1 1126,-1116.1 1122.5,-1126.1 1129.5,-1126.1"/>
</g>
<!-- 131 -->
<g id="node99" class="node">
<title>131</title>
<polygon fill="none" stroke="black" points="1210.5,-1044 1041.5,-1044 1041.5,-1008 1210.5,-1008 1210.5,-1044"/>
<text text-anchor="middle" x="1126" y="-1022.3" font-family="Times,serif" font-size="14.00">nn.batch_matmul(·, ·)</text>
</g>
<!-- 126&#45;&gt;131 -->
<g id="edge100" class="edge">
<title>126&#45;&gt;131</title>
<path fill="none" stroke="black" d="M1126,-1079.7C1126,-1071.98 1126,-1062.71 1126,-1054.11"/>
<polygon fill="black" stroke="black" points="1129.5,-1054.1 1126,-1044.1 1122.5,-1054.1 1129.5,-1054.1"/>
</g>
<!-- 126&#45;&gt;148 -->
<g id="edge126" class="edge">
<title>126&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1220.46,-1079.91C1287.26,-1061.74 1365,-1025.47 1365,-955 1365,-955 1365,-955 1365,-449 1365,-408.03 1357.96,-386.73 1389,-360 1419.41,-333.81 1681.82,-316.28 1796.61,-309.8"/>
<polygon fill="black" stroke="black" points="1797.13,-313.27 1806.92,-309.22 1796.74,-306.28 1797.13,-313.27"/>
</g>
<!-- 129 -->
<g id="node97" class="node">
<title>129</title>
<polygon fill="none" stroke="black" points="1689.5,-1188 1292.5,-1188 1292.5,-1152 1689.5,-1152 1689.5,-1188"/>
<text text-anchor="middle" x="1491" y="-1166.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;&#45;1 3072 &#160;768]| newshape=..., reverse=0)</text>
</g>
<!-- 127&#45;&gt;129 -->
<g id="edge98" class="edge">
<title>127&#45;&gt;129</title>
<path fill="none" stroke="black" d="M1474.44,-1223.7C1476.89,-1215.9 1479.84,-1206.51 1482.57,-1197.83"/>
<polygon fill="black" stroke="black" points="1485.96,-1198.69 1485.62,-1188.1 1479.29,-1196.59 1485.96,-1198.69"/>
</g>
<!-- 130 -->
<g id="node98" class="node">
<title>130</title>
<polygon fill="none" stroke="black" points="1669.5,-1116 1458.5,-1116 1458.5,-1080 1669.5,-1080 1669.5,-1116"/>
<text text-anchor="middle" x="1564" y="-1094.3" font-family="Times,serif" font-size="14.00">transpose(·| axes=[0, 2, 1])</text>
</g>
<!-- 129&#45;&gt;130 -->
<g id="edge99" class="edge">
<title>129&#45;&gt;130</title>
<path fill="none" stroke="black" d="M1509.04,-1151.7C1518.06,-1143.05 1529.12,-1132.45 1538.94,-1123.03"/>
<polygon fill="black" stroke="black" points="1541.37,-1125.55 1546.16,-1116.1 1536.52,-1120.5 1541.37,-1125.55"/>
</g>
<!-- 130&#45;&gt;131 -->
<g id="edge101" class="edge">
<title>130&#45;&gt;131</title>
<path fill="none" stroke="black" d="M1458.25,-1080.1C1385.86,-1068.53 1290.7,-1053.32 1220.78,-1042.15"/>
<polygon fill="black" stroke="black" points="1221.08,-1038.65 1210.66,-1040.53 1219.98,-1045.56 1221.08,-1038.65"/>
</g>
<!-- 130&#45;&gt;148 -->
<g id="edge150" class="edge">
<title>130&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1576.02,-1079.99C1593.01,-1054.23 1622,-1003.2 1622,-955 1622,-955 1622,-955 1622,-449 1622,-367.05 1729.43,-330.7 1796.81,-315.96"/>
<polygon fill="black" stroke="black" points="1797.78,-319.33 1806.85,-313.86 1796.34,-312.48 1797.78,-319.33"/>
</g>
<!-- 132 -->
<g id="node100" class="node">
<title>132</title>
<polygon fill="none" stroke="black" points="1336.5,-972 893.5,-972 893.5,-936 1336.5,-936 1336.5,-972"/>
<text text-anchor="middle" x="1115" y="-950.3" font-family="Times,serif" font-size="14.00">reshape(·, [ &#160;1 &#160;14 768]| newshape=[1, 14, 768], reverse=0)</text>
</g>
<!-- 131&#45;&gt;132 -->
<g id="edge102" class="edge">
<title>131&#45;&gt;132</title>
<path fill="none" stroke="black" d="M1123.28,-1007.7C1122.07,-999.98 1120.61,-990.71 1119.26,-982.11"/>
<polygon fill="black" stroke="black" points="1122.7,-981.44 1117.69,-972.1 1115.78,-982.53 1122.7,-981.44"/>
</g>
<!-- 132&#45;&gt;133 -->
<g id="edge103" class="edge">
<title>132&#45;&gt;133</title>
<path fill="none" stroke="black" d="M1051.97,-935.97C1005.49,-923.4 943.34,-906.61 901.6,-895.33"/>
<polygon fill="black" stroke="black" points="902.28,-891.88 891.72,-892.65 900.46,-898.64 902.28,-891.88"/>
</g>
<!-- 136 -->
<g id="node103" class="node">
<title>136</title>
<polygon fill="none" stroke="black" points="908,-828 804,-828 804,-792 908,-792 908,-828"/>
<text text-anchor="middle" x="856" y="-806.3" font-family="Times,serif" font-size="14.00">multiply(·, ·)</text>
</g>
<!-- 133&#45;&gt;136 -->
<g id="edge106" class="edge">
<title>133&#45;&gt;136</title>
<path fill="none" stroke="black" d="M856,-863.7C856,-855.98 856,-846.71 856,-838.11"/>
<polygon fill="black" stroke="black" points="859.5,-838.1 856,-828.1 852.5,-838.1 859.5,-838.1"/>
</g>
<!-- 135&#45;&gt;136 -->
<g id="edge107" class="edge">
<title>135&#45;&gt;136</title>
<path fill="none" stroke="black" d="M744.96,-863.88C764.88,-854.39 789.82,-842.51 811.02,-832.42"/>
<polygon fill="black" stroke="black" points="812.63,-835.53 820.16,-828.07 809.62,-829.21 812.63,-835.53"/>
</g>
<!-- 135&#45;&gt;148 -->
<g id="edge151" class="edge">
<title>135&#45;&gt;148</title>
<path fill="none" stroke="black" d="M709,-863.95C709,-837.29 709,-784.11 709,-739 709,-739 709,-739 709,-449 709,-406.5 709.46,-384.77 744,-360 754.83,-352.24 1576.7,-318.16 1796.47,-309.18"/>
<polygon fill="black" stroke="black" points="1796.93,-312.66 1806.78,-308.76 1796.64,-305.67 1796.93,-312.66"/>
</g>
<!-- 136&#45;&gt;137 -->
<g id="edge108" class="edge">
<title>136&#45;&gt;137</title>
<path fill="none" stroke="black" d="M858.47,-791.7C859.57,-783.98 860.9,-774.71 862.13,-766.11"/>
<polygon fill="black" stroke="black" points="865.61,-766.5 863.56,-756.1 858.68,-765.51 865.61,-766.5"/>
</g>
<!-- 138 -->
<g id="node105" class="node">
<title>138</title>
<polygon fill="none" stroke="black" points="1225.5,-684 898.5,-684 898.5,-648 1225.5,-648 1225.5,-684"/>
<text text-anchor="middle" x="1062" y="-662.3" font-family="Times,serif" font-size="14.00">mean(·| axis=[&#45;1], keepdims=1, exclude=0)</text>
</g>
<!-- 137&#45;&gt;138 -->
<g id="edge110" class="edge">
<title>137&#45;&gt;138</title>
<path fill="none" stroke="black" d="M901.95,-724.16C930.62,-713.92 971.32,-699.39 1004.5,-687.54"/>
<polygon fill="black" stroke="black" points="1005.85,-690.77 1014.09,-684.11 1003.49,-684.18 1005.85,-690.77"/>
</g>
<!-- 139 -->
<g id="node106" class="node">
<title>139</title>
<polygon fill="none" stroke="black" points="842.5,-612 737.5,-612 737.5,-576 842.5,-576 842.5,-612"/>
<text text-anchor="middle" x="790" y="-590.3" font-family="Times,serif" font-size="14.00">subtract(·, ·)</text>
</g>
<!-- 137&#45;&gt;139 -->
<g id="edge111" class="edge">
<title>137&#45;&gt;139</title>
<path fill="none" stroke="black" d="M856.83,-719.87C843.71,-695.35 819.45,-650.03 804.05,-621.26"/>
<polygon fill="black" stroke="black" points="807,-619.35 799.2,-612.19 800.83,-622.66 807,-619.35"/>
</g>
<!-- 140 -->
<g id="node107" class="node">
<title>140</title>
<polygon fill="none" stroke="black" points="1261,-612 899,-612 899,-576 1261,-576 1261,-612"/>
<text text-anchor="middle" x="1080" y="-590.3" font-family="Times,serif" font-size="14.00">variance(·, ·| axis=[&#45;1], keepdims=1, exclude=0)</text>
</g>
<!-- 137&#45;&gt;140 -->
<g id="edge113" class="edge">
<title>137&#45;&gt;140</title>
<path fill="none" stroke="black" d="M901.58,-737.49C985.35,-737.82 1191.15,-733.46 1234,-684 1261.2,-652.6 1217.28,-629.62 1169.48,-614.89"/>
<polygon fill="black" stroke="black" points="1170.47,-611.53 1159.89,-612.05 1168.49,-618.24 1170.47,-611.53"/>
</g>
<!-- 137&#45;&gt;148 -->
<g id="edge125" class="edge">
<title>137&#45;&gt;148</title>
<path fill="none" stroke="black" d="M866.99,-719.96C868.44,-693.31 871,-640.14 871,-595 871,-595 871,-595 871,-449 871,-326.44 1011.91,-384.49 1132,-360 1259.1,-334.08 1652.07,-315.35 1796.25,-309.19"/>
<polygon fill="black" stroke="black" points="1796.76,-312.67 1806.6,-308.75 1796.46,-305.68 1796.76,-312.67"/>
</g>
<!-- 138&#45;&gt;139 -->
<g id="edge112" class="edge">
<title>138&#45;&gt;139</title>
<path fill="none" stroke="black" d="M995.81,-647.97C952.19,-636.74 895.42,-622.13 852.56,-611.1"/>
<polygon fill="black" stroke="black" points="853.37,-607.69 842.81,-608.59 851.62,-614.47 853.37,-607.69"/>
</g>
<!-- 138&#45;&gt;140 -->
<g id="edge114" class="edge">
<title>138&#45;&gt;140</title>
<path fill="none" stroke="black" d="M1066.45,-647.7C1068.45,-639.9 1070.87,-630.51 1073.1,-621.83"/>
<polygon fill="black" stroke="black" points="1076.5,-622.66 1075.6,-612.1 1069.72,-620.92 1076.5,-622.66"/>
</g>
<!-- 138&#45;&gt;148 -->
<g id="edge152" class="edge">
<title>138&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1171.1,-647.95C1213.54,-639.17 1255.47,-627.11 1270,-612 1298.03,-582.85 1289,-563.45 1289,-523 1289,-523 1289,-523 1289,-449 1289,-408.03 1281.74,-386.48 1313,-360 1349.41,-329.15 1668.09,-313.83 1796.61,-308.87"/>
<polygon fill="black" stroke="black" points="1797.13,-312.35 1806.99,-308.47 1796.86,-305.35 1797.13,-312.35"/>
</g>
<!-- 144 -->
<g id="node110" class="node">
<title>144</title>
<polygon fill="none" stroke="black" points="842.5,-396 753.5,-396 753.5,-360 842.5,-360 842.5,-396"/>
<text text-anchor="middle" x="798" y="-374.3" font-family="Times,serif" font-size="14.00">divide(·, ·)</text>
</g>
<!-- 139&#45;&gt;144 -->
<g id="edge117" class="edge">
<title>139&#45;&gt;144</title>
<path fill="none" stroke="black" d="M790.64,-575.85C792.02,-538.83 795.3,-451.18 796.98,-406.39"/>
<polygon fill="black" stroke="black" points="800.48,-406.36 797.36,-396.23 793.48,-406.09 800.48,-406.36"/>
</g>
<!-- 141 -->
<g id="node108" class="node">
<title>141</title>
<polygon fill="none" stroke="black" points="1110.5,-540 1049.5,-540 1049.5,-504 1110.5,-504 1110.5,-540"/>
<text text-anchor="middle" x="1080" y="-518.3" font-family="Times,serif" font-size="14.00">sqrt(·)</text>
</g>
<!-- 140&#45;&gt;141 -->
<g id="edge115" class="edge">
<title>140&#45;&gt;141</title>
<path fill="none" stroke="black" d="M1080,-575.7C1080,-567.98 1080,-558.71 1080,-550.11"/>
<polygon fill="black" stroke="black" points="1083.5,-550.1 1080,-540.1 1076.5,-550.1 1083.5,-550.1"/>
</g>
<!-- 140&#45;&gt;148 -->
<g id="edge154" class="edge">
<title>140&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1093.11,-575.87C1132.76,-524.25 1250.24,-372.98 1275,-360 1364.81,-312.91 1671.78,-307.15 1796.59,-306.8"/>
<polygon fill="black" stroke="black" points="1796.68,-310.3 1806.67,-306.78 1796.67,-303.3 1796.68,-310.3"/>
</g>
<!-- 143 -->
<g id="node109" class="node">
<title>143</title>
<polygon fill="none" stroke="black" points="1133.5,-468 1026.5,-468 1026.5,-432 1133.5,-432 1133.5,-468"/>
<text text-anchor="middle" x="1080" y="-446.3" font-family="Times,serif" font-size="14.00">add(·, 1e&#45;12)</text>
</g>
<!-- 141&#45;&gt;143 -->
<g id="edge116" class="edge">
<title>141&#45;&gt;143</title>
<path fill="none" stroke="black" d="M1080,-503.7C1080,-495.98 1080,-486.71 1080,-478.11"/>
<polygon fill="black" stroke="black" points="1083.5,-478.1 1080,-468.1 1076.5,-478.1 1083.5,-478.1"/>
</g>
<!-- 143&#45;&gt;144 -->
<g id="edge118" class="edge">
<title>143&#45;&gt;144</title>
<path fill="none" stroke="black" d="M1026.29,-435.67C976.51,-423.31 902.79,-405.01 852.69,-392.58"/>
<polygon fill="black" stroke="black" points="853.24,-389.1 842.69,-390.09 851.55,-395.9 853.24,-389.1"/>
</g>
<!-- 143&#45;&gt;148 -->
<g id="edge153" class="edge">
<title>143&#45;&gt;148</title>
<path fill="none" stroke="black" d="M1095.08,-431.97C1114.94,-410.87 1152.03,-375.77 1192,-360 1247.63,-338.04 1650.02,-316.66 1796.69,-309.51"/>
<polygon fill="black" stroke="black" points="1796.98,-313 1806.8,-309.02 1796.65,-306.01 1796.98,-313"/>
</g>
<!-- 144&#45;&gt;145 -->
<g id="edge119" class="edge">
<title>144&#45;&gt;145</title>
<path fill="none" stroke="black" d="M798,-359.7C798,-351.98 798,-342.71 798,-334.11"/>
<polygon fill="black" stroke="black" points="801.5,-334.1 798,-324.1 794.5,-334.1 801.5,-334.1"/>
</g>
<!-- 144&#45;&gt;148 -->
<g id="edge124" class="edge">
<title>144&#45;&gt;148</title>
<path fill="none" stroke="black" d="M842.53,-363.02C847.38,-361.84 852.27,-360.8 857,-360 949.72,-344.38 1603.83,-316.96 1796.64,-309.14"/>
<polygon fill="black" stroke="black" points="1796.95,-312.63 1806.8,-308.73 1796.67,-305.63 1796.95,-312.63"/>
</g>
<!-- 145&#45;&gt;146 -->
<g id="edge121" class="edge">
<title>145&#45;&gt;146</title>
<path fill="none" stroke="black" d="M850.04,-295.41C924.82,-281.64 1061.47,-256.47 1132.46,-243.39"/>
<polygon fill="black" stroke="black" points="1133.24,-246.8 1142.45,-241.55 1131.98,-239.92 1133.24,-246.8"/>
</g>
<!-- 147 -->
<g id="node113" class="node">
<title>147</title>
<polygon fill="none" stroke="black" points="1547,-180 1461,-180 1461,-144 1547,-144 1547,-180"/>
<text text-anchor="middle" x="1504" y="-158.3" font-family="Times,serif" font-size="14.00">Tuple[...])</text>
</g>
<!-- 146&#45;&gt;147 -->
<g id="edge123" class="edge">
<title>146&#45;&gt;147</title>
<path fill="none" stroke="black" d="M1213.51,-225.38C1270.8,-213.07 1383.96,-188.78 1450.88,-174.41"/>
<polygon fill="black" stroke="black" points="1451.77,-177.8 1460.81,-172.27 1450.3,-170.95 1451.77,-177.8"/>
</g>
<!-- 149 -->
<g id="node115" class="node">
<title>149</title>
<polygon fill="none" stroke="black" points="1582,-108 1496,-108 1496,-72 1582,-72 1582,-108"/>
<text text-anchor="middle" x="1539" y="-86.3" font-family="Times,serif" font-size="14.00">Tuple[...])</text>
</g>
<!-- 147&#45;&gt;149 -->
<g id="edge155" class="edge">
<title>147&#45;&gt;149</title>
<path fill="none" stroke="black" d="M1512.65,-143.7C1516.64,-135.73 1521.45,-126.1 1525.87,-117.26"/>
<polygon fill="black" stroke="black" points="1529.11,-118.61 1530.45,-108.1 1522.85,-115.48 1529.11,-118.61"/>
</g>
<!-- 148&#45;&gt;149 -->
<g id="edge156" class="edge">
<title>148&#45;&gt;149</title>
<path fill="none" stroke="black" d="M1825.08,-287.85C1769.46,-249.58 1635.2,-157.19 1572.41,-113.99"/>
<polygon fill="black" stroke="black" points="1574.27,-111.02 1564.04,-108.23 1570.3,-116.78 1574.27,-111.02"/>
</g>
<!-- 150 -->
<g id="node116" class="node">
<title>150</title>
<polygon fill="none" stroke="black" points="1579,-36 1499,-36 1499,0 1579,0 1579,-36"/>
<text text-anchor="middle" x="1539" y="-14.3" font-family="Times,serif" font-size="14.00">Function</text>
</g>
<!-- 149&#45;&gt;150 -->
<g id="edge157" class="edge">
<title>149&#45;&gt;150</title>
<path fill="none" stroke="black" d="M1539,-71.7C1539,-63.98 1539,-54.71 1539,-46.11"/>
<polygon fill="black" stroke="black" points="1542.5,-46.1 1539,-36.1 1535.5,-46.1 1542.5,-46.1"/>
</g>
</g>
</svg>