blob: 5a4363f33459f4019da8f2995f4eae2df447c295 [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `/root/.cargo/registry/src/github.com-1ecc6299db9ec823/aho-corasick-1.0.2/src/nfa/noncontiguous.rs`."><meta name="keywords" content="rust, rustlang, rust-lang"><title>noncontiguous.rs - source</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceSerif4-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../FiraSans-Regular.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../FiraSans-Medium.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceCodePro-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceSerif4-Bold.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceCodePro-Semibold.ttf.woff2"><link rel="stylesheet" href="../../../normalize.css"><link rel="stylesheet" href="../../../rustdoc.css" id="mainThemeStyle"><link rel="stylesheet" href="../../../ayu.css" disabled><link rel="stylesheet" href="../../../dark.css" disabled><link rel="stylesheet" href="../../../light.css" id="themeStyle"><script id="default-settings" ></script><script src="../../../storage.js"></script><script defer src="../../../source-script.js"></script><script defer src="../../../source-files.js"></script><script defer src="../../../main.js"></script><noscript><link rel="stylesheet" href="../../../noscript.css"></noscript><link rel="alternate icon" type="image/png" href="../../../favicon-16x16.png"><link rel="alternate icon" type="image/png" href="../../../favicon-32x32.png"><link rel="icon" type="image/svg+xml" href="../../../favicon.svg"></head><body class="rustdoc source"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and 
will most likely display funky things.</div><![endif]--><nav class="sidebar"><a class="sidebar-logo" href="../../../aho_corasick/index.html"><div class="logo-container"><img class="rust-logo" src="../../../rust-logo.svg" alt="logo"></div></a></nav><main><div class="width-limiter"><nav class="sub"><a class="sub-logo-container" href="../../../aho_corasick/index.html"><img class="rust-logo" src="../../../rust-logo.svg" alt="logo"></a><form class="search-form"><div class="search-container"><span></span><input class="search-input" name="search" autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to search, ‘?’ for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../../../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../../../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../../../wheel.svg"></a></div></div></form></nav><section id="main-content" class="content"><div class="example-wrap"><pre class="src-line-numbers"><span id="1">1</span>
<span id="2">2</span>
<span id="3">3</span>
<span id="4">4</span>
<span id="5">5</span>
<span id="6">6</span>
<span id="7">7</span>
<span id="8">8</span>
<span id="9">9</span>
<span id="10">10</span>
<span id="11">11</span>
<span id="12">12</span>
<span id="13">13</span>
<span id="14">14</span>
<span id="15">15</span>
<span id="16">16</span>
<span id="17">17</span>
<span id="18">18</span>
<span id="19">19</span>
<span id="20">20</span>
<span id="21">21</span>
<span id="22">22</span>
<span id="23">23</span>
<span id="24">24</span>
<span id="25">25</span>
<span id="26">26</span>
<span id="27">27</span>
<span id="28">28</span>
<span id="29">29</span>
<span id="30">30</span>
<span id="31">31</span>
<span id="32">32</span>
<span id="33">33</span>
<span id="34">34</span>
<span id="35">35</span>
<span id="36">36</span>
<span id="37">37</span>
<span id="38">38</span>
<span id="39">39</span>
<span id="40">40</span>
<span id="41">41</span>
<span id="42">42</span>
<span id="43">43</span>
<span id="44">44</span>
<span id="45">45</span>
<span id="46">46</span>
<span id="47">47</span>
<span id="48">48</span>
<span id="49">49</span>
<span id="50">50</span>
<span id="51">51</span>
<span id="52">52</span>
<span id="53">53</span>
<span id="54">54</span>
<span id="55">55</span>
<span id="56">56</span>
<span id="57">57</span>
<span id="58">58</span>
<span id="59">59</span>
<span id="60">60</span>
<span id="61">61</span>
<span id="62">62</span>
<span id="63">63</span>
<span id="64">64</span>
<span id="65">65</span>
<span id="66">66</span>
<span id="67">67</span>
<span id="68">68</span>
<span id="69">69</span>
<span id="70">70</span>
<span id="71">71</span>
<span id="72">72</span>
<span id="73">73</span>
<span id="74">74</span>
<span id="75">75</span>
<span id="76">76</span>
<span id="77">77</span>
<span id="78">78</span>
<span id="79">79</span>
<span id="80">80</span>
<span id="81">81</span>
<span id="82">82</span>
<span id="83">83</span>
<span id="84">84</span>
<span id="85">85</span>
<span id="86">86</span>
<span id="87">87</span>
<span id="88">88</span>
<span id="89">89</span>
<span id="90">90</span>
<span id="91">91</span>
<span id="92">92</span>
<span id="93">93</span>
<span id="94">94</span>
<span id="95">95</span>
<span id="96">96</span>
<span id="97">97</span>
<span id="98">98</span>
<span id="99">99</span>
<span id="100">100</span>
<span id="101">101</span>
<span id="102">102</span>
<span id="103">103</span>
<span id="104">104</span>
<span id="105">105</span>
<span id="106">106</span>
<span id="107">107</span>
<span id="108">108</span>
<span id="109">109</span>
<span id="110">110</span>
<span id="111">111</span>
<span id="112">112</span>
<span id="113">113</span>
<span id="114">114</span>
<span id="115">115</span>
<span id="116">116</span>
<span id="117">117</span>
<span id="118">118</span>
<span id="119">119</span>
<span id="120">120</span>
<span id="121">121</span>
<span id="122">122</span>
<span id="123">123</span>
<span id="124">124</span>
<span id="125">125</span>
<span id="126">126</span>
<span id="127">127</span>
<span id="128">128</span>
<span id="129">129</span>
<span id="130">130</span>
<span id="131">131</span>
<span id="132">132</span>
<span id="133">133</span>
<span id="134">134</span>
<span id="135">135</span>
<span id="136">136</span>
<span id="137">137</span>
<span id="138">138</span>
<span id="139">139</span>
<span id="140">140</span>
<span id="141">141</span>
<span id="142">142</span>
<span id="143">143</span>
<span id="144">144</span>
<span id="145">145</span>
<span id="146">146</span>
<span id="147">147</span>
<span id="148">148</span>
<span id="149">149</span>
<span id="150">150</span>
<span id="151">151</span>
<span id="152">152</span>
<span id="153">153</span>
<span id="154">154</span>
<span id="155">155</span>
<span id="156">156</span>
<span id="157">157</span>
<span id="158">158</span>
<span id="159">159</span>
<span id="160">160</span>
<span id="161">161</span>
<span id="162">162</span>
<span id="163">163</span>
<span id="164">164</span>
<span id="165">165</span>
<span id="166">166</span>
<span id="167">167</span>
<span id="168">168</span>
<span id="169">169</span>
<span id="170">170</span>
<span id="171">171</span>
<span id="172">172</span>
<span id="173">173</span>
<span id="174">174</span>
<span id="175">175</span>
<span id="176">176</span>
<span id="177">177</span>
<span id="178">178</span>
<span id="179">179</span>
<span id="180">180</span>
<span id="181">181</span>
<span id="182">182</span>
<span id="183">183</span>
<span id="184">184</span>
<span id="185">185</span>
<span id="186">186</span>
<span id="187">187</span>
<span id="188">188</span>
<span id="189">189</span>
<span id="190">190</span>
<span id="191">191</span>
<span id="192">192</span>
<span id="193">193</span>
<span id="194">194</span>
<span id="195">195</span>
<span id="196">196</span>
<span id="197">197</span>
<span id="198">198</span>
<span id="199">199</span>
<span id="200">200</span>
<span id="201">201</span>
<span id="202">202</span>
<span id="203">203</span>
<span id="204">204</span>
<span id="205">205</span>
<span id="206">206</span>
<span id="207">207</span>
<span id="208">208</span>
<span id="209">209</span>
<span id="210">210</span>
<span id="211">211</span>
<span id="212">212</span>
<span id="213">213</span>
<span id="214">214</span>
<span id="215">215</span>
<span id="216">216</span>
<span id="217">217</span>
<span id="218">218</span>
<span id="219">219</span>
<span id="220">220</span>
<span id="221">221</span>
<span id="222">222</span>
<span id="223">223</span>
<span id="224">224</span>
<span id="225">225</span>
<span id="226">226</span>
<span id="227">227</span>
<span id="228">228</span>
<span id="229">229</span>
<span id="230">230</span>
<span id="231">231</span>
<span id="232">232</span>
<span id="233">233</span>
<span id="234">234</span>
<span id="235">235</span>
<span id="236">236</span>
<span id="237">237</span>
<span id="238">238</span>
<span id="239">239</span>
<span id="240">240</span>
<span id="241">241</span>
<span id="242">242</span>
<span id="243">243</span>
<span id="244">244</span>
<span id="245">245</span>
<span id="246">246</span>
<span id="247">247</span>
<span id="248">248</span>
<span id="249">249</span>
<span id="250">250</span>
<span id="251">251</span>
<span id="252">252</span>
<span id="253">253</span>
<span id="254">254</span>
<span id="255">255</span>
<span id="256">256</span>
<span id="257">257</span>
<span id="258">258</span>
<span id="259">259</span>
<span id="260">260</span>
<span id="261">261</span>
<span id="262">262</span>
<span id="263">263</span>
<span id="264">264</span>
<span id="265">265</span>
<span id="266">266</span>
<span id="267">267</span>
<span id="268">268</span>
<span id="269">269</span>
<span id="270">270</span>
<span id="271">271</span>
<span id="272">272</span>
<span id="273">273</span>
<span id="274">274</span>
<span id="275">275</span>
<span id="276">276</span>
<span id="277">277</span>
<span id="278">278</span>
<span id="279">279</span>
<span id="280">280</span>
<span id="281">281</span>
<span id="282">282</span>
<span id="283">283</span>
<span id="284">284</span>
<span id="285">285</span>
<span id="286">286</span>
<span id="287">287</span>
<span id="288">288</span>
<span id="289">289</span>
<span id="290">290</span>
<span id="291">291</span>
<span id="292">292</span>
<span id="293">293</span>
<span id="294">294</span>
<span id="295">295</span>
<span id="296">296</span>
<span id="297">297</span>
<span id="298">298</span>
<span id="299">299</span>
<span id="300">300</span>
<span id="301">301</span>
<span id="302">302</span>
<span id="303">303</span>
<span id="304">304</span>
<span id="305">305</span>
<span id="306">306</span>
<span id="307">307</span>
<span id="308">308</span>
<span id="309">309</span>
<span id="310">310</span>
<span id="311">311</span>
<span id="312">312</span>
<span id="313">313</span>
<span id="314">314</span>
<span id="315">315</span>
<span id="316">316</span>
<span id="317">317</span>
<span id="318">318</span>
<span id="319">319</span>
<span id="320">320</span>
<span id="321">321</span>
<span id="322">322</span>
<span id="323">323</span>
<span id="324">324</span>
<span id="325">325</span>
<span id="326">326</span>
<span id="327">327</span>
<span id="328">328</span>
<span id="329">329</span>
<span id="330">330</span>
<span id="331">331</span>
<span id="332">332</span>
<span id="333">333</span>
<span id="334">334</span>
<span id="335">335</span>
<span id="336">336</span>
<span id="337">337</span>
<span id="338">338</span>
<span id="339">339</span>
<span id="340">340</span>
<span id="341">341</span>
<span id="342">342</span>
<span id="343">343</span>
<span id="344">344</span>
<span id="345">345</span>
<span id="346">346</span>
<span id="347">347</span>
<span id="348">348</span>
<span id="349">349</span>
<span id="350">350</span>
<span id="351">351</span>
<span id="352">352</span>
<span id="353">353</span>
<span id="354">354</span>
<span id="355">355</span>
<span id="356">356</span>
<span id="357">357</span>
<span id="358">358</span>
<span id="359">359</span>
<span id="360">360</span>
<span id="361">361</span>
<span id="362">362</span>
<span id="363">363</span>
<span id="364">364</span>
<span id="365">365</span>
<span id="366">366</span>
<span id="367">367</span>
<span id="368">368</span>
<span id="369">369</span>
<span id="370">370</span>
<span id="371">371</span>
<span id="372">372</span>
<span id="373">373</span>
<span id="374">374</span>
<span id="375">375</span>
<span id="376">376</span>
<span id="377">377</span>
<span id="378">378</span>
<span id="379">379</span>
<span id="380">380</span>
<span id="381">381</span>
<span id="382">382</span>
<span id="383">383</span>
<span id="384">384</span>
<span id="385">385</span>
<span id="386">386</span>
<span id="387">387</span>
<span id="388">388</span>
<span id="389">389</span>
<span id="390">390</span>
<span id="391">391</span>
<span id="392">392</span>
<span id="393">393</span>
<span id="394">394</span>
<span id="395">395</span>
<span id="396">396</span>
<span id="397">397</span>
<span id="398">398</span>
<span id="399">399</span>
<span id="400">400</span>
<span id="401">401</span>
<span id="402">402</span>
<span id="403">403</span>
<span id="404">404</span>
<span id="405">405</span>
<span id="406">406</span>
<span id="407">407</span>
<span id="408">408</span>
<span id="409">409</span>
<span id="410">410</span>
<span id="411">411</span>
<span id="412">412</span>
<span id="413">413</span>
<span id="414">414</span>
<span id="415">415</span>
<span id="416">416</span>
<span id="417">417</span>
<span id="418">418</span>
<span id="419">419</span>
<span id="420">420</span>
<span id="421">421</span>
<span id="422">422</span>
<span id="423">423</span>
<span id="424">424</span>
<span id="425">425</span>
<span id="426">426</span>
<span id="427">427</span>
<span id="428">428</span>
<span id="429">429</span>
<span id="430">430</span>
<span id="431">431</span>
<span id="432">432</span>
<span id="433">433</span>
<span id="434">434</span>
<span id="435">435</span>
<span id="436">436</span>
<span id="437">437</span>
<span id="438">438</span>
<span id="439">439</span>
<span id="440">440</span>
<span id="441">441</span>
<span id="442">442</span>
<span id="443">443</span>
<span id="444">444</span>
<span id="445">445</span>
<span id="446">446</span>
<span id="447">447</span>
<span id="448">448</span>
<span id="449">449</span>
<span id="450">450</span>
<span id="451">451</span>
<span id="452">452</span>
<span id="453">453</span>
<span id="454">454</span>
<span id="455">455</span>
<span id="456">456</span>
<span id="457">457</span>
<span id="458">458</span>
<span id="459">459</span>
<span id="460">460</span>
<span id="461">461</span>
<span id="462">462</span>
<span id="463">463</span>
<span id="464">464</span>
<span id="465">465</span>
<span id="466">466</span>
<span id="467">467</span>
<span id="468">468</span>
<span id="469">469</span>
<span id="470">470</span>
<span id="471">471</span>
<span id="472">472</span>
<span id="473">473</span>
<span id="474">474</span>
<span id="475">475</span>
<span id="476">476</span>
<span id="477">477</span>
<span id="478">478</span>
<span id="479">479</span>
<span id="480">480</span>
<span id="481">481</span>
<span id="482">482</span>
<span id="483">483</span>
<span id="484">484</span>
<span id="485">485</span>
<span id="486">486</span>
<span id="487">487</span>
<span id="488">488</span>
<span id="489">489</span>
<span id="490">490</span>
<span id="491">491</span>
<span id="492">492</span>
<span id="493">493</span>
<span id="494">494</span>
<span id="495">495</span>
<span id="496">496</span>
<span id="497">497</span>
<span id="498">498</span>
<span id="499">499</span>
<span id="500">500</span>
<span id="501">501</span>
<span id="502">502</span>
<span id="503">503</span>
<span id="504">504</span>
<span id="505">505</span>
<span id="506">506</span>
<span id="507">507</span>
<span id="508">508</span>
<span id="509">509</span>
<span id="510">510</span>
<span id="511">511</span>
<span id="512">512</span>
<span id="513">513</span>
<span id="514">514</span>
<span id="515">515</span>
<span id="516">516</span>
<span id="517">517</span>
<span id="518">518</span>
<span id="519">519</span>
<span id="520">520</span>
<span id="521">521</span>
<span id="522">522</span>
<span id="523">523</span>
<span id="524">524</span>
<span id="525">525</span>
<span id="526">526</span>
<span id="527">527</span>
<span id="528">528</span>
<span id="529">529</span>
<span id="530">530</span>
<span id="531">531</span>
<span id="532">532</span>
<span id="533">533</span>
<span id="534">534</span>
<span id="535">535</span>
<span id="536">536</span>
<span id="537">537</span>
<span id="538">538</span>
<span id="539">539</span>
<span id="540">540</span>
<span id="541">541</span>
<span id="542">542</span>
<span id="543">543</span>
<span id="544">544</span>
<span id="545">545</span>
<span id="546">546</span>
<span id="547">547</span>
<span id="548">548</span>
<span id="549">549</span>
<span id="550">550</span>
<span id="551">551</span>
<span id="552">552</span>
<span id="553">553</span>
<span id="554">554</span>
<span id="555">555</span>
<span id="556">556</span>
<span id="557">557</span>
<span id="558">558</span>
<span id="559">559</span>
<span id="560">560</span>
<span id="561">561</span>
<span id="562">562</span>
<span id="563">563</span>
<span id="564">564</span>
<span id="565">565</span>
<span id="566">566</span>
<span id="567">567</span>
<span id="568">568</span>
<span id="569">569</span>
<span id="570">570</span>
<span id="571">571</span>
<span id="572">572</span>
<span id="573">573</span>
<span id="574">574</span>
<span id="575">575</span>
<span id="576">576</span>
<span id="577">577</span>
<span id="578">578</span>
<span id="579">579</span>
<span id="580">580</span>
<span id="581">581</span>
<span id="582">582</span>
<span id="583">583</span>
<span id="584">584</span>
<span id="585">585</span>
<span id="586">586</span>
<span id="587">587</span>
<span id="588">588</span>
<span id="589">589</span>
<span id="590">590</span>
<span id="591">591</span>
<span id="592">592</span>
<span id="593">593</span>
<span id="594">594</span>
<span id="595">595</span>
<span id="596">596</span>
<span id="597">597</span>
<span id="598">598</span>
<span id="599">599</span>
<span id="600">600</span>
<span id="601">601</span>
<span id="602">602</span>
<span id="603">603</span>
<span id="604">604</span>
<span id="605">605</span>
<span id="606">606</span>
<span id="607">607</span>
<span id="608">608</span>
<span id="609">609</span>
<span id="610">610</span>
<span id="611">611</span>
<span id="612">612</span>
<span id="613">613</span>
<span id="614">614</span>
<span id="615">615</span>
<span id="616">616</span>
<span id="617">617</span>
<span id="618">618</span>
<span id="619">619</span>
<span id="620">620</span>
<span id="621">621</span>
<span id="622">622</span>
<span id="623">623</span>
<span id="624">624</span>
<span id="625">625</span>
<span id="626">626</span>
<span id="627">627</span>
<span id="628">628</span>
<span id="629">629</span>
<span id="630">630</span>
<span id="631">631</span>
<span id="632">632</span>
<span id="633">633</span>
<span id="634">634</span>
<span id="635">635</span>
<span id="636">636</span>
<span id="637">637</span>
<span id="638">638</span>
<span id="639">639</span>
<span id="640">640</span>
<span id="641">641</span>
<span id="642">642</span>
<span id="643">643</span>
<span id="644">644</span>
<span id="645">645</span>
<span id="646">646</span>
<span id="647">647</span>
<span id="648">648</span>
<span id="649">649</span>
<span id="650">650</span>
<span id="651">651</span>
<span id="652">652</span>
<span id="653">653</span>
<span id="654">654</span>
<span id="655">655</span>
<span id="656">656</span>
<span id="657">657</span>
<span id="658">658</span>
<span id="659">659</span>
<span id="660">660</span>
<span id="661">661</span>
<span id="662">662</span>
<span id="663">663</span>
<span id="664">664</span>
<span id="665">665</span>
<span id="666">666</span>
<span id="667">667</span>
<span id="668">668</span>
<span id="669">669</span>
<span id="670">670</span>
<span id="671">671</span>
<span id="672">672</span>
<span id="673">673</span>
<span id="674">674</span>
<span id="675">675</span>
<span id="676">676</span>
<span id="677">677</span>
<span id="678">678</span>
<span id="679">679</span>
<span id="680">680</span>
<span id="681">681</span>
<span id="682">682</span>
<span id="683">683</span>
<span id="684">684</span>
<span id="685">685</span>
<span id="686">686</span>
<span id="687">687</span>
<span id="688">688</span>
<span id="689">689</span>
<span id="690">690</span>
<span id="691">691</span>
<span id="692">692</span>
<span id="693">693</span>
<span id="694">694</span>
<span id="695">695</span>
<span id="696">696</span>
<span id="697">697</span>
<span id="698">698</span>
<span id="699">699</span>
<span id="700">700</span>
<span id="701">701</span>
<span id="702">702</span>
<span id="703">703</span>
<span id="704">704</span>
<span id="705">705</span>
<span id="706">706</span>
<span id="707">707</span>
<span id="708">708</span>
<span id="709">709</span>
<span id="710">710</span>
<span id="711">711</span>
<span id="712">712</span>
<span id="713">713</span>
<span id="714">714</span>
<span id="715">715</span>
<span id="716">716</span>
<span id="717">717</span>
<span id="718">718</span>
<span id="719">719</span>
<span id="720">720</span>
<span id="721">721</span>
<span id="722">722</span>
<span id="723">723</span>
<span id="724">724</span>
<span id="725">725</span>
<span id="726">726</span>
<span id="727">727</span>
<span id="728">728</span>
<span id="729">729</span>
<span id="730">730</span>
<span id="731">731</span>
<span id="732">732</span>
<span id="733">733</span>
<span id="734">734</span>
<span id="735">735</span>
<span id="736">736</span>
<span id="737">737</span>
<span id="738">738</span>
<span id="739">739</span>
<span id="740">740</span>
<span id="741">741</span>
<span id="742">742</span>
<span id="743">743</span>
<span id="744">744</span>
<span id="745">745</span>
<span id="746">746</span>
<span id="747">747</span>
<span id="748">748</span>
<span id="749">749</span>
<span id="750">750</span>
<span id="751">751</span>
<span id="752">752</span>
<span id="753">753</span>
<span id="754">754</span>
<span id="755">755</span>
<span id="756">756</span>
<span id="757">757</span>
<span id="758">758</span>
<span id="759">759</span>
<span id="760">760</span>
<span id="761">761</span>
<span id="762">762</span>
<span id="763">763</span>
<span id="764">764</span>
<span id="765">765</span>
<span id="766">766</span>
<span id="767">767</span>
<span id="768">768</span>
<span id="769">769</span>
<span id="770">770</span>
<span id="771">771</span>
<span id="772">772</span>
<span id="773">773</span>
<span id="774">774</span>
<span id="775">775</span>
<span id="776">776</span>
<span id="777">777</span>
<span id="778">778</span>
<span id="779">779</span>
<span id="780">780</span>
<span id="781">781</span>
<span id="782">782</span>
<span id="783">783</span>
<span id="784">784</span>
<span id="785">785</span>
<span id="786">786</span>
<span id="787">787</span>
<span id="788">788</span>
<span id="789">789</span>
<span id="790">790</span>
<span id="791">791</span>
<span id="792">792</span>
<span id="793">793</span>
<span id="794">794</span>
<span id="795">795</span>
<span id="796">796</span>
<span id="797">797</span>
<span id="798">798</span>
<span id="799">799</span>
<span id="800">800</span>
<span id="801">801</span>
<span id="802">802</span>
<span id="803">803</span>
<span id="804">804</span>
<span id="805">805</span>
<span id="806">806</span>
<span id="807">807</span>
<span id="808">808</span>
<span id="809">809</span>
<span id="810">810</span>
<span id="811">811</span>
<span id="812">812</span>
<span id="813">813</span>
<span id="814">814</span>
<span id="815">815</span>
<span id="816">816</span>
<span id="817">817</span>
<span id="818">818</span>
<span id="819">819</span>
<span id="820">820</span>
<span id="821">821</span>
<span id="822">822</span>
<span id="823">823</span>
<span id="824">824</span>
<span id="825">825</span>
<span id="826">826</span>
<span id="827">827</span>
<span id="828">828</span>
<span id="829">829</span>
<span id="830">830</span>
<span id="831">831</span>
<span id="832">832</span>
<span id="833">833</span>
<span id="834">834</span>
<span id="835">835</span>
<span id="836">836</span>
<span id="837">837</span>
<span id="838">838</span>
<span id="839">839</span>
<span id="840">840</span>
<span id="841">841</span>
<span id="842">842</span>
<span id="843">843</span>
<span id="844">844</span>
<span id="845">845</span>
<span id="846">846</span>
<span id="847">847</span>
<span id="848">848</span>
<span id="849">849</span>
<span id="850">850</span>
<span id="851">851</span>
<span id="852">852</span>
<span id="853">853</span>
<span id="854">854</span>
<span id="855">855</span>
<span id="856">856</span>
<span id="857">857</span>
<span id="858">858</span>
<span id="859">859</span>
<span id="860">860</span>
<span id="861">861</span>
<span id="862">862</span>
<span id="863">863</span>
<span id="864">864</span>
<span id="865">865</span>
<span id="866">866</span>
<span id="867">867</span>
<span id="868">868</span>
<span id="869">869</span>
<span id="870">870</span>
<span id="871">871</span>
<span id="872">872</span>
<span id="873">873</span>
<span id="874">874</span>
<span id="875">875</span>
<span id="876">876</span>
<span id="877">877</span>
<span id="878">878</span>
<span id="879">879</span>
<span id="880">880</span>
<span id="881">881</span>
<span id="882">882</span>
<span id="883">883</span>
<span id="884">884</span>
<span id="885">885</span>
<span id="886">886</span>
<span id="887">887</span>
<span id="888">888</span>
<span id="889">889</span>
<span id="890">890</span>
<span id="891">891</span>
<span id="892">892</span>
<span id="893">893</span>
<span id="894">894</span>
<span id="895">895</span>
<span id="896">896</span>
<span id="897">897</span>
<span id="898">898</span>
<span id="899">899</span>
<span id="900">900</span>
<span id="901">901</span>
<span id="902">902</span>
<span id="903">903</span>
<span id="904">904</span>
<span id="905">905</span>
<span id="906">906</span>
<span id="907">907</span>
<span id="908">908</span>
<span id="909">909</span>
<span id="910">910</span>
<span id="911">911</span>
<span id="912">912</span>
<span id="913">913</span>
<span id="914">914</span>
<span id="915">915</span>
<span id="916">916</span>
<span id="917">917</span>
<span id="918">918</span>
<span id="919">919</span>
<span id="920">920</span>
<span id="921">921</span>
<span id="922">922</span>
<span id="923">923</span>
<span id="924">924</span>
<span id="925">925</span>
<span id="926">926</span>
<span id="927">927</span>
<span id="928">928</span>
<span id="929">929</span>
<span id="930">930</span>
<span id="931">931</span>
<span id="932">932</span>
<span id="933">933</span>
<span id="934">934</span>
<span id="935">935</span>
<span id="936">936</span>
<span id="937">937</span>
<span id="938">938</span>
<span id="939">939</span>
<span id="940">940</span>
<span id="941">941</span>
<span id="942">942</span>
<span id="943">943</span>
<span id="944">944</span>
<span id="945">945</span>
<span id="946">946</span>
<span id="947">947</span>
<span id="948">948</span>
<span id="949">949</span>
<span id="950">950</span>
<span id="951">951</span>
<span id="952">952</span>
<span id="953">953</span>
<span id="954">954</span>
<span id="955">955</span>
<span id="956">956</span>
<span id="957">957</span>
<span id="958">958</span>
<span id="959">959</span>
<span id="960">960</span>
<span id="961">961</span>
<span id="962">962</span>
<span id="963">963</span>
<span id="964">964</span>
<span id="965">965</span>
<span id="966">966</span>
<span id="967">967</span>
<span id="968">968</span>
<span id="969">969</span>
<span id="970">970</span>
<span id="971">971</span>
<span id="972">972</span>
<span id="973">973</span>
<span id="974">974</span>
<span id="975">975</span>
<span id="976">976</span>
<span id="977">977</span>
<span id="978">978</span>
<span id="979">979</span>
<span id="980">980</span>
<span id="981">981</span>
<span id="982">982</span>
<span id="983">983</span>
<span id="984">984</span>
<span id="985">985</span>
<span id="986">986</span>
<span id="987">987</span>
<span id="988">988</span>
<span id="989">989</span>
<span id="990">990</span>
<span id="991">991</span>
<span id="992">992</span>
<span id="993">993</span>
<span id="994">994</span>
<span id="995">995</span>
<span id="996">996</span>
<span id="997">997</span>
<span id="998">998</span>
<span id="999">999</span>
<span id="1000">1000</span>
<span id="1001">1001</span>
<span id="1002">1002</span>
<span id="1003">1003</span>
<span id="1004">1004</span>
<span id="1005">1005</span>
<span id="1006">1006</span>
<span id="1007">1007</span>
<span id="1008">1008</span>
<span id="1009">1009</span>
<span id="1010">1010</span>
<span id="1011">1011</span>
<span id="1012">1012</span>
<span id="1013">1013</span>
<span id="1014">1014</span>
<span id="1015">1015</span>
<span id="1016">1016</span>
<span id="1017">1017</span>
<span id="1018">1018</span>
<span id="1019">1019</span>
<span id="1020">1020</span>
<span id="1021">1021</span>
<span id="1022">1022</span>
<span id="1023">1023</span>
<span id="1024">1024</span>
<span id="1025">1025</span>
<span id="1026">1026</span>
<span id="1027">1027</span>
<span id="1028">1028</span>
<span id="1029">1029</span>
<span id="1030">1030</span>
<span id="1031">1031</span>
<span id="1032">1032</span>
<span id="1033">1033</span>
<span id="1034">1034</span>
<span id="1035">1035</span>
<span id="1036">1036</span>
<span id="1037">1037</span>
<span id="1038">1038</span>
<span id="1039">1039</span>
<span id="1040">1040</span>
<span id="1041">1041</span>
<span id="1042">1042</span>
<span id="1043">1043</span>
<span id="1044">1044</span>
<span id="1045">1045</span>
<span id="1046">1046</span>
<span id="1047">1047</span>
<span id="1048">1048</span>
<span id="1049">1049</span>
<span id="1050">1050</span>
<span id="1051">1051</span>
<span id="1052">1052</span>
<span id="1053">1053</span>
<span id="1054">1054</span>
<span id="1055">1055</span>
<span id="1056">1056</span>
<span id="1057">1057</span>
<span id="1058">1058</span>
<span id="1059">1059</span>
<span id="1060">1060</span>
<span id="1061">1061</span>
<span id="1062">1062</span>
<span id="1063">1063</span>
<span id="1064">1064</span>
<span id="1065">1065</span>
<span id="1066">1066</span>
<span id="1067">1067</span>
<span id="1068">1068</span>
<span id="1069">1069</span>
<span id="1070">1070</span>
<span id="1071">1071</span>
<span id="1072">1072</span>
<span id="1073">1073</span>
<span id="1074">1074</span>
<span id="1075">1075</span>
<span id="1076">1076</span>
<span id="1077">1077</span>
<span id="1078">1078</span>
<span id="1079">1079</span>
<span id="1080">1080</span>
<span id="1081">1081</span>
<span id="1082">1082</span>
<span id="1083">1083</span>
<span id="1084">1084</span>
<span id="1085">1085</span>
<span id="1086">1086</span>
<span id="1087">1087</span>
<span id="1088">1088</span>
<span id="1089">1089</span>
<span id="1090">1090</span>
<span id="1091">1091</span>
<span id="1092">1092</span>
<span id="1093">1093</span>
<span id="1094">1094</span>
<span id="1095">1095</span>
<span id="1096">1096</span>
<span id="1097">1097</span>
<span id="1098">1098</span>
<span id="1099">1099</span>
<span id="1100">1100</span>
<span id="1101">1101</span>
<span id="1102">1102</span>
<span id="1103">1103</span>
<span id="1104">1104</span>
<span id="1105">1105</span>
<span id="1106">1106</span>
<span id="1107">1107</span>
<span id="1108">1108</span>
<span id="1109">1109</span>
<span id="1110">1110</span>
<span id="1111">1111</span>
<span id="1112">1112</span>
<span id="1113">1113</span>
<span id="1114">1114</span>
<span id="1115">1115</span>
<span id="1116">1116</span>
<span id="1117">1117</span>
<span id="1118">1118</span>
<span id="1119">1119</span>
<span id="1120">1120</span>
<span id="1121">1121</span>
<span id="1122">1122</span>
<span id="1123">1123</span>
<span id="1124">1124</span>
<span id="1125">1125</span>
<span id="1126">1126</span>
<span id="1127">1127</span>
<span id="1128">1128</span>
<span id="1129">1129</span>
<span id="1130">1130</span>
<span id="1131">1131</span>
<span id="1132">1132</span>
<span id="1133">1133</span>
<span id="1134">1134</span>
<span id="1135">1135</span>
<span id="1136">1136</span>
<span id="1137">1137</span>
<span id="1138">1138</span>
<span id="1139">1139</span>
<span id="1140">1140</span>
<span id="1141">1141</span>
<span id="1142">1142</span>
<span id="1143">1143</span>
<span id="1144">1144</span>
<span id="1145">1145</span>
<span id="1146">1146</span>
<span id="1147">1147</span>
<span id="1148">1148</span>
<span id="1149">1149</span>
<span id="1150">1150</span>
<span id="1151">1151</span>
<span id="1152">1152</span>
<span id="1153">1153</span>
<span id="1154">1154</span>
<span id="1155">1155</span>
<span id="1156">1156</span>
<span id="1157">1157</span>
<span id="1158">1158</span>
<span id="1159">1159</span>
<span id="1160">1160</span>
<span id="1161">1161</span>
<span id="1162">1162</span>
<span id="1163">1163</span>
<span id="1164">1164</span>
<span id="1165">1165</span>
<span id="1166">1166</span>
<span id="1167">1167</span>
<span id="1168">1168</span>
<span id="1169">1169</span>
<span id="1170">1170</span>
<span id="1171">1171</span>
<span id="1172">1172</span>
<span id="1173">1173</span>
<span id="1174">1174</span>
<span id="1175">1175</span>
<span id="1176">1176</span>
<span id="1177">1177</span>
<span id="1178">1178</span>
<span id="1179">1179</span>
<span id="1180">1180</span>
<span id="1181">1181</span>
<span id="1182">1182</span>
<span id="1183">1183</span>
<span id="1184">1184</span>
<span id="1185">1185</span>
<span id="1186">1186</span>
<span id="1187">1187</span>
<span id="1188">1188</span>
<span id="1189">1189</span>
<span id="1190">1190</span>
<span id="1191">1191</span>
<span id="1192">1192</span>
<span id="1193">1193</span>
<span id="1194">1194</span>
<span id="1195">1195</span>
<span id="1196">1196</span>
<span id="1197">1197</span>
<span id="1198">1198</span>
<span id="1199">1199</span>
<span id="1200">1200</span>
<span id="1201">1201</span>
<span id="1202">1202</span>
<span id="1203">1203</span>
<span id="1204">1204</span>
<span id="1205">1205</span>
<span id="1206">1206</span>
<span id="1207">1207</span>
<span id="1208">1208</span>
<span id="1209">1209</span>
<span id="1210">1210</span>
<span id="1211">1211</span>
<span id="1212">1212</span>
<span id="1213">1213</span>
<span id="1214">1214</span>
<span id="1215">1215</span>
<span id="1216">1216</span>
<span id="1217">1217</span>
<span id="1218">1218</span>
<span id="1219">1219</span>
<span id="1220">1220</span>
<span id="1221">1221</span>
<span id="1222">1222</span>
<span id="1223">1223</span>
<span id="1224">1224</span>
<span id="1225">1225</span>
<span id="1226">1226</span>
<span id="1227">1227</span>
<span id="1228">1228</span>
<span id="1229">1229</span>
<span id="1230">1230</span>
<span id="1231">1231</span>
<span id="1232">1232</span>
<span id="1233">1233</span>
<span id="1234">1234</span>
<span id="1235">1235</span>
<span id="1236">1236</span>
<span id="1237">1237</span>
<span id="1238">1238</span>
<span id="1239">1239</span>
<span id="1240">1240</span>
<span id="1241">1241</span>
<span id="1242">1242</span>
<span id="1243">1243</span>
<span id="1244">1244</span>
<span id="1245">1245</span>
<span id="1246">1246</span>
<span id="1247">1247</span>
<span id="1248">1248</span>
<span id="1249">1249</span>
<span id="1250">1250</span>
<span id="1251">1251</span>
<span id="1252">1252</span>
<span id="1253">1253</span>
<span id="1254">1254</span>
<span id="1255">1255</span>
<span id="1256">1256</span>
<span id="1257">1257</span>
<span id="1258">1258</span>
<span id="1259">1259</span>
<span id="1260">1260</span>
<span id="1261">1261</span>
<span id="1262">1262</span>
<span id="1263">1263</span>
<span id="1264">1264</span>
<span id="1265">1265</span>
<span id="1266">1266</span>
<span id="1267">1267</span>
<span id="1268">1268</span>
<span id="1269">1269</span>
<span id="1270">1270</span>
<span id="1271">1271</span>
<span id="1272">1272</span>
<span id="1273">1273</span>
<span id="1274">1274</span>
<span id="1275">1275</span>
<span id="1276">1276</span>
<span id="1277">1277</span>
<span id="1278">1278</span>
<span id="1279">1279</span>
<span id="1280">1280</span>
<span id="1281">1281</span>
<span id="1282">1282</span>
<span id="1283">1283</span>
<span id="1284">1284</span>
<span id="1285">1285</span>
<span id="1286">1286</span>
<span id="1287">1287</span>
<span id="1288">1288</span>
<span id="1289">1289</span>
<span id="1290">1290</span>
<span id="1291">1291</span>
<span id="1292">1292</span>
<span id="1293">1293</span>
<span id="1294">1294</span>
<span id="1295">1295</span>
<span id="1296">1296</span>
<span id="1297">1297</span>
<span id="1298">1298</span>
<span id="1299">1299</span>
<span id="1300">1300</span>
<span id="1301">1301</span>
<span id="1302">1302</span>
<span id="1303">1303</span>
<span id="1304">1304</span>
<span id="1305">1305</span>
<span id="1306">1306</span>
<span id="1307">1307</span>
<span id="1308">1308</span>
<span id="1309">1309</span>
<span id="1310">1310</span>
<span id="1311">1311</span>
<span id="1312">1312</span>
<span id="1313">1313</span>
<span id="1314">1314</span>
<span id="1315">1315</span>
<span id="1316">1316</span>
<span id="1317">1317</span>
<span id="1318">1318</span>
<span id="1319">1319</span>
<span id="1320">1320</span>
<span id="1321">1321</span>
<span id="1322">1322</span>
<span id="1323">1323</span>
<span id="1324">1324</span>
<span id="1325">1325</span>
<span id="1326">1326</span>
</pre><pre class="rust"><code><span class="doccomment">/*!
Provides a noncontiguous NFA implementation of Aho-Corasick.
This is a low-level API that generally only needs to be used in niche
circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick)
instead of a noncontiguous NFA directly. Using an `NFA` directly is typically
only necessary when one needs access to the [`Automaton`] trait implementation.
*/
</span><span class="kw">use </span>alloc::{
collections::{BTreeSet, VecDeque},
vec,
vec::Vec,
};
<span class="kw">use crate</span>::{
automaton::Automaton,
util::{
alphabet::{ByteClassSet, ByteClasses},
error::{BuildError, MatchError},
prefilter::{<span class="self">self</span>, opposite_ascii_case, Prefilter},
primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
remapper::Remapper,
search::{Anchored, MatchKind},
special::Special,
},
};
<span class="doccomment">/// A noncontiguous NFA implementation of Aho-Corasick.
///
/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of
/// this type directly. Using an `NFA` directly is typically only necessary
/// when one needs access to the [`Automaton`] trait implementation.
///
/// This NFA represents the &quot;core&quot; implementation of Aho-Corasick in this
/// crate. Namely, constructing this NFA involves building a trie and then
/// filling in the failure transitions between states, similar to what is
/// described in any standard textbook description of Aho-Corasick.
///
/// In order to minimize heap usage and to avoid additional construction costs,
/// this implementation represents the transitions of all states as distinct
/// sparse memory allocations. This is where it gets its name from. That is,
/// this NFA has no contiguous memory allocation for its transition table. Each
/// state gets its own allocation.
///
/// While the sparse representation keeps memory usage to somewhat reasonable
/// levels, it is still quite large and also results in somewhat mediocre
/// search performance. For this reason, it is almost always a good idea to
/// use a [`contiguous::NFA`](crate::nfa::contiguous::NFA) instead. It is
/// marginally slower to build, but has higher throughput and can sometimes use
/// an order of magnitude less memory. The main reason to use a noncontiguous
/// NFA is when you need the fastest possible construction time, or when a
/// contiguous NFA does not have the desired capacity. (The total number of NFA
/// states it can have is fewer than a noncontiguous NFA.)
///
/// # Example
///
/// This example shows how to build an `NFA` directly and use it to execute
/// [`Automaton::try_find`]:
///
/// ```
/// use aho_corasick::{
/// automaton::Automaton,
/// nfa::noncontiguous::NFA,
/// Input, Match,
/// };
///
/// let patterns = &amp;[&quot;b&quot;, &quot;abc&quot;, &quot;abcd&quot;];
/// let haystack = &quot;abcd&quot;;
///
/// let nfa = NFA::new(patterns).unwrap();
/// assert_eq!(
/// Some(Match::must(0, 1..2)),
/// nfa.try_find(&amp;Input::new(haystack))?,
/// );
/// # Ok::&lt;(), Box&lt;dyn std::error::Error&gt;&gt;(())
/// ```
///
/// It is also possible to implement your own version of `try_find`. See the
/// [`Automaton`] documentation for an example.
</span><span class="attribute">#[derive(Clone)]
</span><span class="kw">pub struct </span>NFA {
<span class="doccomment">/// The match semantics built into this NFA.
</span>match_kind: MatchKind,
<span class="doccomment">/// A set of states. Each state defines its own transitions, a fail
/// transition and a set of indices corresponding to matches.
///
/// The state at index 1 is always the fail state, which is used only as a
/// sentinel. Namely, in the final NFA, no transition into the fail state
/// exists. (Well, they do, but they aren&#39;t followed. Instead, the state&#39;s
/// failure transition is followed.)
///
/// The state at index 0 is always the dead state. Dead states are
/// in every automaton, but only used when leftmost-{first,longest} match
/// semantics are enabled. Specifically, they instruct search to stop
/// at specific points in order to report the correct match location. In
/// the standard Aho-Corasick construction, there are no transitions to
/// the dead state.
///
/// The third state (index 2) is generally intended to be the starting or
/// &quot;root&quot; state.
</span>states: Vec&lt;State&gt;,
<span class="doccomment">/// The length, in bytes, of each pattern in this NFA. This slice is
/// indexed by `PatternID`.
///
/// The number of entries in this vector corresponds to the total number of
/// patterns in this automaton.
</span>pattern_lens: Vec&lt;SmallIndex&gt;,
<span class="doccomment">/// A prefilter for quickly skipping to candidate matches, if pertinent.
</span>prefilter: <span class="prelude-ty">Option</span>&lt;Prefilter&gt;,
<span class="doccomment">/// A set of equivalence classes in terms of bytes. We compute this while
/// building the NFA, but don&#39;t use it in the NFA&#39;s states. Instead, we
/// use this for building the DFA. We store it on the NFA since it&#39;s easy
/// to compute while visiting the patterns.
</span>byte_classes: ByteClasses,
<span class="doccomment">/// The length, in bytes, of the shortest pattern in this automaton. This
/// information is useful for detecting whether an automaton matches the
/// empty string or not.
</span>min_pattern_len: usize,
<span class="doccomment">/// The length, in bytes, of the longest pattern in this automaton. This
/// information is useful for keeping correct buffer sizes when searching
/// on streams.
</span>max_pattern_len: usize,
<span class="doccomment">/// The information required to deduce which states are &quot;special&quot; in this
/// NFA.
///
/// Since the DEAD and FAIL states are always the first two states and
/// there are only ever two start states (which follow all of the match
/// states), it follows that we can determine whether a state is a fail,
/// dead, match or start with just a few comparisons on the ID itself:
///
/// is_dead(sid): sid == NFA::DEAD
/// is_fail(sid): sid == NFA::FAIL
/// is_match(sid): NFA::FAIL &lt; sid &amp;&amp; sid &lt;= max_match_id
/// is_start(sid): sid == start_unanchored_id || sid == start_anchored_id
///
/// Note that this only applies to the NFA after it has been constructed.
/// During construction, the start states are the first ones added and the
/// match states are inter-leaved with non-match states. Once all of the
/// states have been added, the states are shuffled such that the above
/// predicates hold.
</span>special: Special,
<span class="doccomment">/// The number of bytes of heap used by this sparse NFA.
</span>memory_usage: usize,
}
<span class="kw">impl </span>NFA {
<span class="doccomment">/// Create a new Aho-Corasick noncontiguous NFA using the default
/// configuration.
///
/// Use a [`Builder`] if you want to change the configuration.
</span><span class="kw">pub fn </span>new&lt;I, P&gt;(patterns: I) -&gt; <span class="prelude-ty">Result</span>&lt;NFA, BuildError&gt;
<span class="kw">where
</span>I: IntoIterator&lt;Item = P&gt;,
P: AsRef&lt;[u8]&gt;,
{
NFA::builder().build(patterns)
}
<span class="doccomment">/// A convenience method for returning a new Aho-Corasick noncontiguous NFA
/// builder.
///
/// This usually permits one to just import the `NFA` type.
</span><span class="kw">pub fn </span>builder() -&gt; Builder {
Builder::new()
}
}
<span class="kw">impl </span>NFA {
<span class="doccomment">/// The DEAD state is a sentinel state like the FAIL state. The DEAD state
/// instructs any search to stop and return any currently recorded match,
/// or no match otherwise. Generally speaking, it is impossible for an
/// unanchored standard search to enter a DEAD state. But an anchored
/// search can, and so too can a leftmost search.
///
/// We put DEAD before FAIL so that DEAD is always 0. We repeat this
/// decision across the other Aho-Corasick automata, so that DEAD
/// states there are always 0 too. It&#39;s not that we need all of the
/// implementations to agree, but rather, the contiguous NFA and the DFA
/// use a sort of &quot;premultiplied&quot; state identifier where the only state
/// whose ID is always known and constant is the first state. Subsequent
/// state IDs depend on how much space has already been used in the
/// transition table.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">const </span>DEAD: StateID = StateID::new_unchecked(<span class="number">0</span>);
<span class="doccomment">/// The FAIL state mostly just corresponds to the ID of any transition on a
/// state that isn&#39;t explicitly defined. When one transitions into the FAIL
/// state, one must follow the previous state&#39;s failure transition before
/// doing the next state lookup. In this way, FAIL is more of a sentinel
/// than a state that one actually transitions into. In particular, it is
/// never exposed in the `Automaton` interface.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">const </span>FAIL: StateID = StateID::new_unchecked(<span class="number">1</span>);
<span class="doccomment">/// Returns the equivalence classes of bytes found while constructing
/// this NFA.
///
/// Note that the NFA doesn&#39;t actually make use of these equivalence
/// classes. Instead, these are useful for building the DFA when desired.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">fn </span>byte_classes(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="kw-2">&amp;</span>ByteClasses {
<span class="kw-2">&amp;</span><span class="self">self</span>.byte_classes
}
<span class="doccomment">/// Returns a slice containing the length of each pattern in this searcher.
/// It is indexed by `PatternID` and has length `NFA::patterns_len`.
///
/// This is exposed for convenience when building a contiguous NFA. But it
/// can be reconstructed from the `Automaton` API if necessary.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">fn </span>pattern_lens_raw(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="kw-2">&amp;</span>[SmallIndex] {
<span class="kw-2">&amp;</span><span class="self">self</span>.pattern_lens
}
<span class="doccomment">/// Returns a slice of all states in this non-contiguous NFA.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">fn </span>states(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="kw-2">&amp;</span>[State] {
<span class="kw-2">&amp;</span><span class="self">self</span>.states
}
<span class="doccomment">/// Returns the underlying &quot;special&quot; state information for this NFA.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">fn </span>special(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="kw-2">&amp;</span>Special {
<span class="kw-2">&amp;</span><span class="self">self</span>.special
}
<span class="doccomment">/// Swaps the states at `id1` and `id2`.
///
/// This does not update the transitions of any state to account for the
/// state swap.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">fn </span>swap_states(<span class="kw-2">&amp;mut </span><span class="self">self</span>, id1: StateID, id2: StateID) {
<span class="self">self</span>.states.swap(id1.as_usize(), id2.as_usize());
}
<span class="doccomment">/// Re-maps all state IDs in this NFA according to the `map` function
/// given.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">fn </span>remap(<span class="kw-2">&amp;mut </span><span class="self">self</span>, map: <span class="kw">impl </span>Fn(StateID) -&gt; StateID) {
<span class="kw">for </span>state <span class="kw">in </span><span class="self">self</span>.states.iter_mut() {
state.fail = map(state.fail);
<span class="kw">for </span>(<span class="kw">_</span>, <span class="kw-2">ref mut </span>sid) <span class="kw">in </span>state.trans.iter_mut() {
<span class="kw-2">*</span>sid = map(<span class="kw-2">*</span>sid);
}
}
}
}
<span class="comment">// SAFETY: &#39;start_state&#39; always returns a valid state ID, &#39;next_state&#39; always
// returns a valid state ID given a valid state ID. We otherwise claim that
// all other methods are correct as well.
</span><span class="kw">unsafe impl </span>Automaton <span class="kw">for </span>NFA {
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>start_state(<span class="kw-2">&amp;</span><span class="self">self</span>, anchored: Anchored) -&gt; <span class="prelude-ty">Result</span>&lt;StateID, MatchError&gt; {
<span class="kw">match </span>anchored {
Anchored::No =&gt; <span class="prelude-val">Ok</span>(<span class="self">self</span>.special.start_unanchored_id),
Anchored::Yes =&gt; <span class="prelude-val">Ok</span>(<span class="self">self</span>.special.start_anchored_id),
}
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>next_state(
<span class="kw-2">&amp;</span><span class="self">self</span>,
anchored: Anchored,
<span class="kw-2">mut </span>sid: StateID,
byte: u8,
) -&gt; StateID {
<span class="comment">// This terminates since:
//
// 1. state.fail never points to the FAIL state.
// 2. All state.fail values point to a state closer to the start state.
// 3. The start state has no transitions to the FAIL state.
</span><span class="kw">loop </span>{
<span class="kw">let </span>state = <span class="kw-2">&amp;</span><span class="self">self</span>.states[sid];
<span class="kw">let </span>next = state.next_state(byte);
<span class="kw">if </span>next != NFA::FAIL {
<span class="kw">return </span>next;
}
<span class="comment">// For an anchored search, we never follow failure transitions
// because failure transitions lead us down a path to matching
// a *proper* suffix of the path we were on. Thus, it can only
// produce matches that appear after the beginning of the search.
</span><span class="kw">if </span>anchored.is_anchored() {
<span class="kw">return </span>NFA::DEAD;
}
sid = state.fail;
}
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>is_special(<span class="kw-2">&amp;</span><span class="self">self</span>, sid: StateID) -&gt; bool {
sid &lt;= <span class="self">self</span>.special.max_special_id
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>is_dead(<span class="kw-2">&amp;</span><span class="self">self</span>, sid: StateID) -&gt; bool {
sid == NFA::DEAD
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>is_match(<span class="kw-2">&amp;</span><span class="self">self</span>, sid: StateID) -&gt; bool {
<span class="comment">// N.B. This returns true when sid==NFA::FAIL but that&#39;s okay because
// NFA::FAIL is not actually a valid state ID from the perspective of
// the Automaton trait. Namely, it is never returned by &#39;start_state&#39;
// or by &#39;next_state&#39;. So we don&#39;t need to care about it here.
</span>!<span class="self">self</span>.is_dead(sid) &amp;&amp; sid &lt;= <span class="self">self</span>.special.max_match_id
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>is_start(<span class="kw-2">&amp;</span><span class="self">self</span>, sid: StateID) -&gt; bool {
sid == <span class="self">self</span>.special.start_unanchored_id
|| sid == <span class="self">self</span>.special.start_anchored_id
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>match_kind(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; MatchKind {
<span class="self">self</span>.match_kind
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>patterns_len(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; usize {
<span class="self">self</span>.pattern_lens.len()
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>pattern_len(<span class="kw-2">&amp;</span><span class="self">self</span>, pid: PatternID) -&gt; usize {
<span class="self">self</span>.pattern_lens[pid].as_usize()
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>min_pattern_len(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; usize {
<span class="self">self</span>.min_pattern_len
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>max_pattern_len(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; usize {
<span class="self">self</span>.max_pattern_len
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>match_len(<span class="kw-2">&amp;</span><span class="self">self</span>, sid: StateID) -&gt; usize {
<span class="self">self</span>.states[sid].matches.len()
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>match_pattern(<span class="kw-2">&amp;</span><span class="self">self</span>, sid: StateID, index: usize) -&gt; PatternID {
<span class="self">self</span>.states[sid].matches[index]
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>memory_usage(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; usize {
<span class="self">self</span>.memory_usage
+ <span class="self">self</span>.prefilter.as_ref().map_or(<span class="number">0</span>, |p| p.memory_usage())
}
<span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>prefilter(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; <span class="prelude-ty">Option</span>&lt;<span class="kw-2">&amp;</span>Prefilter&gt; {
<span class="self">self</span>.prefilter.as_ref()
}
}
<span class="doccomment">/// A representation of a sparse NFA state for an Aho-Corasick automaton.
///
/// It contains the transitions to the next state, a failure transition for
/// cases where there exists no other transition for the current input byte
/// and the matches implied by visiting this state (if any).
</span><span class="attribute">#[derive(Clone)]
</span><span class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">struct </span>State {
<span class="doccomment">/// The set of defined transitions for this state sorted by `u8`. In an
/// unanchored search, if a byte is not in this set of transitions, then
/// it should transition to `fail`. In an anchored search, it should
/// transition to the special DEAD state.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) trans: Vec&lt;(u8, StateID)&gt;,
<span class="doccomment">/// The patterns that match once this state is entered. Note that order
/// is important in the leftmost case. For example, if one adds &#39;foo&#39; and
/// &#39;foo&#39; (duplicate patterns are not disallowed), then in a leftmost-first
/// search, only the first &#39;foo&#39; will ever match.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) matches: Vec&lt;PatternID&gt;,
<span class="doccomment">/// The state that should be transitioned to if the current byte in the
/// haystack does not have a corresponding transition defined in this
/// state.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) fail: StateID,
<span class="doccomment">/// The depth of this state. Specifically, this is the distance from this
/// state to the starting state. (For the special sentinel states DEAD and
/// FAIL, their depth is always 0.) The depth of a starting state is 0.
///
/// Note that depth is currently not used in this non-contiguous NFA. It
/// may in the future, but it is used in the contiguous NFA. Namely, it
/// permits an optimization where states near the starting state have their
/// transitions stored in a dense fashion, but all other states have their
/// transitions stored in a sparse fashion. (This non-contiguous NFA uses
/// a sparse representation for all states unconditionally.) In any case,
/// this is really the only convenient place to compute and store this
/// information, which we need when building the contiguous NFA.
</span><span class="kw">pub</span>(<span class="kw">crate</span>) depth: SmallIndex,
}
<span class="kw">impl </span>State {
<span class="doccomment">/// Return the heap memory used by this state. Note that if `State` is
/// itself on the heap, then callers need to call this in addition to
/// `size_of::&lt;State&gt;()` to get the full heap memory used.
</span><span class="kw">fn </span>memory_usage(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; usize {
<span class="kw">use </span>core::mem::size_of;
(<span class="self">self</span>.trans.len() * size_of::&lt;(u8, StateID)&gt;())
+ (<span class="self">self</span>.matches.len() * size_of::&lt;PatternID&gt;())
}
<span class="doccomment">/// Return true if and only if this state is a match state.
</span><span class="kw">fn </span>is_match(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; bool {
!<span class="self">self</span>.matches.is_empty()
}
<span class="doccomment">/// Return the next state by following the transition for the given byte.
/// If no transition for the given byte is defined, then the FAIL state ID
/// is returned.
</span><span class="attribute">#[inline(always)]
</span><span class="kw">fn </span>next_state(<span class="kw-2">&amp;</span><span class="self">self</span>, byte: u8) -&gt; StateID {
<span class="comment">// This is a special case that targets the unanchored starting state.
// By construction, the unanchored starting state is actually a dense
// state, because every possible transition is defined on it. Any
// transitions that weren&#39;t added as part of initial trie construction
// get explicitly added as a self-transition back to itself. Thus, we
// can treat it as if it were dense and do a constant time lookup.
//
// This has *massive* benefit when executing searches because the
// unanchored starting state is by far the hottest state and is
// frequently visited. Moreover, the &#39;for&#39; loop below that works
// decently on an actually sparse state is disastrous on a state that
// is nearly or completely dense.
//
// This optimization also works in general, including for non-starting
// states that happen to have every transition defined. Namely, it
// is impossible for &#39;self.trans&#39; to have duplicate transitions (by
// construction) and transitions are always in sorted ascending order.
// So if a state has 256 transitions, it is, by construction, dense and
// amenable to constant time indexing.
</span><span class="kw">if </span><span class="self">self</span>.trans.len() == <span class="number">256 </span>{
<span class="self">self</span>.trans[usize::from(byte)].<span class="number">1
</span>} <span class="kw">else </span>{
<span class="kw">for </span><span class="kw-2">&amp;</span>(b, id) <span class="kw">in </span><span class="self">self</span>.trans.iter() {
<span class="kw">if </span>b == byte {
<span class="kw">return </span>id;
}
}
NFA::FAIL
}
}
<span class="doccomment">/// Set the transition for the given byte to the state ID given.
///
/// Note that one should not set transitions to the FAIL state. It is not
/// technically incorrect, but it wastes space. If a transition is not
/// defined, then it is automatically assumed to lead to the FAIL state.
</span><span class="kw">fn </span>set_next_state(<span class="kw-2">&amp;mut </span><span class="self">self</span>, byte: u8, next: StateID) {
<span class="kw">match </span><span class="self">self</span>.trans.binary_search_by_key(<span class="kw-2">&amp;</span>byte, |<span class="kw-2">&amp;</span>(b, <span class="kw">_</span>)| b) {
<span class="prelude-val">Ok</span>(i) =&gt; <span class="self">self</span>.trans[i] = (byte, next),
<span class="prelude-val">Err</span>(i) =&gt; <span class="self">self</span>.trans.insert(i, (byte, next)),
}
}
}
<span class="kw">impl </span>core::fmt::Debug <span class="kw">for </span>State {
<span class="kw">fn </span>fmt(<span class="kw-2">&amp;</span><span class="self">self</span>, f: <span class="kw-2">&amp;mut </span>core::fmt::Formatter) -&gt; core::fmt::Result {
<span class="kw">use crate</span>::{automaton::sparse_transitions, util::debug::DebugByte};
<span class="kw">let </span>it = sparse_transitions(<span class="self">self</span>.trans.iter().copied()).enumerate();
<span class="kw">for </span>(i, (start, end, sid)) <span class="kw">in </span>it {
<span class="kw">if </span>i &gt; <span class="number">0 </span>{
<span class="macro">write!</span>(f, <span class="string">&quot;, &quot;</span>)<span class="question-mark">?</span>;
}
<span class="kw">if </span>start == end {
<span class="macro">write!</span>(f, <span class="string">&quot;{:?} =&gt; {:?}&quot;</span>, DebugByte(start), sid.as_usize())<span class="question-mark">?</span>;
} <span class="kw">else </span>{
<span class="macro">write!</span>(
f,
<span class="string">&quot;{:?}-{:?} =&gt; {:?}&quot;</span>,
DebugByte(start),
DebugByte(end),
sid.as_usize()
)<span class="question-mark">?</span>;
}
}
<span class="prelude-val">Ok</span>(())
}
}
<span class="doccomment">/// A builder for configuring an Aho-Corasick noncontiguous NFA.
///
/// This builder has a subset of the options available to an
/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
/// their behavior is identical.
</span><span class="attribute">#[derive(Clone, Debug)]
</span><span class="kw">pub struct </span>Builder {
match_kind: MatchKind,
prefilter: bool,
ascii_case_insensitive: bool,
}
<span class="kw">impl </span>Default <span class="kw">for </span>Builder {
<span class="kw">fn </span>default() -&gt; Builder {
Builder {
match_kind: MatchKind::default(),
prefilter: <span class="bool-val">true</span>,
ascii_case_insensitive: <span class="bool-val">false</span>,
}
}
}
<span class="kw">impl </span>Builder {
<span class="doccomment">/// Create a new builder for configuring an Aho-Corasick noncontiguous NFA.
</span><span class="kw">pub fn </span>new() -&gt; Builder {
Builder::default()
}
<span class="doccomment">/// Build an Aho-Corasick noncontiguous NFA from the given iterator of
/// patterns.
///
/// A builder may be reused to create more NFAs.
</span><span class="kw">pub fn </span>build&lt;I, P&gt;(<span class="kw-2">&amp;</span><span class="self">self</span>, patterns: I) -&gt; <span class="prelude-ty">Result</span>&lt;NFA, BuildError&gt;
<span class="kw">where
</span>I: IntoIterator&lt;Item = P&gt;,
P: AsRef&lt;[u8]&gt;,
{
<span class="macro">debug!</span>(<span class="string">&quot;building non-contiguous NFA&quot;</span>);
<span class="kw">let </span>nfa = Compiler::new(<span class="self">self</span>)<span class="question-mark">?</span>.compile(patterns)<span class="question-mark">?</span>;
<span class="macro">debug!</span>(
<span class="string">&quot;non-contiguous NFA built, &lt;states: {:?}, size: {:?}&gt;&quot;</span>,
nfa.states.len(),
nfa.memory_usage()
);
<span class="prelude-val">Ok</span>(nfa)
}
<span class="doccomment">/// Set the desired match semantics.
///
/// See
/// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
/// for more documentation and examples.
</span><span class="kw">pub fn </span>match_kind(<span class="kw-2">&amp;mut </span><span class="self">self</span>, kind: MatchKind) -&gt; <span class="kw-2">&amp;mut </span>Builder {
<span class="self">self</span>.match_kind = kind;
<span class="self">self
</span>}
<span class="doccomment">/// Enable ASCII-aware case insensitive matching.
///
/// See
/// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
/// for more documentation and examples.
</span><span class="kw">pub fn </span>ascii_case_insensitive(<span class="kw-2">&amp;mut </span><span class="self">self</span>, yes: bool) -&gt; <span class="kw-2">&amp;mut </span>Builder {
<span class="self">self</span>.ascii_case_insensitive = yes;
<span class="self">self
</span>}
<span class="doccomment">/// Enable heuristic prefilter optimizations.
///
/// See
/// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
/// for more documentation and examples.
</span><span class="kw">pub fn </span>prefilter(<span class="kw-2">&amp;mut </span><span class="self">self</span>, yes: bool) -&gt; <span class="kw-2">&amp;mut </span>Builder {
<span class="self">self</span>.prefilter = yes;
<span class="self">self
</span>}
}
<span class="doccomment">/// A compiler uses a builder configuration and builds up the NFA formulation
/// of an Aho-Corasick automaton. This roughly corresponds to the standard
/// formulation described in textbooks, with some tweaks to support leftmost
/// searching.
</span><span class="attribute">#[derive(Debug)]
</span><span class="kw">struct </span>Compiler&lt;<span class="lifetime">&#39;a</span>&gt; {
builder: <span class="kw-2">&amp;</span><span class="lifetime">&#39;a </span>Builder,
prefilter: prefilter::Builder,
nfa: NFA,
byteset: ByteClassSet,
}
<span class="kw">impl</span>&lt;<span class="lifetime">&#39;a</span>&gt; Compiler&lt;<span class="lifetime">&#39;a</span>&gt; {
<span class="kw">fn </span>new(builder: <span class="kw-2">&amp;</span><span class="lifetime">&#39;a </span>Builder) -&gt; <span class="prelude-ty">Result</span>&lt;Compiler&lt;<span class="lifetime">&#39;a</span>&gt;, BuildError&gt; {
<span class="kw">let </span>prefilter = prefilter::Builder::new(builder.match_kind)
.ascii_case_insensitive(builder.ascii_case_insensitive);
<span class="prelude-val">Ok</span>(Compiler {
builder,
prefilter,
nfa: NFA {
match_kind: builder.match_kind,
states: <span class="macro">vec!</span>[],
pattern_lens: <span class="macro">vec!</span>[],
prefilter: <span class="prelude-val">None</span>,
byte_classes: ByteClasses::singletons(),
min_pattern_len: usize::MAX,
max_pattern_len: <span class="number">0</span>,
special: Special::zero(),
memory_usage: <span class="number">0</span>,
},
byteset: ByteClassSet::empty(),
})
}
<span class="kw">fn </span>compile&lt;I, P&gt;(<span class="kw-2">mut </span><span class="self">self</span>, patterns: I) -&gt; <span class="prelude-ty">Result</span>&lt;NFA, BuildError&gt;
<span class="kw">where
</span>I: IntoIterator&lt;Item = P&gt;,
P: AsRef&lt;[u8]&gt;,
{
<span class="comment">// the dead state, only used for leftmost and fixed to id==0
</span><span class="self">self</span>.add_state(<span class="number">0</span>)<span class="question-mark">?</span>;
<span class="comment">// the fail state, which is never entered and fixed to id==1
</span><span class="self">self</span>.add_state(<span class="number">0</span>)<span class="question-mark">?</span>;
<span class="comment">// unanchored start state, initially fixed to id==2 but later shuffled
// to appear after all non-start match states.
</span><span class="self">self</span>.nfa.special.start_unanchored_id = <span class="self">self</span>.add_state(<span class="number">0</span>)<span class="question-mark">?</span>;
<span class="comment">// anchored start state, initially fixed to id==3 but later shuffled
// to appear after unanchored start state.
</span><span class="self">self</span>.nfa.special.start_anchored_id = <span class="self">self</span>.add_state(<span class="number">0</span>)<span class="question-mark">?</span>;
<span class="comment">// Initialize the unanchored starting state in order to make it dense,
// and thus make transition lookups on this state faster.
</span><span class="self">self</span>.init_unanchored_start_state();
<span class="comment">// Build the base trie from the given patterns.
</span><span class="self">self</span>.build_trie(patterns)<span class="question-mark">?</span>;
<span class="comment">// Add transitions (and maybe matches) to the anchored starting state.
// The anchored starting state is used for anchored searches. The only
// mechanical difference between it and the unanchored start state is
// that missing transitions map to the DEAD state instead of the FAIL
// state.
</span><span class="self">self</span>.set_anchored_start_state();
<span class="comment">// Rewrite transitions to the FAIL state on the unanchored start state
// as self-transitions. This keeps the start state active at all times.
</span><span class="self">self</span>.add_unanchored_start_state_loop();
<span class="comment">// Set all transitions on the DEAD state to point to itself. This way,
// the DEAD state can never be escaped. It MUST be used as a sentinel
// in any correct search.
</span><span class="self">self</span>.add_dead_state_loop();
<span class="comment">// The meat of the Aho-Corasick algorithm: compute and write failure
// transitions. i.e., the state to move to when a transition isn&#39;t
// defined in the current state. These are epsilon transitions and thus
// make this formulation an NFA.
</span><span class="self">self</span>.fill_failure_transitions();
<span class="comment">// Handle a special case under leftmost semantics when at least one
// of the patterns is the empty string.
</span><span class="self">self</span>.close_start_state_loop_for_leftmost();
<span class="comment">// Shuffle states so that we have DEAD, FAIL, MATCH, ..., START, START,
// NON-MATCH, ... This permits us to very quickly query the type of
// the state we&#39;re currently in during a search.
</span><span class="self">self</span>.shuffle();
<span class="comment">// Turn our set of bytes into equivalence classes. This NFA
// implementation doesn&#39;t use byte classes directly, but any
// Aho-Corasick searcher built from this one might.
</span><span class="self">self</span>.nfa.byte_classes = <span class="self">self</span>.byteset.byte_classes();
<span class="self">self</span>.nfa.prefilter = <span class="self">self</span>.prefilter.build();
<span class="self">self</span>.calculate_memory_usage();
<span class="comment">// Store the maximum ID of all *relevant* special states. Start states
// are only relevant when we have a prefilter, otherwise, there is zero
// reason to care about whether a state is a start state or not during
// a search. Indeed, without a prefilter, we are careful to explicitly
// NOT care about start states, otherwise the search can ping pong
// between the unrolled loop and the handling of special-status states
// and destroy perf.
</span><span class="self">self</span>.nfa.special.max_special_id = <span class="kw">if </span><span class="self">self</span>.nfa.prefilter.is_some() {
<span class="comment">// Why the anchored starting state? Because we always put it
// after the unanchored starting state and it is therefore the
// maximum. Why put unanchored followed by anchored? No particular
// reason, but that&#39;s how the states are logically organized in the
// Thompson NFA implementation found in regex-automata. ¯\_(ツ)_/¯
</span><span class="self">self</span>.nfa.special.start_anchored_id
} <span class="kw">else </span>{
<span class="self">self</span>.nfa.special.max_match_id
};
<span class="prelude-val">Ok</span>(<span class="self">self</span>.nfa)
}
<span class="doccomment">/// This sets up the initial prefix trie that makes up the Aho-Corasick
/// automaton. Effectively, it creates the basic structure of the
/// automaton, where every pattern given has a path from the start state to
/// the end of the pattern.
</span><span class="kw">fn </span>build_trie&lt;I, P&gt;(<span class="kw-2">&amp;mut </span><span class="self">self</span>, patterns: I) -&gt; <span class="prelude-ty">Result</span>&lt;(), BuildError&gt;
<span class="kw">where
</span>I: IntoIterator&lt;Item = P&gt;,
P: AsRef&lt;[u8]&gt;,
{
<span class="lifetime">&#39;PATTERNS</span>: <span class="kw">for </span>(i, pat) <span class="kw">in </span>patterns.into_iter().enumerate() {
<span class="kw">let </span>pid = PatternID::new(i).map_err(|e| {
BuildError::pattern_id_overflow(
PatternID::MAX.as_u64(),
e.attempted(),
)
})<span class="question-mark">?</span>;
<span class="kw">let </span>pat = pat.as_ref();
<span class="kw">let </span>patlen = SmallIndex::new(pat.len())
.map_err(|<span class="kw">_</span>| BuildError::pattern_too_long(pid, pat.len()))<span class="question-mark">?</span>;
<span class="self">self</span>.nfa.min_pattern_len =
core::cmp::min(<span class="self">self</span>.nfa.min_pattern_len, pat.len());
<span class="self">self</span>.nfa.max_pattern_len =
core::cmp::max(<span class="self">self</span>.nfa.max_pattern_len, pat.len());
<span class="macro">assert_eq!</span>(
i,
<span class="self">self</span>.nfa.pattern_lens.len(),
<span class="string">&quot;expected number of patterns to match pattern ID&quot;
</span>);
<span class="self">self</span>.nfa.pattern_lens.push(patlen);
<span class="comment">// We add the pattern to the prefilter here because the pattern
// ID in the prefilter is determined with respect to the patterns
// added to the prefilter. That is, it isn&#39;t the ID we have here,
// but the one determined by its own accounting of patterns.
// To ensure they line up, we add every pattern we see to the
// prefilter, even if some patterns ultimately are impossible to
// match (in leftmost-first semantics specifically).
//
// Another way of doing this would be to expose an API in the
// prefilter to permit setting your own pattern IDs. Or to just use
// our own map and go between them. But this case is sufficiently
// rare that we don&#39;t bother and just make sure they&#39;re in sync.
</span><span class="kw">if </span><span class="self">self</span>.builder.prefilter {
<span class="self">self</span>.prefilter.add(pat);
}
<span class="kw">let </span><span class="kw-2">mut </span>prev = <span class="self">self</span>.nfa.special.start_unanchored_id;
<span class="kw">let </span><span class="kw-2">mut </span>saw_match = <span class="bool-val">false</span>;
<span class="kw">for </span>(depth, <span class="kw-2">&amp;</span>b) <span class="kw">in </span>pat.iter().enumerate() {
<span class="comment">// When leftmost-first match semantics are requested, we
// specifically stop adding patterns when a previously added
// pattern is a prefix of it. We avoid adding it because
// leftmost-first semantics imply that the pattern can never
// match. This is not just an optimization to save space! It
// is necessary for correctness. In fact, this is the only
// difference in the automaton between the implementations for
// leftmost-first and leftmost-longest.
</span>saw_match = saw_match || <span class="self">self</span>.nfa.states[prev].is_match();
<span class="kw">if </span><span class="self">self</span>.builder.match_kind.is_leftmost_first() &amp;&amp; saw_match {
<span class="comment">// Skip to the next pattern immediately. This avoids
// incorrectly adding a match after this loop terminates.
</span><span class="kw">continue </span><span class="lifetime">&#39;PATTERNS</span>;
}
<span class="comment">// Add this byte to our equivalence classes. We don&#39;t use these
// for NFA construction. These are instead used only if we&#39;re
// building a DFA. They would technically be useful for the
// NFA, but it would require a second pass over the patterns.
</span><span class="self">self</span>.byteset.set_range(b, b);
<span class="kw">if </span><span class="self">self</span>.builder.ascii_case_insensitive {
<span class="kw">let </span>b = opposite_ascii_case(b);
<span class="self">self</span>.byteset.set_range(b, b);
}
<span class="comment">// If the transition from prev using the current byte already
// exists, then just move through it. Otherwise, add a new
// state. We track the depth here so that we can determine
// how to represent transitions. States near the start state
// use a dense representation that uses more memory but is
// faster. Other states use a sparse representation that uses
// less memory but is slower.
</span><span class="kw">let </span>next = <span class="self">self</span>.nfa.states[prev].next_state(b);
<span class="kw">if </span>next != NFA::FAIL {
prev = next;
} <span class="kw">else </span>{
<span class="kw">let </span>next = <span class="self">self</span>.add_state(depth)<span class="question-mark">?</span>;
<span class="self">self</span>.nfa.states[prev].set_next_state(b, next);
<span class="kw">if </span><span class="self">self</span>.builder.ascii_case_insensitive {
<span class="kw">let </span>b = opposite_ascii_case(b);
<span class="self">self</span>.nfa.states[prev].set_next_state(b, next);
}
prev = next;
}
}
<span class="comment">// Once the pattern has been added, log the match in the final
// state that it reached.
</span><span class="self">self</span>.nfa.states[prev].matches.push(pid);
}
<span class="prelude-val">Ok</span>(())
}
<span class="doccomment">/// This routine creates failure transitions according to the standard
/// textbook formulation of the Aho-Corasick algorithm, with a couple small
/// tweaks to support &quot;leftmost&quot; semantics.
///
/// Building failure transitions is the most interesting part of building
/// the Aho-Corasick automaton, because they are what allow searches to
/// be performed in linear time. Specifically, a failure transition is
/// a single transition associated with each state that points back to
/// the longest proper suffix of the pattern being searched. The failure
/// transition is followed whenever there exists no transition on the
/// current state for the current input byte. If there is no other proper
/// suffix, then the failure transition points back to the starting state.
///
/// For example, let&#39;s say we built an Aho-Corasick automaton with the
/// following patterns: &#39;abcd&#39; and &#39;cef&#39;. The trie looks like this:
///
/// ```ignore
///          a - S1 - b - S2 - c - S3 - d - S4*
///         /
///     S0 - c - S5 - e - S6 - f - S7*
/// ```
///
/// At this point, it should be fairly straight-forward to see how this
/// trie can be used in a simplistic way. At any given position in the
/// text we&#39;re searching (called the &quot;subject&quot; string), all we need to do
/// is follow the transitions in the trie by consuming one transition for
/// each byte in the subject string. If we reach a match state, then we can
/// report that location as a match.
///
/// The trick comes when searching a subject string like &#39;abcef&#39;. We&#39;ll
/// initially follow the transition from S0 to S1 and wind up in S3 after
/// observing the &#39;c&#39; byte. At this point, the next byte is &#39;e&#39; but state
/// S3 has no transition for &#39;e&#39;, so the search fails. We then would need
/// to restart the search at the next position in &#39;abcef&#39;, which
/// corresponds to &#39;b&#39;. The match would fail, but the next search starting
/// at &#39;c&#39; would finally succeed. The problem with this approach is that
/// we wind up searching the subject string potentially many times. In
/// effect, this makes the algorithm have worst case `O(n * m)` complexity,
/// where `n ~ len(subject)` and `m ~ len(all patterns)`. We would instead
/// like to achieve a `O(n + m)` worst case complexity.
///
/// This is where failure transitions come in. Instead of dying at S3 in
/// the first search, the automaton can instruct the search to move to
/// another part of the automaton that corresponds to a suffix of what
/// we&#39;ve seen so far. Recall that we&#39;ve seen &#39;abc&#39; in the subject string,
/// and the automaton does indeed have a non-empty suffix, &#39;c&#39;, that could
/// potentially lead to another match. Thus, the actual Aho-Corasick
/// automaton for our patterns in this case looks like this:
///
/// ```ignore
///          a - S1 - b - S2 - c - S3 - d - S4*
///         /                      /
///        /       ----------------
///       /       /
///     S0 - c - S5 - e - S6 - f - S7*
/// ```
///
/// That is, we have a failure transition from S3 to S5, which is followed
/// exactly in cases when we are in state S3 but see any byte other than
/// &#39;d&#39; (that is, we&#39;ve &quot;failed&quot; to find a match in this portion of our
/// trie). We know we can transition back to S5 because we&#39;ve already seen
/// a &#39;c&#39; byte, so we don&#39;t need to re-scan it. We can then pick back up
/// with the search starting at S5 and complete our match.
///
/// Adding failure transitions to a trie is fairly simple, but subtle. The
/// key issue is that you might have multiple failure transitions that you
/// need to follow. For example, look at the trie for the patterns
/// &#39;abcd&#39;, &#39;b&#39;, &#39;bcd&#39; and &#39;cd&#39;:
///
/// ```ignore
///        - a - S1 - b - S2* - c - S3 - d - S4*
///       /               /         /
///      /         -------   -------
///     /         /         /
///   S0 --- b - S5* - c - S6 - d - S7*
///     \                  /
///      \         --------
///       \       /
///        - c - S8 - d - S9*
/// ```
///
/// The failure transitions for this trie are defined from S2 to S5,
/// S3 to S6 and S6 to S8. Moreover, state S2 needs to track that it
/// corresponds to a match, since its failure transition to S5 is itself
/// a match state.
///
/// Perhaps the simplest way to think about adding these failure transitions
/// is recursively. That is, if you know the failure transitions for every
/// possible previous state that could be visited (e.g., when computing the
/// failure transition for S3, you already know the failure transitions
/// for S0, S1 and S2), then you can simply follow the failure transition
/// of the previous state and check whether the incoming transition is
/// defined after following the failure transition.
///
/// For example, when determining the failure state for S3, by our
/// assumptions, we already know that there is a failure transition from
/// S2 (the previous state) to S5. So we follow that transition and check
/// whether the transition connecting S2 to S3 is defined. Indeed, it is,
/// as there is a transition from S5 to S6 for the byte &#39;c&#39;. If no such
/// transition existed, we could keep following the failure transitions
/// until we reach the start state, which is the failure transition for
/// every state that has no corresponding proper suffix.
///
/// We don&#39;t actually use recursion to implement this, but instead, use a
/// breadth first search of the automaton. Our base case is the start
/// state, whose failure transition is just a transition to itself.
///
/// When building a leftmost automaton, we proceed as above, but only
/// include a subset of failure transitions. Namely, we omit any failure
/// transitions that appear after a match state in the trie. This is
/// because failure transitions always point back to a proper suffix of
/// what has been seen so far. Thus, following a failure transition after
/// a match implies looking for a match that starts after the one that has
/// already been seen, which is of course therefore not the leftmost match.
///
/// N.B. I came up with this algorithm on my own, and after scouring all of
/// the other AC implementations I know of (Perl, Snort, many on GitHub).
/// I couldn&#39;t find any that implement leftmost semantics like this.
/// Perl of course needs leftmost-first semantics, but they implement it
/// with a seeming hack at *search* time instead of encoding it into the
/// automaton. There are also a couple Java libraries that support leftmost
/// longest semantics, but they do it by building a queue of matches at
/// search time, which is even worse than what Perl is doing. ---AG
</span><span class="kw">fn </span>fill_failure_transitions(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span>is_leftmost = <span class="self">self</span>.builder.match_kind.is_leftmost();
<span class="kw">let </span>start_uid = <span class="self">self</span>.nfa.special.start_unanchored_id;
<span class="comment">// Initialize the queue for breadth first search with all transitions
// out of the start state. We handle the start state specially because
// we only want to follow non-self transitions. If we followed self
// transitions, then this would never terminate.
</span><span class="kw">let </span><span class="kw-2">mut </span>queue = VecDeque::new();
<span class="kw">let </span><span class="kw-2">mut </span>seen = <span class="self">self</span>.queued_set();
<span class="kw">for </span>i <span class="kw">in </span><span class="number">0</span>..<span class="self">self</span>.nfa.states[start_uid].trans.len() {
<span class="kw">let </span>(<span class="kw">_</span>, next) = <span class="self">self</span>.nfa.states[start_uid].trans[i];
<span class="comment">// Skip anything we&#39;ve seen before and any self-transitions on the
// start state.
</span><span class="kw">if </span>next == start_uid || seen.contains(next) {
<span class="kw">continue</span>;
}
queue.push_back(next);
seen.insert(next);
<span class="comment">// Under leftmost semantics, if a state immediately following
// the start state is a match state, then we never want to
// follow its failure transition since the failure transition
// necessarily leads back to the start state, which we never
// want to do for leftmost matching after a match has been
// found.
//
// We apply the same logic to non-start states below as well.
</span><span class="kw">if </span>is_leftmost &amp;&amp; <span class="self">self</span>.nfa.states[next].is_match() {
<span class="self">self</span>.nfa.states[next].fail = NFA::DEAD;
}
}
<span class="kw">while let </span><span class="prelude-val">Some</span>(id) = queue.pop_front() {
<span class="kw">for </span>i <span class="kw">in </span><span class="number">0</span>..<span class="self">self</span>.nfa.states[id].trans.len() {
<span class="kw">let </span>(b, next) = <span class="self">self</span>.nfa.states[id].trans[i];
<span class="kw">if </span>seen.contains(next) {
<span class="comment">// The only way to visit a duplicate state in a transition
// list is when ASCII case insensitivity is enabled. In
// this case, we want to skip it since it&#39;s redundant work.
// But it would also end up duplicating matches, which
// results in reporting duplicate matches in some cases.
// See the &#39;acasei010&#39; regression test.
</span><span class="kw">continue</span>;
}
queue.push_back(next);
seen.insert(next);
<span class="comment">// As above for start states, under leftmost semantics, once
// we see a match all subsequent states should have no failure
// transitions because failure transitions always imply looking
// for a match that is a suffix of what has been seen so far
// (where &quot;seen so far&quot; corresponds to the string formed by
// following the transitions from the start state to the
// current state). Under leftmost semantics, we specifically do
// not want to allow this to happen because we always want to
// report the match found at the leftmost position.
//
// The difference between leftmost-first and leftmost-longest
// occurs previously while we build the trie. For
// leftmost-first, we simply omit any entries that would
// otherwise require passing through a match state.
//
// Note that for correctness, the failure transition has to be
// set to the dead state for ALL states following a match, not
// just the match state itself. However, by setting the failure
// transition to the dead state on all match states, the dead
// state will automatically propagate to all subsequent states
// via the failure state computation below.
</span><span class="kw">if </span>is_leftmost &amp;&amp; <span class="self">self</span>.nfa.states[next].is_match() {
<span class="self">self</span>.nfa.states[next].fail = NFA::DEAD;
<span class="kw">continue</span>;
}
<span class="kw">let </span><span class="kw-2">mut </span>fail = <span class="self">self</span>.nfa.states[id].fail;
<span class="kw">while </span><span class="self">self</span>.nfa.states[fail].next_state(b) == NFA::FAIL {
fail = <span class="self">self</span>.nfa.states[fail].fail;
}
fail = <span class="self">self</span>.nfa.states[fail].next_state(b);
<span class="self">self</span>.nfa.states[next].fail = fail;
<span class="self">self</span>.copy_matches(fail, next);
}
<span class="comment">// If the start state is a match state, then this automaton can
// match the empty string. This implies all states are match states
// since every position matches the empty string, so copy the
// matches from the start state to every state. Strictly speaking,
// this is only necessary for overlapping matches since each
// non-empty non-start match state needs to report empty matches
// in addition to its own. For the non-overlapping case, such
// states only report the first match, which is never empty since
// it isn&#39;t a start state.
</span><span class="kw">if </span>!is_leftmost {
<span class="self">self</span>.copy_matches(<span class="self">self</span>.nfa.special.start_unanchored_id, id);
}
}
}
<span class="doccomment">/// Shuffle the states so that they appear in this sequence:
///
/// DEAD, FAIL, MATCH..., START, START, NON-MATCH...
///
/// The idea here is that if we know how special states are laid out in our
/// transition table, then we can determine what &quot;kind&quot; of state we&#39;re in
/// just by comparing our current state ID with a particular value. In this
/// way, we avoid doing extra memory lookups.
///
/// Before shuffling begins, our states look something like this:
///
/// DEAD, FAIL, START, START, (MATCH | NON-MATCH)...
///
/// So all we need to do is move all of the MATCH states so that they
/// all appear before any NON-MATCH state, like so:
///
/// DEAD, FAIL, START, START, MATCH... NON-MATCH...
///
/// Then it&#39;s just a simple matter of swapping the two START states with
/// the last two MATCH states.
///
/// (This is the same technique used for fully compiled DFAs in
/// regex-automata.)
</span><span class="kw">fn </span>shuffle(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span>old_start_uid = <span class="self">self</span>.nfa.special.start_unanchored_id;
<span class="kw">let </span>old_start_aid = <span class="self">self</span>.nfa.special.start_anchored_id;
<span class="macro">assert!</span>(old_start_uid &lt; old_start_aid);
<span class="macro">assert_eq!</span>(
<span class="number">3</span>,
old_start_aid.as_usize(),
<span class="string">&quot;anchored start state should be at index 3&quot;
</span>);
<span class="comment">// We implement shuffling by a sequence of pairwise swaps of states.
// Since we have a number of things referencing states via their
// IDs and swapping them changes their IDs, we need to record every
// swap we make so that we can remap IDs. The remapper handles this
// book-keeping for us.
</span><span class="kw">let </span><span class="kw-2">mut </span>remapper = Remapper::new(<span class="kw-2">&amp;</span><span class="self">self</span>.nfa, <span class="number">0</span>);
<span class="comment">// The way we proceed here is by moving all match states so that
// they directly follow the start states. So it will go: DEAD, FAIL,
// START-UNANCHORED, START-ANCHORED, MATCH, ..., NON-MATCH, ...
//
// To do that, we proceed forward through all states after
// START-ANCHORED and swap match states so that they appear before all
// non-match states.
</span><span class="kw">let </span><span class="kw-2">mut </span>next_avail = StateID::from(<span class="number">4u8</span>);
<span class="kw">for </span>i <span class="kw">in </span>next_avail.as_usize()..<span class="self">self</span>.nfa.states.len() {
<span class="kw">let </span>sid = StateID::new(i).unwrap();
<span class="kw">if </span>!<span class="self">self</span>.nfa.states[sid].is_match() {
<span class="kw">continue</span>;
}
remapper.swap(<span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa, sid, next_avail);
<span class="comment">// The key invariant here is that only non-match states exist
// between &#39;next_avail&#39; and &#39;sid&#39; (with them being potentially
// equivalent). Thus, incrementing &#39;next_avail&#39; by 1 is guaranteed
// to land on the leftmost non-match state. (Unless &#39;next_avail&#39;
// and &#39;sid&#39; are equivalent, in which case, a swap will occur but
// it is a no-op.)
</span>next_avail = StateID::new(next_avail.one_more()).unwrap();
}
<span class="comment">// Now we&#39;d like to move the start states to immediately following the
// match states. (The start states may themselves be match states, but
// we&#39;ll handle that later.) We arrange the states this way so that we
// don&#39;t necessarily need to check whether a state is a start state or
// not before checking whether a state is a match state. For example,
// we&#39;d like to be able to write this as our state machine loop:
//
//   sid = start()
//   for byte in haystack:
//     sid = next(sid, byte)
//     if sid &lt;= nfa.max_start_id:
//       if sid &lt;= nfa.max_dead_id:
//         # search complete
//       elif sid &lt;= nfa.max_match_id:
//         # found match
//
// The important context here is that we might not want to look for
// start states at all. Namely, if a searcher doesn&#39;t have a prefilter,
// then there is no reason to care about whether we&#39;re in a start state
// or not. And indeed, if we did check for it, this very hot loop would
// ping pong between the special state handling and the main state
// transition logic. This in turn stalls the CPU by killing branch
// prediction.
//
// So essentially, we really want to be able to &quot;forget&quot; that start
// states even exist and this is why we put them at the end.
</span><span class="kw">let </span>new_start_aid =
StateID::new(next_avail.as_usize().checked_sub(<span class="number">1</span>).unwrap())
.unwrap();
remapper.swap(<span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa, old_start_aid, new_start_aid);
<span class="kw">let </span>new_start_uid =
StateID::new(next_avail.as_usize().checked_sub(<span class="number">2</span>).unwrap())
.unwrap();
remapper.swap(<span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa, old_start_uid, new_start_uid);
<span class="kw">let </span>new_max_match_id =
StateID::new(next_avail.as_usize().checked_sub(<span class="number">3</span>).unwrap())
.unwrap();
<span class="self">self</span>.nfa.special.max_match_id = new_max_match_id;
<span class="self">self</span>.nfa.special.start_unanchored_id = new_start_uid;
<span class="self">self</span>.nfa.special.start_anchored_id = new_start_aid;
<span class="comment">// If one start state is a match state, then they both are.
</span><span class="kw">if </span><span class="self">self</span>.nfa.states[<span class="self">self</span>.nfa.special.start_anchored_id].is_match() {
<span class="self">self</span>.nfa.special.max_match_id = <span class="self">self</span>.nfa.special.start_anchored_id;
}
remapper.remap(<span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa);
}
<span class="doccomment">/// Returns a set that tracks queued states.
///
/// This is only necessary when ASCII case insensitivity is enabled, since
/// it is the only way to visit the same state twice. Otherwise, this
/// returns an inert set that nevers adds anything and always reports
/// `false` for every member test.
</span><span class="kw">fn </span>queued_set(<span class="kw-2">&amp;</span><span class="self">self</span>) -&gt; QueuedSet {
<span class="kw">if </span><span class="self">self</span>.builder.ascii_case_insensitive {
QueuedSet::active()
} <span class="kw">else </span>{
QueuedSet::inert()
}
}
<span class="doccomment">/// Initializes the unanchored start state by making it dense. This is
/// achieved by explicitly setting every transition to the FAIL state.
/// This isn&#39;t necessary for correctness, since any missing transition is
/// automatically assumed to be mapped to the FAIL state. We do this to
/// make the unanchored starting state dense, and thus in turn make
/// transition lookups on it faster. (Which is worth doing because it&#39;s
/// the most active state.)
///
/// Concretely, every byte value in `0..=255` is given an explicit
/// transition to `NFA::FAIL` on the state identified by
/// `self.nfa.special.start_unanchored_id`.
</span><span class="kw">fn </span>init_unanchored_start_state(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span>start_uid = <span class="self">self</span>.nfa.special.start_unanchored_id;
<span class="kw">for </span>byte <span class="kw">in </span><span class="number">0</span>..=<span class="number">255 </span>{
<span class="self">self</span>.nfa.states[start_uid].set_next_state(byte, NFA::FAIL);
}
}
<span class="doccomment">/// Setup the anchored start state by copying all of the transitions and
/// matches from the unanchored starting state with one change: the failure
/// transition is changed to the DEAD state, so that for any undefined
/// transitions, the search will stop.
///
/// The unanchored start state itself is only read here: its transitions
/// are cloned and its matches are appended to the anchored start state via
/// `copy_matches`.
///
/// # Panics
///
/// Panics if the anchored and unanchored start states share the same ID,
/// since `copy_matches` requires two distinct states.
</span><span class="kw">fn </span>set_anchored_start_state(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span>start_uid = <span class="self">self</span>.nfa.special.start_unanchored_id;
<span class="kw">let </span>start_aid = <span class="self">self</span>.nfa.special.start_anchored_id;
<span class="self">self</span>.nfa.states[start_aid].trans =
<span class="self">self</span>.nfa.states[start_uid].trans.clone();
<span class="self">self</span>.copy_matches(start_uid, start_aid);
<span class="comment">// This is the main difference between the unanchored and anchored
// starting states. If a lookup on an anchored starting state fails,
// then the search should stop.
//
// N.B. This assumes that the loop on the unanchored starting state
// hasn&#39;t been created yet.
</span><span class="self">self</span>.nfa.states[start_aid].fail = NFA::DEAD;
}
<span class="doccomment">/// Set the failure transitions on the start state to loop back to the
/// start state. This effectively permits the Aho-Corasick automaton to
/// match at any position. This is also required for finding the next
/// state to terminate, namely, finding the next state should never return
/// `NFA::FAIL`.
///
/// Only transitions on the unanchored start state that currently point to
/// `NFA::FAIL` are rewritten; every other transition is left as is.
///
/// This must be done after building the initial trie, since trie
/// construction depends on transitions to `NFA::FAIL` to determine whether
/// a state already exists or not.
</span><span class="kw">fn </span>add_unanchored_start_state_loop(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span>start_uid = <span class="self">self</span>.nfa.special.start_unanchored_id;
<span class="kw">let </span>start = <span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa.states[start_uid];
<span class="kw">for </span>b <span class="kw">in </span><span class="number">0</span>..=<span class="number">255 </span>{
<span class="kw">if </span>start.next_state(b) == NFA::FAIL {
start.set_next_state(b, start_uid);
}
}
}
<span class="doccomment">/// Remove the start state loop by rewriting any transitions on the start
/// state back to the start state with transitions to the dead state.
///
/// The loop is only closed when two conditions are met: the start state
/// is a match state and the match kind is leftmost-first or
/// leftmost-longest.
///
/// The reason for this is that under leftmost semantics, a start state
/// that is also a match implies that we should never restart the search
/// process. We allow normal transitions out of the start state, but if
/// none exist, we transition to the dead state, which signals that
/// searching should stop.
///
/// When either condition does not hold, this is a no-op. When both hold,
/// only the transitions that point back at the unanchored start state are
/// redirected to `NFA::DEAD`; transitions to other states are untouched.
</span><span class="kw">fn </span>close_start_state_loop_for_leftmost(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span>start_uid = <span class="self">self</span>.nfa.special.start_unanchored_id;
<span class="kw">let </span>start = <span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa.states[start_uid];
<span class="kw">if </span><span class="self">self</span>.builder.match_kind.is_leftmost() &amp;&amp; start.is_match() {
<span class="kw">for </span>b <span class="kw">in </span><span class="number">0</span>..=<span class="number">255 </span>{
<span class="kw">if </span>start.next_state(b) == start_uid {
start.set_next_state(b, NFA::DEAD);
}
}
}
}
<span class="doccomment">/// Sets all transitions on the dead state to point back to the dead state.
/// Normally, missing transitions map back to the failure state, but the
/// point of the dead state is to act as a sink that can never be escaped.
///
/// Every byte value in `0..=255` is given an explicit transition from
/// `NFA::DEAD` to `NFA::DEAD`.
</span><span class="kw">fn </span>add_dead_state_loop(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">let </span>dead = <span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa.states[NFA::DEAD];
<span class="kw">for </span>b <span class="kw">in </span><span class="number">0</span>..=<span class="number">255 </span>{
dead.set_next_state(b, NFA::DEAD);
}
}
<span class="doccomment">/// Copy matches from the `src` state to the `dst` state. This is useful
/// when a match state can be reached via a failure transition. In which
/// case, you&#39;ll want to copy the matches (if any) from the state reached
/// by the failure transition to the original state you were at.
///
/// The matches of `src` are appended after any matches `dst` already has;
/// `src` itself is not modified.
///
/// # Panics
///
/// Panics if `src` and `dst` are the same state, or if either ID is out of
/// bounds for the state list.
</span><span class="kw">fn </span>copy_matches(<span class="kw-2">&amp;mut </span><span class="self">self</span>, src: StateID, dst: StateID) {
<span class="kw">let </span>(src, dst) =
get_two_mut(<span class="kw-2">&amp;mut </span><span class="self">self</span>.nfa.states, src.as_usize(), dst.as_usize());
dst.matches.extend_from_slice(<span class="kw-2">&amp;</span>src.matches);
}
<span class="doccomment">/// Allocate and add a fresh state to the underlying NFA and return its
/// ID (guaranteed to be one more than the ID of the previously allocated
/// state). If the ID would overflow `StateID`, then this returns an error.
///
/// The new state starts with no transitions and no matches, records the
/// given `depth`, and has its failure transition set to the current
/// unanchored start state ID.
///
/// # Errors
///
/// Returns a state ID overflow error when the current number of states
/// cannot be represented as a `StateID`. In that case no state is added.
///
/// # Panics
///
/// Panics if `depth` cannot be represented as a `SmallIndex`.
</span><span class="kw">fn </span>add_state(<span class="kw-2">&amp;mut </span><span class="self">self</span>, depth: usize) -&gt; <span class="prelude-ty">Result</span>&lt;StateID, BuildError&gt; {
<span class="comment">// This is OK because we error when building the trie if we see a
// pattern whose length cannot fit into a &#39;SmallIndex&#39;, and the longest
// possible depth corresponds to the length of the longest pattern.
</span><span class="kw">let </span>depth = SmallIndex::new(depth)
.expect(<span class="string">&quot;patterns longer than SmallIndex::MAX are not allowed&quot;</span>);
<span class="kw">let </span>id = StateID::new(<span class="self">self</span>.nfa.states.len()).map_err(|e| {
BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
})<span class="question-mark">?</span>;
<span class="self">self</span>.nfa.states.push(State {
trans: <span class="macro">vec!</span>[],
matches: <span class="macro">vec!</span>[],
fail: <span class="self">self</span>.nfa.special.start_unanchored_id,
depth,
});
<span class="prelude-val">Ok</span>(id)
}
<span class="doccomment">/// Computes the total amount of heap used by this NFA in bytes.
///
/// For every state, `size_of::&lt;State&gt;()` plus the state&#39;s own
/// `memory_usage()` is added to `self.nfa.memory_usage`. The total is
/// accumulated onto whatever value the field already holds, so calling
/// this more than once counts every state again.
</span><span class="kw">fn </span>calculate_memory_usage(<span class="kw-2">&amp;mut </span><span class="self">self</span>) {
<span class="kw">use </span>core::mem::size_of;
<span class="kw">for </span>state <span class="kw">in </span><span class="self">self</span>.nfa.states.iter() {
<span class="self">self</span>.nfa.memory_usage += size_of::&lt;State&gt;() + state.memory_usage();
}
}
}
<span class="doccomment">/// A set of state identifiers used to avoid revisiting the same state multiple
/// times when filling in failure transitions.
///
/// This set has an &quot;inert&quot; and an &quot;active&quot; mode. When inert, the set never
/// stores anything and always returns `false` for every member test. This is
/// useful to avoid the performance and memory overhead of maintaining this
/// set when it is not needed.
///
/// The mode is encoded by the `set` field: `None` means inert, while `Some`
/// holds the ordered set of state IDs tracked in active mode.
</span><span class="attribute">#[derive(Debug)]
</span><span class="kw">struct </span>QueuedSet {
set: <span class="prelude-ty">Option</span>&lt;BTreeSet&lt;StateID&gt;&gt;,
}
<span class="kw">impl </span>QueuedSet {
<span class="doccomment">/// Return an inert set that returns `false` for every state ID membership
/// test.
///
/// An inert set holds no underlying collection at all (`set` is `None`).
</span><span class="kw">fn </span>inert() -&gt; QueuedSet {
QueuedSet { set: <span class="prelude-val">None </span>}
}
<span class="doccomment">/// Return an active set that tracks state ID membership.
///
/// The returned set starts out empty.
</span><span class="kw">fn </span>active() -&gt; QueuedSet {
QueuedSet { set: <span class="prelude-val">Some</span>(BTreeSet::new()) }
}
<span class="doccomment">/// Inserts the given state ID into this set. (If the set is inert, then
/// this is a no-op.)
///
/// Inserting an ID that is already present leaves the set unchanged.
</span><span class="kw">fn </span>insert(<span class="kw-2">&amp;mut </span><span class="self">self</span>, state_id: StateID) {
<span class="kw">if let </span><span class="prelude-val">Some</span>(<span class="kw-2">ref mut </span>set) = <span class="self">self</span>.set {
set.insert(state_id);
}
}
<span class="doccomment">/// Returns true if and only if the given state ID is in this set. If the
/// set is inert, this always returns false, even for IDs that were
/// previously passed to `insert`.
</span><span class="kw">fn </span>contains(<span class="kw-2">&amp;</span><span class="self">self</span>, state_id: StateID) -&gt; bool {
<span class="kw">match </span><span class="self">self</span>.set {
<span class="prelude-val">None </span>=&gt; <span class="bool-val">false</span>,
<span class="prelude-val">Some</span>(<span class="kw-2">ref </span>set) =&gt; set.contains(<span class="kw-2">&amp;</span>state_id),
}
}
}
<span class="kw">impl </span>core::fmt::Debug <span class="kw">for </span>NFA {
<span class="kw">fn </span>fmt(<span class="kw-2">&amp;</span><span class="self">self</span>, f: <span class="kw-2">&amp;mut </span>core::fmt::Formatter&lt;<span class="lifetime">&#39;_</span>&gt;) -&gt; core::fmt::Result {
<span class="kw">use </span><span class="kw">crate</span>::automaton::fmt_state_indicator;
<span class="macro">writeln!</span>(f, <span class="string">&quot;noncontiguous::NFA(&quot;</span>)<span class="question-mark">?</span>;
<span class="kw">for </span>(sid, state) <span class="kw">in </span><span class="self">self</span>.states.iter().with_state_ids() {
<span class="comment">// The FAIL state doesn&#39;t actually have space for a state allocated
// for it, so we have to treat it as a special case.
//
// Only a header line carrying its ID is written for it; its failure
// transition, transitions and matches are not printed.
//
// Every other state is printed as its state indicator, its zero-padded
// ID with the ID of its failure transition in parentheses, and then
// the state itself. Match states get one extra line that lists their
// pattern IDs separated by commas.
</span><span class="kw">if </span>sid == NFA::FAIL {
<span class="macro">writeln!</span>(f, <span class="string">&quot;F {:06}:&quot;</span>, sid.as_usize())<span class="question-mark">?</span>;
<span class="kw">continue</span>;
}
fmt_state_indicator(f, <span class="self">self</span>, sid)<span class="question-mark">?</span>;
<span class="macro">write!</span>(
f,
<span class="string">&quot;{:06}({:06}): &quot;</span>,
sid.as_usize(),
state.fail.as_usize()
)<span class="question-mark">?</span>;
state.fmt(f)<span class="question-mark">?</span>;
<span class="macro">write!</span>(f, <span class="string">&quot;\n&quot;</span>)<span class="question-mark">?</span>;
<span class="kw">if </span><span class="self">self</span>.is_match(sid) {
<span class="macro">write!</span>(f, <span class="string">&quot; matches: &quot;</span>)<span class="question-mark">?</span>;
<span class="kw">for </span>(i, pid) <span class="kw">in </span>state.matches.iter().enumerate() {
<span class="kw">if </span>i &gt; <span class="number">0 </span>{
<span class="macro">write!</span>(f, <span class="string">&quot;, &quot;</span>)<span class="question-mark">?</span>;
}
<span class="macro">write!</span>(f, <span class="string">&quot;{}&quot;</span>, pid.as_usize())<span class="question-mark">?</span>;
}
<span class="macro">write!</span>(f, <span class="string">&quot;\n&quot;</span>)<span class="question-mark">?</span>;
}
}
<span class="macro">writeln!</span>(f, <span class="string">&quot;match kind: {:?}&quot;</span>, <span class="self">self</span>.match_kind)<span class="question-mark">?</span>;
<span class="macro">writeln!</span>(f, <span class="string">&quot;prefilter: {:?}&quot;</span>, <span class="self">self</span>.prefilter.is_some())<span class="question-mark">?</span>;
<span class="macro">writeln!</span>(f, <span class="string">&quot;state length: {:?}&quot;</span>, <span class="self">self</span>.states.len())<span class="question-mark">?</span>;
<span class="macro">writeln!</span>(f, <span class="string">&quot;pattern length: {:?}&quot;</span>, <span class="self">self</span>.patterns_len())<span class="question-mark">?</span>;
<span class="macro">writeln!</span>(f, <span class="string">&quot;shortest pattern length: {:?}&quot;</span>, <span class="self">self</span>.min_pattern_len)<span class="question-mark">?</span>;
<span class="macro">writeln!</span>(f, <span class="string">&quot;longest pattern length: {:?}&quot;</span>, <span class="self">self</span>.max_pattern_len)<span class="question-mark">?</span>;
<span class="macro">writeln!</span>(f, <span class="string">&quot;memory usage: {:?}&quot;</span>, <span class="self">self</span>.memory_usage())<span class="question-mark">?</span>;
<span class="macro">writeln!</span>(f, <span class="string">&quot;)&quot;</span>)<span class="question-mark">?</span>;
<span class="prelude-val">Ok</span>(())
}
}
<span class="doccomment">/// Safely return two mutable borrows to two different locations in the given
/// slice.
///
/// The first element of the returned pair borrows `xs[i]` and the second
/// borrows `xs[j]`, regardless of which index is larger. The slice is split
/// at the larger index so that the two borrows come from disjoint halves.
///
/// This panics if i == j, or if either index is out of bounds for `xs`.
</span><span class="kw">fn </span>get_two_mut&lt;T&gt;(xs: <span class="kw-2">&amp;mut </span>[T], i: usize, j: usize) -&gt; (<span class="kw-2">&amp;mut </span>T, <span class="kw-2">&amp;mut </span>T) {
<span class="macro">assert!</span>(i != j, <span class="string">&quot;{} must not be equal to {}&quot;</span>, i, j);
<span class="kw">if </span>i &lt; j {
<span class="kw">let </span>(before, after) = xs.split_at_mut(j);
(<span class="kw-2">&amp;mut </span>before[i], <span class="kw-2">&amp;mut </span>after[<span class="number">0</span>])
} <span class="kw">else </span>{
<span class="kw">let </span>(before, after) = xs.split_at_mut(i);
(<span class="kw-2">&amp;mut </span>after[<span class="number">0</span>], <span class="kw-2">&amp;mut </span>before[j])
}
}
</code></pre></div>
</section></div></main><div id="rustdoc-vars" data-root-path="../../../" data-current-crate="aho_corasick" data-themes="ayu,dark,light" data-resource-suffix="" data-rustdoc-version="1.66.0-nightly (5c8bff74b 2022-10-21)" ></div></body></html>