| <!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `/root/.cargo/git/checkouts/tantivy-65d0bbbddbbd5d02/433372d/src/postings/serializer.rs`."><meta name="keywords" content="rust, rustlang, rust-lang"><title>serializer.rs - source</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceSerif4-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../FiraSans-Regular.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../FiraSans-Medium.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceCodePro-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceSerif4-Bold.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../../SourceCodePro-Semibold.ttf.woff2"><link rel="stylesheet" href="../../../normalize.css"><link rel="stylesheet" href="../../../rustdoc.css" id="mainThemeStyle"><link rel="stylesheet" href="../../../ayu.css" disabled><link rel="stylesheet" href="../../../dark.css" disabled><link rel="stylesheet" href="../../../light.css" id="themeStyle"><script id="default-settings" ></script><script src="../../../storage.js"></script><script defer src="../../../source-script.js"></script><script defer src="../../../source-files.js"></script><script defer src="../../../main.js"></script><noscript><link rel="stylesheet" href="../../../noscript.css"></noscript><link rel="alternate icon" type="image/png" href="../../../favicon-16x16.png"><link rel="alternate icon" type="image/png" href="../../../favicon-32x32.png"><link rel="icon" type="image/svg+xml" href="../../../favicon.svg"></head><body class="rustdoc source"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most 
likely display funky things.</div><![endif]--><nav class="sidebar"><a class="sidebar-logo" href="../../../tantivy/index.html"><div class="logo-container"> |
| <img src="http://fulmicoton.com/tantivy-logo/tantivy-logo.png" alt="logo"></div></a></nav><main><div class="width-limiter"><nav class="sub"><a class="sub-logo-container" href="../../../tantivy/index.html"> |
| <img src="http://fulmicoton.com/tantivy-logo/tantivy-logo.png" alt="logo"></a><form class="search-form"><div class="search-container"><span></span><input class="search-input" name="search" autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to search, ‘?’ for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../../../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../../../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../../../wheel.svg"></a></div></div></form></nav><section id="main-content" class="content"><div class="example-wrap"><pre class="src-line-numbers"><span id="1">1</span> |
| <span id="2">2</span> |
| <span id="3">3</span> |
| <span id="4">4</span> |
| <span id="5">5</span> |
| <span id="6">6</span> |
| <span id="7">7</span> |
| <span id="8">8</span> |
| <span id="9">9</span> |
| <span id="10">10</span> |
| <span id="11">11</span> |
| <span id="12">12</span> |
| <span id="13">13</span> |
| <span id="14">14</span> |
| <span id="15">15</span> |
| <span id="16">16</span> |
| <span id="17">17</span> |
| <span id="18">18</span> |
| <span id="19">19</span> |
| <span id="20">20</span> |
| <span id="21">21</span> |
| <span id="22">22</span> |
| <span id="23">23</span> |
| <span id="24">24</span> |
| <span id="25">25</span> |
| <span id="26">26</span> |
| <span id="27">27</span> |
| <span id="28">28</span> |
| <span id="29">29</span> |
| <span id="30">30</span> |
| <span id="31">31</span> |
| <span id="32">32</span> |
| <span id="33">33</span> |
| <span id="34">34</span> |
| <span id="35">35</span> |
| <span id="36">36</span> |
| <span id="37">37</span> |
| <span id="38">38</span> |
| <span id="39">39</span> |
| <span id="40">40</span> |
| <span id="41">41</span> |
| <span id="42">42</span> |
| <span id="43">43</span> |
| <span id="44">44</span> |
| <span id="45">45</span> |
| <span id="46">46</span> |
| <span id="47">47</span> |
| <span id="48">48</span> |
| <span id="49">49</span> |
| <span id="50">50</span> |
| <span id="51">51</span> |
| <span id="52">52</span> |
| <span id="53">53</span> |
| <span id="54">54</span> |
| <span id="55">55</span> |
| <span id="56">56</span> |
| <span id="57">57</span> |
| <span id="58">58</span> |
| <span id="59">59</span> |
| <span id="60">60</span> |
| <span id="61">61</span> |
| <span id="62">62</span> |
| <span id="63">63</span> |
| <span id="64">64</span> |
| <span id="65">65</span> |
| <span id="66">66</span> |
| <span id="67">67</span> |
| <span id="68">68</span> |
| <span id="69">69</span> |
| <span id="70">70</span> |
| <span id="71">71</span> |
| <span id="72">72</span> |
| <span id="73">73</span> |
| <span id="74">74</span> |
| <span id="75">75</span> |
| <span id="76">76</span> |
| <span id="77">77</span> |
| <span id="78">78</span> |
| <span id="79">79</span> |
| <span id="80">80</span> |
| <span id="81">81</span> |
| <span id="82">82</span> |
| <span id="83">83</span> |
| <span id="84">84</span> |
| <span id="85">85</span> |
| <span id="86">86</span> |
| <span id="87">87</span> |
| <span id="88">88</span> |
| <span id="89">89</span> |
| <span id="90">90</span> |
| <span id="91">91</span> |
| <span id="92">92</span> |
| <span id="93">93</span> |
| <span id="94">94</span> |
| <span id="95">95</span> |
| <span id="96">96</span> |
| <span id="97">97</span> |
| <span id="98">98</span> |
| <span id="99">99</span> |
| <span id="100">100</span> |
| <span id="101">101</span> |
| <span id="102">102</span> |
| <span id="103">103</span> |
| <span id="104">104</span> |
| <span id="105">105</span> |
| <span id="106">106</span> |
| <span id="107">107</span> |
| <span id="108">108</span> |
| <span id="109">109</span> |
| <span id="110">110</span> |
| <span id="111">111</span> |
| <span id="112">112</span> |
| <span id="113">113</span> |
| <span id="114">114</span> |
| <span id="115">115</span> |
| <span id="116">116</span> |
| <span id="117">117</span> |
| <span id="118">118</span> |
| <span id="119">119</span> |
| <span id="120">120</span> |
| <span id="121">121</span> |
| <span id="122">122</span> |
| <span id="123">123</span> |
| <span id="124">124</span> |
| <span id="125">125</span> |
| <span id="126">126</span> |
| <span id="127">127</span> |
| <span id="128">128</span> |
| <span id="129">129</span> |
| <span id="130">130</span> |
| <span id="131">131</span> |
| <span id="132">132</span> |
| <span id="133">133</span> |
| <span id="134">134</span> |
| <span id="135">135</span> |
| <span id="136">136</span> |
| <span id="137">137</span> |
| <span id="138">138</span> |
| <span id="139">139</span> |
| <span id="140">140</span> |
| <span id="141">141</span> |
| <span id="142">142</span> |
| <span id="143">143</span> |
| <span id="144">144</span> |
| <span id="145">145</span> |
| <span id="146">146</span> |
| <span id="147">147</span> |
| <span id="148">148</span> |
| <span id="149">149</span> |
| <span id="150">150</span> |
| <span id="151">151</span> |
| <span id="152">152</span> |
| <span id="153">153</span> |
| <span id="154">154</span> |
| <span id="155">155</span> |
| <span id="156">156</span> |
| <span id="157">157</span> |
| <span id="158">158</span> |
| <span id="159">159</span> |
| <span id="160">160</span> |
| <span id="161">161</span> |
| <span id="162">162</span> |
| <span id="163">163</span> |
| <span id="164">164</span> |
| <span id="165">165</span> |
| <span id="166">166</span> |
| <span id="167">167</span> |
| <span id="168">168</span> |
| <span id="169">169</span> |
| <span id="170">170</span> |
| <span id="171">171</span> |
| <span id="172">172</span> |
| <span id="173">173</span> |
| <span id="174">174</span> |
| <span id="175">175</span> |
| <span id="176">176</span> |
| <span id="177">177</span> |
| <span id="178">178</span> |
| <span id="179">179</span> |
| <span id="180">180</span> |
| <span id="181">181</span> |
| <span id="182">182</span> |
| <span id="183">183</span> |
| <span id="184">184</span> |
| <span id="185">185</span> |
| <span id="186">186</span> |
| <span id="187">187</span> |
| <span id="188">188</span> |
| <span id="189">189</span> |
| <span id="190">190</span> |
| <span id="191">191</span> |
| <span id="192">192</span> |
| <span id="193">193</span> |
| <span id="194">194</span> |
| <span id="195">195</span> |
| <span id="196">196</span> |
| <span id="197">197</span> |
| <span id="198">198</span> |
| <span id="199">199</span> |
| <span id="200">200</span> |
| <span id="201">201</span> |
| <span id="202">202</span> |
| <span id="203">203</span> |
| <span id="204">204</span> |
| <span id="205">205</span> |
| <span id="206">206</span> |
| <span id="207">207</span> |
| <span id="208">208</span> |
| <span id="209">209</span> |
| <span id="210">210</span> |
| <span id="211">211</span> |
| <span id="212">212</span> |
| <span id="213">213</span> |
| <span id="214">214</span> |
| <span id="215">215</span> |
| <span id="216">216</span> |
| <span id="217">217</span> |
| <span id="218">218</span> |
| <span id="219">219</span> |
| <span id="220">220</span> |
| <span id="221">221</span> |
| <span id="222">222</span> |
| <span id="223">223</span> |
| <span id="224">224</span> |
| <span id="225">225</span> |
| <span id="226">226</span> |
| <span id="227">227</span> |
| <span id="228">228</span> |
| <span id="229">229</span> |
| <span id="230">230</span> |
| <span id="231">231</span> |
| <span id="232">232</span> |
| <span id="233">233</span> |
| <span id="234">234</span> |
| <span id="235">235</span> |
| <span id="236">236</span> |
| <span id="237">237</span> |
| <span id="238">238</span> |
| <span id="239">239</span> |
| <span id="240">240</span> |
| <span id="241">241</span> |
| <span id="242">242</span> |
| <span id="243">243</span> |
| <span id="244">244</span> |
| <span id="245">245</span> |
| <span id="246">246</span> |
| <span id="247">247</span> |
| <span id="248">248</span> |
| <span id="249">249</span> |
| <span id="250">250</span> |
| <span id="251">251</span> |
| <span id="252">252</span> |
| <span id="253">253</span> |
| <span id="254">254</span> |
| <span id="255">255</span> |
| <span id="256">256</span> |
| <span id="257">257</span> |
| <span id="258">258</span> |
| <span id="259">259</span> |
| <span id="260">260</span> |
| <span id="261">261</span> |
| <span id="262">262</span> |
| <span id="263">263</span> |
| <span id="264">264</span> |
| <span id="265">265</span> |
| <span id="266">266</span> |
| <span id="267">267</span> |
| <span id="268">268</span> |
| <span id="269">269</span> |
| <span id="270">270</span> |
| <span id="271">271</span> |
| <span id="272">272</span> |
| <span id="273">273</span> |
| <span id="274">274</span> |
| <span id="275">275</span> |
| <span id="276">276</span> |
| <span id="277">277</span> |
| <span id="278">278</span> |
| <span id="279">279</span> |
| <span id="280">280</span> |
| <span id="281">281</span> |
| <span id="282">282</span> |
| <span id="283">283</span> |
| <span id="284">284</span> |
| <span id="285">285</span> |
| <span id="286">286</span> |
| <span id="287">287</span> |
| <span id="288">288</span> |
| <span id="289">289</span> |
| <span id="290">290</span> |
| <span id="291">291</span> |
| <span id="292">292</span> |
| <span id="293">293</span> |
| <span id="294">294</span> |
| <span id="295">295</span> |
| <span id="296">296</span> |
| <span id="297">297</span> |
| <span id="298">298</span> |
| <span id="299">299</span> |
| <span id="300">300</span> |
| <span id="301">301</span> |
| <span id="302">302</span> |
| <span id="303">303</span> |
| <span id="304">304</span> |
| <span id="305">305</span> |
| <span id="306">306</span> |
| <span id="307">307</span> |
| <span id="308">308</span> |
| <span id="309">309</span> |
| <span id="310">310</span> |
| <span id="311">311</span> |
| <span id="312">312</span> |
| <span id="313">313</span> |
| <span id="314">314</span> |
| <span id="315">315</span> |
| <span id="316">316</span> |
| <span id="317">317</span> |
| <span id="318">318</span> |
| <span id="319">319</span> |
| <span id="320">320</span> |
| <span id="321">321</span> |
| <span id="322">322</span> |
| <span id="323">323</span> |
| <span id="324">324</span> |
| <span id="325">325</span> |
| <span id="326">326</span> |
| <span id="327">327</span> |
| <span id="328">328</span> |
| <span id="329">329</span> |
| <span id="330">330</span> |
| <span id="331">331</span> |
| <span id="332">332</span> |
| <span id="333">333</span> |
| <span id="334">334</span> |
| <span id="335">335</span> |
| <span id="336">336</span> |
| <span id="337">337</span> |
| <span id="338">338</span> |
| <span id="339">339</span> |
| <span id="340">340</span> |
| <span id="341">341</span> |
| <span id="342">342</span> |
| <span id="343">343</span> |
| <span id="344">344</span> |
| <span id="345">345</span> |
| <span id="346">346</span> |
| <span id="347">347</span> |
| <span id="348">348</span> |
| <span id="349">349</span> |
| <span id="350">350</span> |
| <span id="351">351</span> |
| <span id="352">352</span> |
| <span id="353">353</span> |
| <span id="354">354</span> |
| <span id="355">355</span> |
| <span id="356">356</span> |
| <span id="357">357</span> |
| <span id="358">358</span> |
| <span id="359">359</span> |
| <span id="360">360</span> |
| <span id="361">361</span> |
| <span id="362">362</span> |
| <span id="363">363</span> |
| <span id="364">364</span> |
| <span id="365">365</span> |
| <span id="366">366</span> |
| <span id="367">367</span> |
| <span id="368">368</span> |
| <span id="369">369</span> |
| <span id="370">370</span> |
| <span id="371">371</span> |
| <span id="372">372</span> |
| <span id="373">373</span> |
| <span id="374">374</span> |
| <span id="375">375</span> |
| <span id="376">376</span> |
| <span id="377">377</span> |
| <span id="378">378</span> |
| <span id="379">379</span> |
| <span id="380">380</span> |
| <span id="381">381</span> |
| <span id="382">382</span> |
| <span id="383">383</span> |
| <span id="384">384</span> |
| <span id="385">385</span> |
| <span id="386">386</span> |
| <span id="387">387</span> |
| <span id="388">388</span> |
| <span id="389">389</span> |
| <span id="390">390</span> |
| <span id="391">391</span> |
| <span id="392">392</span> |
| <span id="393">393</span> |
| <span id="394">394</span> |
| <span id="395">395</span> |
| <span id="396">396</span> |
| <span id="397">397</span> |
| <span id="398">398</span> |
| <span id="399">399</span> |
| <span id="400">400</span> |
| <span id="401">401</span> |
| <span id="402">402</span> |
| <span id="403">403</span> |
| <span id="404">404</span> |
| <span id="405">405</span> |
| <span id="406">406</span> |
| <span id="407">407</span> |
| <span id="408">408</span> |
| <span id="409">409</span> |
| <span id="410">410</span> |
| <span id="411">411</span> |
| <span id="412">412</span> |
| <span id="413">413</span> |
| <span id="414">414</span> |
| <span id="415">415</span> |
| <span id="416">416</span> |
| <span id="417">417</span> |
| <span id="418">418</span> |
| <span id="419">419</span> |
| <span id="420">420</span> |
| <span id="421">421</span> |
| <span id="422">422</span> |
| <span id="423">423</span> |
| <span id="424">424</span> |
| <span id="425">425</span> |
| <span id="426">426</span> |
| <span id="427">427</span> |
| <span id="428">428</span> |
| <span id="429">429</span> |
| <span id="430">430</span> |
| <span id="431">431</span> |
| <span id="432">432</span> |
| <span id="433">433</span> |
| <span id="434">434</span> |
| <span id="435">435</span> |
| <span id="436">436</span> |
| <span id="437">437</span> |
| <span id="438">438</span> |
| <span id="439">439</span> |
| <span id="440">440</span> |
| <span id="441">441</span> |
| <span id="442">442</span> |
| <span id="443">443</span> |
| <span id="444">444</span> |
| <span id="445">445</span> |
| <span id="446">446</span> |
| <span id="447">447</span> |
| <span id="448">448</span> |
| <span id="449">449</span> |
| <span id="450">450</span> |
| <span id="451">451</span> |
| <span id="452">452</span> |
| <span id="453">453</span> |
| <span id="454">454</span> |
| <span id="455">455</span> |
| <span id="456">456</span> |
| <span id="457">457</span> |
| <span id="458">458</span> |
| <span id="459">459</span> |
| <span id="460">460</span> |
| <span id="461">461</span> |
| <span id="462">462</span> |
| <span id="463">463</span> |
| <span id="464">464</span> |
| <span id="465">465</span> |
| <span id="466">466</span> |
| <span id="467">467</span> |
| <span id="468">468</span> |
| <span id="469">469</span> |
| <span id="470">470</span> |
| <span id="471">471</span> |
| <span id="472">472</span> |
| <span id="473">473</span> |
| <span id="474">474</span> |
| <span id="475">475</span> |
| </pre><pre class="rust"><code><span class="kw">use </span>std::cmp::Ordering; |
| <span class="kw">use </span>std::io::{<span class="self">self</span>, Write}; |
| |
| <span class="kw">use </span>common::{BinarySerializable, CountingWriter, VInt}; |
| <span class="kw">use </span>fail::fail_point; |
| |
| <span class="kw">use </span><span class="kw">super</span>::TermInfo; |
| <span class="kw">use </span><span class="kw">crate</span>::core::Segment; |
| <span class="kw">use </span><span class="kw">crate</span>::directory::{CompositeWrite, WritePtr}; |
| <span class="kw">use </span><span class="kw">crate</span>::fieldnorm::FieldNormReader; |
| <span class="kw">use </span><span class="kw">crate</span>::positions::PositionSerializer; |
| <span class="kw">use </span><span class="kw">crate</span>::postings::compression::{BlockEncoder, VIntEncoder, COMPRESSION_BLOCK_SIZE}; |
| <span class="kw">use </span><span class="kw">crate</span>::postings::skip::SkipSerializer; |
| <span class="kw">use </span><span class="kw">crate</span>::query::Bm25Weight; |
| <span class="kw">use </span><span class="kw">crate</span>::schema::{Field, FieldEntry, FieldType, IndexRecordOption, Schema}; |
| <span class="kw">use </span><span class="kw">crate</span>::termdict::{TermDictionaryBuilder, TermOrdinal}; |
| <span class="kw">use crate</span>::{DocId, Score}; |
| |
| <span class="doccomment">/// `InvertedIndexSerializer` is in charge of serializing |
| /// postings on disk, in the |
| /// * `.idx` (inverted index) |
| /// * `.pos` (positions file) |
| /// * `.term` (term dictionary) |
| /// |
| /// `PostingsWriter`s are in charge of pushing the data to the |
| /// serializer. |
| /// |
| /// The serializer expects to receive the following calls |
| /// in this order : |
| /// * `new_field(...)` |
| /// * `new_term(...)` |
| /// * `write_doc(...)` |
| /// * `write_doc(...)` |
| /// * `write_doc(...)` |
| /// * ... |
| /// * `close_term()` |
| /// * `new_term(...)` |
| /// * `write_doc(...)` |
| /// * ... |
| /// * `close_term()` |
| /// * `new_field(...)` |
| /// * ... |
| /// * `close()` |
| /// |
| /// Terms have to be pushed in a lexicographically-sorted order. |
| /// Within a term, documents have to be pushed in increasing order. |
| /// |
| /// A description of the serialization format is |
| /// [available here](https://fulmicoton.gitbooks.io/tantivy-doc/content/inverted-index.html). |
| </span><span class="kw">pub struct </span>InvertedIndexSerializer { |
| terms_write: CompositeWrite<WritePtr>, |
| postings_write: CompositeWrite<WritePtr>, |
| positions_write: CompositeWrite<WritePtr>, |
| schema: Schema, |
| } |
| |
| <span class="kw">impl </span>InvertedIndexSerializer { |
| <span class="doccomment">/// Open a new `InvertedIndexSerializer` for the given segment |
| </span><span class="kw">pub fn </span>open(segment: <span class="kw-2">&mut </span>Segment) -> <span class="kw">crate</span>::Result<InvertedIndexSerializer> { |
| <span class="kw">use </span><span class="kw">crate</span>::SegmentComponent::{Positions, Postings, Terms}; |
| <span class="kw">let </span>inv_index_serializer = InvertedIndexSerializer { |
| terms_write: CompositeWrite::wrap(segment.open_write(Terms)<span class="question-mark">?</span>), |
| postings_write: CompositeWrite::wrap(segment.open_write(Postings)<span class="question-mark">?</span>), |
| positions_write: CompositeWrite::wrap(segment.open_write(Positions)<span class="question-mark">?</span>), |
| schema: segment.schema(), |
| }; |
| <span class="prelude-val">Ok</span>(inv_index_serializer) |
| } |
| |
| <span class="doccomment">/// Must be called before starting to push terms of |
| /// a given field. |
| /// |
| /// Loads the indexing options for the given field. |
| </span><span class="kw">pub fn </span>new_field( |
| <span class="kw-2">&mut </span><span class="self">self</span>, |
| field: Field, |
| total_num_tokens: u64, |
| fieldnorm_reader: <span class="prelude-ty">Option</span><FieldNormReader>, |
| ) -> io::Result<FieldSerializer> { |
| <span class="kw">let </span>field_entry: <span class="kw-2">&</span>FieldEntry = <span class="self">self</span>.schema.get_field_entry(field); |
| <span class="kw">let </span>term_dictionary_write = <span class="self">self</span>.terms_write.for_field(field); |
| <span class="kw">let </span>postings_write = <span class="self">self</span>.postings_write.for_field(field); |
| <span class="kw">let </span>positions_write = <span class="self">self</span>.positions_write.for_field(field); |
| <span class="kw">let </span>field_type: FieldType = (<span class="kw-2">*</span>field_entry.field_type()).clone(); |
| FieldSerializer::create( |
| <span class="kw-2">&</span>field_type, |
| total_num_tokens, |
| term_dictionary_write, |
| postings_write, |
| positions_write, |
| fieldnorm_reader, |
| ) |
| } |
| |
| <span class="doccomment">/// Closes the serializer. |
| </span><span class="kw">pub fn </span>close(<span class="self">self</span>) -> io::Result<()> { |
| <span class="self">self</span>.terms_write.close()<span class="question-mark">?</span>; |
| <span class="self">self</span>.postings_write.close()<span class="question-mark">?</span>; |
| <span class="self">self</span>.positions_write.close()<span class="question-mark">?</span>; |
| <span class="prelude-val">Ok</span>(()) |
| } |
| } |
| |
| <span class="doccomment">/// The field serializer is in charge of |
| /// the serialization of a specific field. |
| </span><span class="kw">pub struct </span>FieldSerializer<<span class="lifetime">'a</span>> { |
| term_dictionary_builder: TermDictionaryBuilder<<span class="kw-2">&</span><span class="lifetime">'a </span><span class="kw-2">mut </span>CountingWriter<WritePtr>>, |
| postings_serializer: PostingsSerializer<<span class="kw-2">&</span><span class="lifetime">'a </span><span class="kw-2">mut </span>CountingWriter<WritePtr>>, |
| positions_serializer_opt: <span class="prelude-ty">Option</span><PositionSerializer<<span class="kw-2">&</span><span class="lifetime">'a </span><span class="kw-2">mut </span>CountingWriter<WritePtr>>>, |
| current_term_info: TermInfo, |
| term_open: bool, |
| num_terms: TermOrdinal, |
| } |
| |
| <span class="kw">impl</span><<span class="lifetime">'a</span>> FieldSerializer<<span class="lifetime">'a</span>> { |
| <span class="kw">fn </span>create( |
| field_type: <span class="kw-2">&</span>FieldType, |
| total_num_tokens: u64, |
| term_dictionary_write: <span class="kw-2">&</span><span class="lifetime">'a </span><span class="kw-2">mut </span>CountingWriter<WritePtr>, |
| postings_write: <span class="kw-2">&</span><span class="lifetime">'a </span><span class="kw-2">mut </span>CountingWriter<WritePtr>, |
| positions_write: <span class="kw-2">&</span><span class="lifetime">'a </span><span class="kw-2">mut </span>CountingWriter<WritePtr>, |
| fieldnorm_reader: <span class="prelude-ty">Option</span><FieldNormReader>, |
| ) -> io::Result<FieldSerializer<<span class="lifetime">'a</span>>> { |
| total_num_tokens.serialize(postings_write)<span class="question-mark">?</span>; |
| <span class="kw">let </span>index_record_option = field_type |
| .index_record_option() |
| .unwrap_or(IndexRecordOption::Basic); |
| <span class="kw">let </span>term_dictionary_builder = TermDictionaryBuilder::create(term_dictionary_write)<span class="question-mark">?</span>; |
| <span class="kw">let </span>average_fieldnorm = fieldnorm_reader |
| .as_ref() |
| .map(|ff_reader| (total_num_tokens <span class="kw">as </span>Score / ff_reader.num_docs() <span class="kw">as </span>Score)) |
| .unwrap_or(<span class="number">0.0</span>); |
| <span class="kw">let </span>postings_serializer = PostingsSerializer::new( |
| postings_write, |
| average_fieldnorm, |
| index_record_option, |
| fieldnorm_reader, |
| ); |
| <span class="kw">let </span>positions_serializer_opt = <span class="kw">if </span>index_record_option.has_positions() { |
| <span class="prelude-val">Some</span>(PositionSerializer::new(positions_write)) |
| } <span class="kw">else </span>{ |
| <span class="prelude-val">None |
| </span>}; |
| |
| <span class="prelude-val">Ok</span>(FieldSerializer { |
| term_dictionary_builder, |
| postings_serializer, |
| positions_serializer_opt, |
| current_term_info: TermInfo::default(), |
| term_open: <span class="bool-val">false</span>, |
| num_terms: TermOrdinal::default(), |
| }) |
| } |
| |
| <span class="kw">fn </span>current_term_info(<span class="kw-2">&</span><span class="self">self</span>) -> TermInfo { |
| <span class="kw">let </span>positions_start = |
| <span class="kw">if let </span><span class="prelude-val">Some</span>(positions_serializer) = <span class="self">self</span>.positions_serializer_opt.as_ref() { |
| positions_serializer.written_bytes() |
| } <span class="kw">else </span>{ |
| <span class="number">0u64 |
| </span>} <span class="kw">as </span>usize; |
| <span class="kw">let </span>addr = <span class="self">self</span>.postings_serializer.written_bytes() <span class="kw">as </span>usize; |
| TermInfo { |
| doc_freq: <span class="number">0</span>, |
| postings_range: addr..addr, |
| positions_range: positions_start..positions_start, |
| } |
| } |
| |
| <span class="doccomment">/// Starts the postings for a new term. |
| /// * term - the term. It needs to come after the previous term according to the lexicographical |
| /// order. |
| /// * term_doc_freq - the number of documents containing the term. |
| </span><span class="kw">pub fn </span>new_term(<span class="kw-2">&mut </span><span class="self">self</span>, term: <span class="kw-2">&</span>[u8], term_doc_freq: u32) -> io::Result<TermOrdinal> { |
| <span class="macro">assert!</span>( |
| !<span class="self">self</span>.term_open, |
| <span class="string">"Called new_term, while the previous term was not closed." |
| </span>); |
| |
| <span class="self">self</span>.term_open = <span class="bool-val">true</span>; |
| <span class="self">self</span>.postings_serializer.clear(); |
| <span class="self">self</span>.current_term_info = <span class="self">self</span>.current_term_info(); |
| <span class="self">self</span>.term_dictionary_builder.insert_key(term)<span class="question-mark">?</span>; |
| <span class="kw">let </span>term_ordinal = <span class="self">self</span>.num_terms; |
| <span class="self">self</span>.num_terms += <span class="number">1</span>; |
| <span class="self">self</span>.postings_serializer.new_term(term_doc_freq); |
| <span class="prelude-val">Ok</span>(term_ordinal) |
| } |
| |
| <span class="doccomment">/// Serialize the information that a document contains for the current term: |
| /// its term frequency, and the position deltas. |
| /// |
| /// At this point, the positions are already `delta-encoded`. |
| /// For instance, if the positions are `2, 3, 17`, |
| /// `position_deltas` is `2, 1, 14` |
| /// |
| /// Term frequencies and positions may be ignored by the serializer depending |
| /// on the configuration of the field in the `Schema`. |
| </span><span class="kw">pub fn </span>write_doc(<span class="kw-2">&mut </span><span class="self">self</span>, doc_id: DocId, term_freq: u32, position_deltas: <span class="kw-2">&</span>[u32]) { |
| <span class="self">self</span>.current_term_info.doc_freq += <span class="number">1</span>; |
| <span class="self">self</span>.postings_serializer.write_doc(doc_id, term_freq); |
| <span class="kw">if let </span><span class="prelude-val">Some</span>(<span class="kw-2">ref mut </span>positions_serializer) = <span class="self">self</span>.positions_serializer_opt.as_mut() { |
| <span class="macro">assert_eq!</span>(term_freq <span class="kw">as </span>usize, position_deltas.len()); |
| positions_serializer.write_positions_delta(position_deltas); |
| } |
| } |
| |
| <span class="doccomment">/// Finish the serialization for this term postings. |
| /// |
| /// If the current block is incomplete, it needs to be encoded |
| /// using `VInt` encoding. |
| </span><span class="kw">pub fn </span>close_term(<span class="kw-2">&mut </span><span class="self">self</span>) -> io::Result<()> { |
| <span class="macro">fail_point!</span>(<span class="string">"FieldSerializer::close_term"</span>, |msg: <span class="prelude-ty">Option</span><String>| { |
| <span class="prelude-val">Err</span>(io::Error::new(io::ErrorKind::Other, <span class="macro">format!</span>(<span class="string">"{:?}"</span>, msg))) |
| }); |
| <span class="kw">if </span><span class="self">self</span>.term_open { |
| <span class="self">self</span>.postings_serializer |
| .close_term(<span class="self">self</span>.current_term_info.doc_freq)<span class="question-mark">?</span>; |
| <span class="self">self</span>.current_term_info.postings_range.end = |
| <span class="self">self</span>.postings_serializer.written_bytes() <span class="kw">as </span>usize; |
| |
| <span class="kw">if let </span><span class="prelude-val">Some</span>(positions_serializer) = <span class="self">self</span>.positions_serializer_opt.as_mut() { |
| positions_serializer.close_term()<span class="question-mark">?</span>; |
| <span class="self">self</span>.current_term_info.positions_range.end = |
| positions_serializer.written_bytes() <span class="kw">as </span>usize; |
| } |
| <span class="self">self</span>.term_dictionary_builder |
| .insert_value(<span class="kw-2">&</span><span class="self">self</span>.current_term_info)<span class="question-mark">?</span>; |
| <span class="self">self</span>.term_open = <span class="bool-val">false</span>; |
| } |
| <span class="prelude-val">Ok</span>(()) |
| } |
| |
| <span class="doccomment">/// Closes the current field. |
| </span><span class="kw">pub fn </span>close(<span class="kw-2">mut </span><span class="self">self</span>) -> io::Result<()> { |
| <span class="self">self</span>.close_term()<span class="question-mark">?</span>; |
| <span class="kw">if let </span><span class="prelude-val">Some</span>(positions_serializer) = <span class="self">self</span>.positions_serializer_opt { |
| positions_serializer.close()<span class="question-mark">?</span>; |
| } |
| <span class="self">self</span>.postings_serializer.close()<span class="question-mark">?</span>; |
| <span class="self">self</span>.term_dictionary_builder.finish()<span class="question-mark">?</span>; |
| <span class="prelude-val">Ok</span>(()) |
| } |
| } |
| |
| <span class="comment">// Staging area for the docs of the block currently being assembled.</span> |
| <span class="comment">// `doc_ids` and `term_freqs` are parallel arrays with a capacity of</span> |
| <span class="comment">// `COMPRESSION_BLOCK_SIZE`; only their first `len` entries are in use.</span> |
| <span class="kw">struct </span>Block { |
| doc_ids: [DocId; COMPRESSION_BLOCK_SIZE], |
| term_freqs: [u32; COMPRESSION_BLOCK_SIZE], |
| len: usize, |
| } |
| |
| <span class="kw">impl </span>Block { |
| <span class="comment">// Creates an empty block with zeroed storage.</span> |
| <span class="kw">fn </span>new() -> <span class="self">Self </span>{ |
| <span class="self">Self </span>{ |
| len: <span class="number">0</span>, |
| doc_ids: [<span class="number">0u32</span>; COMPRESSION_BLOCK_SIZE], |
| term_freqs: [<span class="number">0u32</span>; COMPRESSION_BLOCK_SIZE], |
| } |
| } |
| |
| <span class="comment">// Doc ids appended since the last `clear`.</span> |
| <span class="kw">fn </span>doc_ids(<span class="kw-2">&</span><span class="self">self</span>) -> <span class="kw-2">&</span>[DocId] { |
| <span class="kw">let </span>used = <span class="self">self</span>.len; |
| <span class="kw-2">&</span><span class="self">self</span>.doc_ids[..used] |
| } |
| |
| <span class="comment">// Term frequencies appended since the last `clear`, parallel to `doc_ids()`.</span> |
| <span class="kw">fn </span>term_freqs(<span class="kw-2">&</span><span class="self">self</span>) -> <span class="kw-2">&</span>[u32] { |
| <span class="kw">let </span>used = <span class="self">self</span>.len; |
| <span class="kw-2">&</span><span class="self">self</span>.term_freqs[..used] |
| } |
| |
| <span class="comment">// Forgets the staged docs; the storage itself is left untouched.</span> |
| <span class="kw">fn </span>clear(<span class="kw-2">&mut </span><span class="self">self</span>) { |
| <span class="self">self</span>.len = <span class="number">0</span>; |
| } |
| |
| <span class="comment">// Stages one (doc, term_freq) pair in the next free slot.</span> |
| <span class="comment">// Indexing panics if the block is already full.</span> |
| <span class="kw">fn </span>append_doc(<span class="kw-2">&mut </span><span class="self">self</span>, doc: DocId, term_freq: u32) { |
| <span class="kw">let </span>slot = <span class="self">self</span>.len; |
| <span class="self">self</span>.doc_ids[slot] = doc; |
| <span class="self">self</span>.term_freqs[slot] = term_freq; |
| <span class="self">self</span>.len += <span class="number">1</span>; |
| } |
| |
| <span class="comment">// True once `COMPRESSION_BLOCK_SIZE` docs have been staged.</span> |
| <span class="kw">fn </span>is_full(<span class="kw-2">&</span><span class="self">self</span>) -> bool { |
| COMPRESSION_BLOCK_SIZE == <span class="self">self</span>.len |
| } |
| |
| <span class="comment">// True when nothing is staged.</span> |
| <span class="kw">fn </span>is_empty(<span class="kw-2">&</span><span class="self">self</span>) -> bool { |
| <span class="self">self</span>.len == <span class="number">0</span> |
| } |
| |
| <span class="comment">// Last doc id of a full block. Panics if the block is not full.</span> |
| <span class="kw">fn </span>last_doc(<span class="kw-2">&</span><span class="self">self</span>) -> DocId { |
| <span class="macro">assert_eq!</span>(<span class="self">self</span>.len, COMPRESSION_BLOCK_SIZE); |
| <span class="comment">// After the assertion, `len - 1` is the last slot of the array.</span> |
| <span class="self">self</span>.doc_ids[<span class="self">self</span>.len - <span class="number">1</span>] |
| } |
| } |
| |
| <span class="kw">pub struct </span>PostingsSerializer<W: Write> { |
| <span class="comment">// Counting wrapper around the destination writer. `close_term` emits the</span> |
| <span class="comment">// skip data and the postings of each term into it.</span> |
| output_write: CountingWriter<W>, |
| <span class="comment">// Last doc id of the most recently compressed block; handed to the encoder</span> |
| <span class="comment">// as the base value for the next block. Reset to 0 by `clear`.</span> |
| last_doc_id_encoded: u32, |
| |
| <span class="comment">// Encoder used for both doc-id and term-frequency blocks.</span> |
| block_encoder: BlockEncoder, |
| <span class="comment">// Docs of the block under construction.</span> |
| block: Box<Block>, |
| |
| <span class="comment">// In-memory buffer holding the encoded postings of the current term until</span> |
| <span class="comment">// `close_term` copies them to `output_write`.</span> |
| postings_write: Vec<u8>, |
| <span class="comment">// Skip information accumulated for the current term.</span> |
| skip_write: SkipSerializer, |
| |
| <span class="comment">// Decides whether term frequencies and position totals are serialized.</span> |
| mode: IndexRecordOption, |
| <span class="comment">// Field norms, when available; needed for the block-wand information.</span> |
| fieldnorm_reader: <span class="prelude-ty">Option</span><FieldNormReader>, |
| |
| <span class="comment">// BM25 weight of the current term: set by `new_term`, reset by `close_term`.</span> |
| bm25_weight: <span class="prelude-ty">Option</span><Bm25Weight>, |
| <span class="comment">// Average number of terms in the field for that segment.</span> |
| <span class="comment">// This value is used to compute the block wand information.</span> |
| avg_fieldnorm: Score, |
| } |
| |
| <span class="kw">impl</span><W: Write> PostingsSerializer<W> { |
| <span class="doccomment">/// Creates a serializer that writes to `write`.</span> |
| <span class="doccomment">///</span> |
| <span class="doccomment">/// The serializer starts with an empty block, empty postings and skip</span> |
| <span class="doccomment">/// buffers, `last_doc_id_encoded` at 0 and no BM25 weight.</span> |
| <span class="kw">pub fn </span>new( |
| write: W, |
| avg_fieldnorm: Score, |
| mode: IndexRecordOption, |
| fieldnorm_reader: <span class="prelude-ty">Option</span><FieldNormReader>, |
| ) -> PostingsSerializer<W> { |
| <span class="kw">let </span>output_write = CountingWriter::wrap(write); |
| <span class="kw">let </span>block_encoder = BlockEncoder::new(); |
| <span class="kw">let </span>block = Box::new(Block::new()); |
| <span class="kw">let </span>skip_write = SkipSerializer::new(); |
| <span class="self">Self </span>{ |
| output_write, |
| last_doc_id_encoded: <span class="number">0u32</span>, |
| block_encoder, |
| block, |
| postings_write: Vec::new(), |
| skip_write, |
| mode, |
| fieldnorm_reader, |
| <span class="comment">// The weight is only known once `new_term` has been called.</span> |
| bm25_weight: <span class="prelude-val">None</span>, |
| avg_fieldnorm, |
| } |
| } |
| |
| <span class="doccomment">/// Prepares the BM25 weight for the term about to be written.</span> |
| <span class="doccomment">///</span> |
| <span class="doccomment">/// The weight stays `None` unless the mode records frequencies, a fieldnorm</span> |
| <span class="doccomment">/// reader is available and the segment holds at least one document.</span> |
| <span class="kw">pub fn </span>new_term(<span class="kw-2">&mut </span><span class="self">self</span>, term_doc_freq: u32) { |
| <span class="comment">// Always drop the weight of the previous term first.</span> |
| <span class="self">self</span>.bm25_weight = <span class="prelude-val">None</span>; |
| |
| <span class="kw">if </span><span class="self">self</span>.mode.has_freq() { |
| <span class="kw">if let </span><span class="prelude-val">Some</span>(fieldnorm_reader) = <span class="self">self</span>.fieldnorm_reader.as_ref() { |
| <span class="kw">let </span>segment_doc_count = fieldnorm_reader.num_docs() <span class="kw">as </span>u64; |
| <span class="comment">// An empty segment gets no weight.</span> |
| <span class="kw">if </span>segment_doc_count != <span class="number">0 </span>{ |
| <span class="kw">let </span>weight = Bm25Weight::for_one_term( |
| term_doc_freq <span class="kw">as </span>u64, |
| segment_doc_count, |
| <span class="self">self</span>.avg_fieldnorm, |
| ); |
| <span class="self">self</span>.bm25_weight = <span class="prelude-val">Some</span>(weight); |
| } |
| } |
| } |
| } |
| |
| <span class="comment">// Compresses the staged block, appends the encoded bytes to the postings</span> |
| <span class="comment">// buffer of the current term and records the matching skip entry.</span> |
| <span class="comment">// Requires a full block: `Block::last_doc` panics otherwise.</span> |
| <span class="kw">fn </span>write_block(<span class="kw-2">&mut </span><span class="self">self</span>) { |
| <span class="comment">// encode the doc ids</span> |
| <span class="kw">let </span>(doc_num_bits, encoded_docs): (u8, <span class="kw-2">&</span>[u8]) = <span class="self">self</span> |
| .block_encoder |
| .compress_block_sorted(<span class="self">self</span>.block.doc_ids(), <span class="self">self</span>.last_doc_id_encoded); |
| <span class="kw">let </span>block_last_doc = <span class="self">self</span>.block.last_doc(); |
| <span class="self">self</span>.last_doc_id_encoded = block_last_doc; |
| <span class="self">self</span>.skip_write.write_doc(block_last_doc, doc_num_bits); |
| <span class="comment">// last el block 0, offset block 1,</span> |
| <span class="self">self</span>.postings_write.extend(encoded_docs); |
| |
| <span class="kw">if </span><span class="self">self</span>.mode.has_freq() { |
| <span class="kw">let </span>(freq_num_bits, encoded_freqs): (u8, <span class="kw-2">&</span>[u8]) = <span class="self">self</span> |
| .block_encoder |
| .compress_block_unsorted(<span class="self">self</span>.block.term_freqs()); |
| <span class="self">self</span>.postings_write.extend(encoded_freqs); |
| <span class="self">self</span>.skip_write.write_term_freq(freq_num_bits); |
| <span class="kw">if </span><span class="self">self</span>.mode.has_positions() { |
| <span class="comment">// The sum of the term freqs of the block goes into the skip</span> |
| <span class="comment">// information so that readers can navigate through positions.</span> |
| <span class="kw">let </span>block_total_term_freq = <span class="self">self</span>.block.term_freqs().iter().cloned().sum(); |
| <span class="self">self</span>.skip_write.write_total_term_freq(block_total_term_freq); |
| } |
| <span class="comment">// Block-wand entry: the (fieldnorm id, term freq) pair with the highest</span> |
| <span class="comment">// `tf_factor` in the block, or (0, 0) when no weight or no fieldnorm</span> |
| <span class="comment">// reader is available.</span> |
| <span class="kw">let </span>(fieldnorm_id, term_freq) = <span class="kw">match </span>( |
| <span class="self">self</span>.bm25_weight.as_ref(), |
| <span class="self">self</span>.fieldnorm_reader.as_ref(), |
| ) { |
| (<span class="prelude-val">Some</span>(bm25_weight), <span class="prelude-val">Some</span>(fieldnorm_reader)) => { |
| <span class="kw">let </span>fieldnorm_ids = <span class="self">self</span> |
| .block |
| .doc_ids() |
| .iter() |
| .cloned() |
| .map(|doc| fieldnorm_reader.fieldnorm_id(doc)); |
| <span class="kw">let </span>freqs = <span class="self">self</span>.block.term_freqs().iter().cloned(); |
| fieldnorm_ids |
| .zip(freqs) |
| .max_by(|lhs, rhs| { |
| <span class="kw">let </span>lhs_score = bm25_weight.tf_factor(lhs.<span class="number">0</span>, lhs.<span class="number">1</span>); |
| <span class="kw">let </span>rhs_score = bm25_weight.tf_factor(rhs.<span class="number">0</span>, rhs.<span class="number">1</span>); |
| <span class="comment">// Incomparable scores (e.g. NaN) are treated as equal.</span> |
| lhs_score |
| .partial_cmp(<span class="kw-2">&</span>rhs_score) |
| .unwrap_or(Ordering::Equal) |
| }) |
| <span class="comment">// The block is full here, so the iterator is never empty.</span> |
| .unwrap() |
| } |
| <span class="kw">_ </span>=> (<span class="number">0u8</span>, <span class="number">0u32</span>), |
| }; |
| <span class="self">self</span>.skip_write.write_blockwand_max(fieldnorm_id, term_freq); |
| } |
| <span class="self">self</span>.block.clear(); |
| } |
| |
| <span class="doccomment">/// Stages one document of the current term and writes the block out as</span> |
| <span class="doccomment">/// soon as it is full.</span> |
| <span class="kw">pub fn </span>write_doc(<span class="kw-2">&mut </span><span class="self">self</span>, doc_id: DocId, term_freq: u32) { |
| <span class="self">self</span>.block.append_doc(doc_id, term_freq); |
| <span class="kw">if </span>!<span class="self">self</span>.block.is_full() { |
| <span class="comment">// Still room in the block: nothing to compress yet.</span> |
| <span class="kw">return</span>; |
| } |
| <span class="self">self</span>.write_block(); |
| } |
| |
| <span class="comment">// Consumes the serializer and flushes the output writer.</span> |
| <span class="comment">//</span> |
| <span class="comment">// `postings_write` is only an in-memory `Vec<u8>` scratch buffer whose</span> |
| <span class="comment">// `flush` is a no-op, and `close_term` has already copied its content to</span> |
| <span class="comment">// `output_write`. The writer that has to be flushed is `output_write`.</span> |
| <span class="comment">// Returns the I/O error reported by the underlying writer, if any.</span> |
| <span class="kw">fn </span>close(<span class="kw-2">mut </span><span class="self">self</span>) -> io::Result<()> { |
| <span class="self">self</span>.output_write.flush() |
| } |
| |
| <span class="doccomment">/// Finishes the current term and writes it to the output.</span> |
| <span class="doccomment">///</span> |
| <span class="doccomment">/// Docs still staged in a partial block are variable-int encoded. The skip</span> |
| <span class="doccomment">/// data (length-prefixed) is written only when `doc_freq` reaches</span> |
| <span class="doccomment">/// `COMPRESSION_BLOCK_SIZE`; the postings always follow. The per-term</span> |
| <span class="doccomment">/// buffers and the BM25 weight are reset afterwards.</span> |
| <span class="kw">pub fn </span>close_term(<span class="kw-2">&mut </span><span class="self">self</span>, doc_freq: u32) -> io::Result<()> { |
| <span class="kw">let </span>has_pending_docs = !<span class="self">self</span>.block.is_empty(); |
| <span class="kw">if </span>has_pending_docs { |
| <span class="comment">// The number of docs is not a multiple of the block size: the</span> |
| <span class="comment">// leftover doc ids are encoded with variable-int encoding.</span> |
| <span class="kw">let </span>vint_doc_ids = <span class="self">self</span> |
| .block_encoder |
| .compress_vint_sorted(<span class="self">self</span>.block.doc_ids(), <span class="self">self</span>.last_doc_id_encoded); |
| <span class="self">self</span>.postings_write.write_all(vint_doc_ids)<span class="question-mark">?</span>; |
| <span class="comment">// Same treatment for the leftover term frequencies.</span> |
| <span class="kw">if </span><span class="self">self</span>.mode.has_freq() { |
| <span class="kw">let </span>vint_term_freqs = <span class="self">self</span> |
| .block_encoder |
| .compress_vint_unsorted(<span class="self">self</span>.block.term_freqs()); |
| <span class="self">self</span>.postings_write.write_all(vint_term_freqs)<span class="question-mark">?</span>; |
| } |
| <span class="self">self</span>.block.clear(); |
| } |
| |
| <span class="kw">let </span>writes_skip_data = doc_freq >= COMPRESSION_BLOCK_SIZE <span class="kw">as </span>u32; |
| <span class="kw">if </span>writes_skip_data { |
| <span class="comment">// Skip data is prefixed with its length as a VInt.</span> |
| <span class="kw">let </span>skip_bytes = <span class="self">self</span>.skip_write.data(); |
| <span class="kw">let </span>skip_len = VInt(skip_bytes.len() <span class="kw">as </span>u64); |
| skip_len.serialize(<span class="kw-2">&mut </span><span class="self">self</span>.output_write)<span class="question-mark">?</span>; |
| <span class="self">self</span>.output_write.write_all(skip_bytes)<span class="question-mark">?</span>; |
| } |
| <span class="self">self</span>.output_write.write_all(<span class="kw-2">&</span><span class="self">self</span>.postings_write)<span class="question-mark">?</span>; |
| |
| <span class="comment">// Reset the per-term state for the next term.</span> |
| <span class="self">self</span>.skip_write.clear(); |
| <span class="self">self</span>.postings_write.clear(); |
| <span class="self">self</span>.bm25_weight = <span class="prelude-val">None</span>; |
| <span class="prelude-val">Ok</span>(()) |
| } |
| |
| <span class="doccomment">/// Returns the number of bytes written so far to the postings output.</span> |
| <span class="doccomment">///</span> |
| <span class="doccomment">/// Called before writing the postings of a term, the value is used as the</span> |
| <span class="doccomment">/// start offset; called after writing them, it is used as an end offset.</span> |
| <span class="kw">fn </span>written_bytes(<span class="kw-2">&</span><span class="self">self</span>) -> u64 { |
| <span class="kw">let </span>bytes_so_far = <span class="self">self</span>.output_write.written_bytes(); |
| bytes_so_far <span class="kw">as </span>u64 |
| } |
| |
| <span class="comment">// Resets the doc-id base to 0 and drops any staged docs.</span> |
| <span class="comment">// The postings and skip buffers are not touched here.</span> |
| <span class="kw">fn </span>clear(<span class="kw-2">&mut </span><span class="self">self</span>) { |
| <span class="self">self</span>.last_doc_id_encoded = <span class="number">0</span>; |
| <span class="self">self</span>.block.clear(); |
| } |
| } |
| </code></pre></div> |
| </section></div></main><div id="rustdoc-vars" data-root-path="../../../" data-current-crate="tantivy" data-themes="ayu,dark,light" data-resource-suffix="" data-rustdoc-version="1.66.0-nightly (5c8bff74b 2022-10-21)" ></div></body></html> |