| using Lucene.Net.Diagnostics; |
| |
| namespace Lucene.Net.Codecs.Lucene41 |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using IOUtils = Lucene.Net.Util.IOUtils; |
| using SegmentReadState = Lucene.Net.Index.SegmentReadState; |
| using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; |
| |
| /// <summary> |
| /// Lucene 4.1 postings format, which encodes postings in packed integer blocks |
| /// for fast decode. |
| /// |
| /// <para><b>NOTE</b>: this format is still experimental and |
| /// subject to change without backwards compatibility. |
| /// |
| /// <para> |
| /// Basic idea: |
| /// <list type="bullet"> |
| /// <item><description> |
| /// <b>Packed Blocks and VInt Blocks</b>: |
| /// <para>In packed blocks, integers are encoded with the same bit width packed format (<see cref="Util.Packed.PackedInt32s"/>): |
| /// the block size (i.e. number of integers inside block) is fixed (currently 128). Additionally blocks |
| /// that are all the same value are encoded in an optimized way.</para> |
| /// <para>In VInt blocks, integers are encoded as VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>): |
| /// the block size is variable.</para> |
| /// </description></item> |
| /// |
| /// <item><description> |
| /// <b>Block structure</b>: |
| /// <para>When the postings are long enough, Lucene41PostingsFormat will try to encode most integer data |
| /// as a packed block.</para> |
| /// <para>Take a term with 259 documents as an example: the first 256 document ids are encoded as two packed |
| /// blocks, while the remaining 3 are encoded as one VInt block. </para> |
| /// <para>Different kinds of data are always encoded separately into different packed blocks, but may |
| /// possibly be interleaved into the same VInt block. </para> |
| /// <para>This strategy is applied to pairs: |
| /// <document number, frequency>, |
| /// <position, payload length>, |
| /// <position, offset start, offset length>, and |
| /// <position, payload length, offsetstart, offset length>.</para> |
| /// </description></item> |
| /// |
| /// <item><description> |
| /// <b>Skipdata settings</b>: |
| /// <para>The structure of skip table is quite similar to previous version of Lucene. Skip interval is the |
| /// same as block size, and each skip entry points to the beginning of each block. However, for |
| /// the first block, skip data is omitted.</para> |
| /// </description></item> |
| /// |
| /// <item><description> |
| /// <b>Positions, Payloads, and Offsets</b>: |
| /// <para>A position is an integer indicating where the term occurs within one document. |
| /// A payload is a blob of metadata associated with current position. |
| /// An offset is a pair of integers indicating the tokenized start/end offsets for given term |
| /// in current position: it is essentially a specialized payload. </para> |
| /// <para>When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets (assuming a |
| /// null payload contributes one count). As mentioned in block structure, it is possible to encode |
| /// these three either combined or separately.</para> |
| /// <para>In all cases, payloads and offsets are stored together. When encoded as a packed block, |
| /// position data is separated out as .pos, while payloads and offsets are encoded in .pay (payload |
| /// metadata will also be stored directly in .pay). When encoded as VInt blocks, all these three are |
| /// stored interleaved into the .pos (so is payload metadata).</para> |
| /// <para>With this strategy, the majority of payload and offset data will be outside .pos file. |
| /// So for queries that require only position data, running on a full index with payloads and offsets, |
| /// this reduces disk pre-fetches.</para> |
| /// </description></item> |
| /// </list> |
| /// </para> |
| /// |
| /// <para> |
| /// Files and detailed format: |
| /// <list type="bullet"> |
| /// <item><description><c>.tim</c>: <a href="#Termdictionary">Term Dictionary</a></description></item> |
| /// <item><description><c>.tip</c>: <a href="#Termindex">Term Index</a></description></item> |
| /// <item><description><c>.doc</c>: <a href="#Frequencies">Frequencies and Skip Data</a></description></item> |
| /// <item><description><c>.pos</c>: <a href="#Positions">Positions</a></description></item> |
| /// <item><description><c>.pay</c>: <a href="#Payloads">Payloads and Offsets</a></description></item> |
| /// </list> |
| /// </para> |
| /// |
| /// <a name="Termdictionary" id="Termdictionary"></a> |
| /// <dl> |
| /// <dd> |
| /// <b>Term Dictionary</b> |
| /// |
| /// <para>The .tim file contains the list of terms in each |
| /// field along with per-term statistics (such as docfreq) |
| /// and pointers to the frequencies, positions, payload and |
| /// skip data in the .doc, .pos, and .pay files. |
| /// See <see cref="BlockTreeTermsWriter{TSubclassState}"/> for more details on the format. |
| /// </para> |
| /// |
| /// <para>NOTE: The term dictionary can plug into different postings implementations: |
| /// the postings writer/reader are actually responsible for encoding |
| /// and decoding the PostingsHeader and TermMetadata sections described here:</para> |
| /// |
| /// <list type="bullet"> |
| /// <item><description>PostingsHeader --> Header, PackedBlockSize</description></item> |
| /// <item><description>TermMetadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?, PayFPDelta?, |
| /// SkipFPDelta?</description></item> |
| /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> |
| /// <item><description>PackedBlockSize, SingletonDocID --> VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> |
| /// <item><description>DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> VLong (<see cref="Store.DataOutput.WriteVInt64(long)"/>) </description></item> |
| /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> |
| /// </list> |
| /// <para>Notes:</para> |
| /// <list type="bullet"> |
| /// <item><description>Header is a CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) storing the version information |
| /// for the postings.</description></item> |
| /// <item><description>PackedBlockSize is the fixed block size for packed blocks. In a packed block, the bit width is |
| /// determined by the largest integer. A smaller block size results in smaller variance among the widths |
| /// of integers, hence smaller indexes. A larger block size results in more efficient bulk i/o, hence |
| /// better acceleration. This value should always be a multiple of 64; it is currently fixed at 128 as |
| /// a tradeoff. It is also the skip interval used to accelerate <see cref="Search.DocIdSetIterator.Advance(int)"/>.</description></item> |
| /// <item><description>DocFPDelta determines the position of this term's TermFreqs within the .doc file. |
| /// In particular, it is the difference in file offset between this term's |
| /// data and the previous term's data (or zero, for the first term in the block). On disk it is |
| /// stored as the difference from the previous value in the sequence. </description></item> |
| /// <item><description>PosFPDelta determines the position of this term's TermPositions within the .pos file. |
| /// While PayFPDelta determines the position of this term's <TermPayloads, TermOffsets?> within |
| /// the .pay file. Similar to DocFPDelta, it is the difference between two file positions (or |
| /// neglected, for fields that omit payloads and offsets).</description></item> |
| /// <item><description>PosVIntBlockFPDelta determines the position of this term's last TermPosition in the last pos packed |
| /// block within the .pos file. It is a synonym for PayVIntBlockFPDelta or OffsetVIntBlockFPDelta. |
| /// This is actually used to indicate whether it is necessary to load the following |
| /// payloads and offsets from .pos instead of .pay. Every time a new block of positions is to be |
| /// loaded, the PostingsReader will use this value to check whether the current block is in packed format |
| /// or VInt. When in packed format, payloads and offsets are fetched from .pay, otherwise from .pos. |
| /// (This value is neglected when the total number of positions, i.e. totalTermFreq, is less than or equal |
| /// to PackedBlockSize).</description></item> |
| /// <item><description>SkipFPDelta determines the position of this term's SkipData within the .doc |
| /// file. In particular, it is the length of the TermFreq data. |
| /// SkipFPDelta is only stored if DocFreq is not smaller than SkipMinimum |
| /// (i.e. 128 in Lucene41PostingsFormat).</description></item> |
| /// <item><description>SingletonDocID is an optimization when a term only appears in one document. In this case, instead |
| /// of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the |
| /// single document ID is written to the term dictionary.</description></item> |
| /// </list> |
| /// </dd> |
| /// </dl> |
| /// |
| /// <a name="Termindex" id="Termindex"></a> |
| /// <dl> |
| /// <dd> |
| /// <b>Term Index</b> |
| /// <para>The .tip file contains an index into the term dictionary, so that it can be |
| /// accessed randomly. See <see cref="BlockTreeTermsWriter{TSubclassState}"/> for more details on the format.</para> |
| /// </dd> |
| /// </dl> |
| /// |
| /// |
| /// <a name="Frequencies" id="Frequencies"></a> |
| /// <dl> |
| /// <dd> |
| /// <b>Frequencies and Skip Data</b> |
| /// |
| /// <para>The .doc file contains the lists of documents which contain each term, along |
| /// with the frequency of the term in that document (except when frequencies are |
| /// omitted: <see cref="Index.IndexOptions.DOCS_ONLY"/>). It also saves skip data to the beginning of |
| /// each packed or VInt block, when the length of document list is larger than packed block size.</para> |
| /// |
| /// <list type="bullet"> |
| /// <item><description>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup>, Footer</description></item> |
| /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>)</description></item> |
| /// <item><description>TermFreqs --> <PackedBlock> <sup>PackedDocBlockNum</sup>, |
| /// VIntBlock? </description></item> |
| /// <item><description>PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock?</description></item> |
| /// <item><description>VIntBlock --> <DocDelta[, Freq?]><sup>DocFreq-PackedBlockSize*PackedDocBlockNum</sup></description></item> |
| /// <item><description>SkipData --> <<SkipLevelLength, SkipLevel> |
| /// <sup>NumSkipLevels-1</sup>, SkipLevel>, SkipDatum?</description></item> |
| /// <item><description>SkipLevel --> <SkipDatum> <sup>TrimmedDocFreq/(PackedBlockSize^(Level + 1))</sup></description></item> |
| /// <item><description>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?, |
| /// PayFPSkip?>?, SkipChildLevelPointer?</description></item> |
| /// <item><description>PackedDocDeltaBlock, PackedFreqBlock --> PackedInts (<see cref="Util.Packed.PackedInt32s"/>) </description></item> |
| /// <item><description>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto, PayFPSkip |
| /// --> |
| /// VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> |
| /// <item><description>SkipChildLevelPointer --> VLong (<see cref="Store.DataOutput.WriteVInt64(long)"/>) </description></item> |
| /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> |
| /// </list> |
| /// <para>Notes:</para> |
| /// <list type="bullet"> |
| /// <item><description>PackedDocDeltaBlock is theoretically generated from two steps: |
| /// <list type="number"> |
| /// <item><description>Calculate the difference between each document number and previous one, |
| /// and get a d-gaps list (for the first document, use absolute value); </description></item> |
| /// <item><description>For those d-gaps from first one to PackedDocBlockNum*PackedBlockSize<sup>th</sup>, |
| /// separately encode as packed blocks.</description></item> |
| /// </list> |
| /// If frequencies are not omitted, PackedFreqBlock will be generated without d-gap step. |
| /// </description></item> |
| /// <item><description>VIntBlock stores remaining d-gaps (along with frequencies when possible) with a format |
| /// that encodes DocDelta and Freq: |
| /// <para>DocDelta: if frequencies are indexed, this determines both the document |
| /// number and the frequency. In particular, DocDelta/2 is the difference between |
| /// this document number and the previous document number (or zero when this is the |
| /// first document in a TermFreqs). When DocDelta is odd, the frequency is one. |
| /// When DocDelta is even, the frequency is read as another VInt. If frequencies |
| /// are omitted, DocDelta contains the gap (not multiplied by 2) between document |
| /// numbers and no frequency information is stored.</para> |
| /// <para>For example, the TermFreqs for a term which occurs once in document seven |
| /// and three times in document eleven, with frequencies indexed, would be the |
| /// following sequence of VInts:</para> |
| /// <para>15, 8, 3</para> |
| /// <para>If frequencies were omitted (<see cref="Index.IndexOptions.DOCS_ONLY"/>) it would be this |
| /// sequence of VInts instead:</para> |
| /// <para>7,4</para> |
| /// </description></item> |
| /// <item><description>PackedDocBlockNum is the number of packed blocks for current term's docids or frequencies. |
| /// In particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize) </description></item> |
| /// <item><description>TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq. |
| /// We use this trick since the definition of skip entry is a little different from base interface. |
| /// In <see cref="MultiLevelSkipListWriter"/>, skip data is assumed to be saved for |
| /// skipInterval<sup>th</sup>, 2*skipInterval<sup>th</sup> ... posting in the list. However, |
| /// in Lucene41PostingsFormat, the skip data is saved for skipInterval+1<sup>th</sup>, |
| /// 2*skipInterval+1<sup>th</sup> ... posting (skipInterval==PackedBlockSize in this case). |
| /// When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one |
| /// more skip data than Lucene41SkipWriter. </description></item> |
| /// <item><description>SkipDatum is the metadata of one skip entry. |
| /// For the first block (no matter packed or VInt), it is omitted.</description></item> |
| /// <item><description>DocSkip records the document number of every PackedBlockSize<sup>th</sup> document number in |
| /// the postings (i.e. last document number in each packed block). On disk it is stored as the |
| /// difference from previous value in the sequence. </description></item> |
| /// <item><description>DocFPSkip records the file offsets of each block (excluding the first), i.e. of the posting at |
| /// PackedBlockSize+1<sup>th</sup>, 2*PackedBlockSize+1<sup>th</sup> ... , in DocFile. |
| /// The file offsets are relative to the start of current term's TermFreqs. |
| /// On disk it is also stored as the difference from previous SkipDatum in the sequence.</description></item> |
| /// <item><description>Since positions and payloads are also block encoded, the skip should skip to related block first, |
| /// then fetch the values according to in-block offset. PosFPSkip and PayFPSkip record the file |
| /// offsets of related block in .pos and .pay, respectively. While PosBlockOffset indicates |
| /// which value to fetch inside the related block (PayBlockOffset is unnecessary since it is always |
| /// equal to PosBlockOffset). Same as DocFPSkip, the file offsets are relative to the start of |
| /// current term's TermFreqs, and stored as a difference sequence.</description></item> |
| /// <item><description>PayByteUpto indicates the start offset of the current payload. It is equivalent to |
| /// the sum of the payload lengths in the current block up to PosBlockOffset</description></item> |
| /// </list> |
| /// </dd> |
| /// </dl> |
| /// |
| /// <a name="Positions" id="Positions"></a> |
| /// <dl> |
| /// <dd> |
| /// <b>Positions</b> |
| /// <para>The .pos file contains the lists of positions that each term occurs at within documents. It also |
| /// sometimes stores part of payloads and offsets for speedup.</para> |
| /// <list type="bullet"> |
| /// <item><description>PosFile(.pos) --> Header, <TermPositions> <sup>TermCount</sup>, Footer</description></item> |
| /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> |
| /// <item><description>TermPositions --> <PackedPosDeltaBlock> <sup>PackedPosBlockNum</sup>, |
| /// VIntBlock? </description></item> |
| /// <item><description>VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?, |
| /// OffsetDelta?, OffsetLength?><sup>PosVIntCount</sup></description></item> |
| /// <item><description>PackedPosDeltaBlock --> PackedInts (<see cref="Util.Packed.PackedInt32s"/>)</description></item> |
| /// <item><description>PositionDelta, OffsetDelta, OffsetLength --> |
| /// VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> |
| /// <item><description>PayloadData --> byte (<see cref="Store.DataOutput.WriteByte(byte)"/>)<sup>PayLength</sup></description></item> |
| /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> |
| /// </list> |
| /// <para>Notes:</para> |
| /// <list type="bullet"> |
| /// <item><description>TermPositions are ordered by term (terms are implicit, from the term dictionary), and position |
| /// values for each term document pair are incremental, and ordered by document number.</description></item> |
| /// <item><description>PackedPosBlockNum is the number of packed blocks for current term's positions, payloads or offsets. |
| /// In particular, PackedPosBlockNum = floor(totalTermFreq/PackedBlockSize) </description></item> |
| /// <item><description>PosVIntCount is the number of positions encoded as VInt format. In particular, |
| /// PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize</description></item> |
| /// <item><description>The procedure how PackedPosDeltaBlock is generated is the same as PackedDocDeltaBlock |
| /// in chapter <a href="#Frequencies">Frequencies and Skip Data</a>.</description></item> |
| /// <item><description>PositionDelta is, if payloads are disabled for the term's field, the |
| /// difference between the position of the current occurrence in the document and |
| /// the previous occurrence (or zero, if this is the first occurrence in this |
| /// document). If payloads are enabled for the term's field, then PositionDelta/2 |
| /// is the difference between the current and the previous position. If payloads |
| /// are enabled and PositionDelta is odd, then PayloadLength is stored, indicating |
| /// the length of the payload at the current term position.</description></item> |
| /// <item><description>For example, the TermPositions for a term which occurs as the fourth term in |
| /// one document, and as the fifth and ninth term in a subsequent document, would |
| /// be the following sequence of VInts (payloads disabled): |
| /// <para>4, 5, 4</para></description></item> |
| /// <item><description>PayloadData is metadata associated with the current term position. If |
| /// PayloadLength is stored at the current position, then it indicates the length |
| /// of this payload. If PayloadLength is not stored, then this payload has the same |
| /// length as the payload at the previous position.</description></item> |
| /// <item><description>OffsetDelta/2 is the difference between this position's startOffset from the |
| /// previous occurrence (or zero, if this is the first occurrence in this document). |
| /// If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the |
| /// previous occurrence and an OffsetLength follows. Offset data is only written for |
| /// <see cref="Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/>.</description></item> |
| /// </list> |
| /// </dd> |
| /// </dl> |
| /// |
| /// <a name="Payloads" id="Payloads"></a> |
| /// <dl> |
| /// <dd> |
| /// <b>Payloads and Offsets</b> |
| /// <para>The .pay file will store payloads and offsets associated with certain term-document positions. |
| /// Some payloads and offsets will be separated out into .pos file, for performance reasons.</para> |
| /// <list type="bullet"> |
| /// <item><description>PayFile(.pay) --> Header, <TermPayloads, TermOffsets?> <sup>TermCount</sup>, Footer</description></item> |
| /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> |
| /// <item><description>TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> <sup>PackedPayBlockNum</sup></description></item> |
| /// <item><description>TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> <sup>PackedPayBlockNum</sup></description></item> |
| /// <item><description>PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> PackedInts (<see cref="Util.Packed.PackedInt32s"/>) </description></item> |
| /// <item><description>SumPayLength --> VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> |
| /// <item><description>PayData --> byte (<see cref="Store.DataOutput.WriteByte(byte)"/>) <sup>SumPayLength</sup></description></item> |
| /// <item><description>Footer --> CodecFooter (<see cref="CodecUtil.WriteFooter(Store.IndexOutput)"/>) </description></item> |
| /// </list> |
| /// <para>Notes:</para> |
| /// <list type="bullet"> |
| /// <item><description>The order of TermPayloads/TermOffsets will be the same as TermPositions, note that part of |
| /// payload/offsets are stored in .pos.</description></item> |
| /// <item><description>The procedure by which PackedPayLengthBlock and PackedOffsetLengthBlock are generated is the |
| /// same as for PackedFreqBlock in chapter <a href="#Frequencies">Frequencies and Skip Data</a>, |
| /// while PackedOffsetStartDeltaBlock follows the same procedure as PackedDocDeltaBlock.</description></item> |
| /// <item><description>PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. It is also a synonym |
| /// for PackedOffsetBlockNum.</description></item> |
| /// <item><description>SumPayLength is the total length of payloads written within one block, should be the sum |
| /// of PayLengths in one packed block.</description></item> |
| /// <item><description>PayLength in PackedPayLengthBlock is the length of each payload associated with the current |
| /// position.</description></item> |
| /// </list> |
| /// </dd> |
| /// </dl> |
| /// </para> |
| /// |
| /// @lucene.experimental |
| /// </summary> |
[PostingsFormatName("Lucene41")] // LUCENENET specific - using PostingsFormatName attribute to ensure the default name passed from subclasses is the same as this class name
public sealed class Lucene41PostingsFormat : PostingsFormat
{
    /// <summary>
    /// Filename extension for document number, frequencies, and skip data.
    /// See chapter: <a href="#Frequencies">Frequencies and Skip Data</a>
    /// </summary>
    public const string DOC_EXTENSION = "doc";

    /// <summary>
    /// Filename extension for positions.
    /// See chapter: <a href="#Positions">Positions</a>
    /// </summary>
    public const string POS_EXTENSION = "pos";

    /// <summary>
    /// Filename extension for payloads and offsets.
    /// See chapter: <a href="#Payloads">Payloads and Offsets</a>
    /// </summary>
    public const string PAY_EXTENSION = "pay";

    // Minimum / maximum term block sizes captured by the constructor and handed
    // to the BlockTreeTermsWriter created in FieldsConsumer.
    private readonly int minTermBlockSize;
    private readonly int maxTermBlockSize;

    /// <summary>
    /// Fixed packed block size, number of integers encoded in
    /// a single packed block.
    /// </summary>
    // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding
    // Declared readonly: the block size is documented as fixed, so callers must be able
    // to read it but never reassign it (a plain public static field was writable by anyone).
    public static readonly int BLOCK_SIZE = 128;

    /// <summary>
    /// Creates <see cref="Lucene41PostingsFormat"/> with default
    /// settings.
    /// </summary>
    public Lucene41PostingsFormat()
        : this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE)
    {
    }

    /// <summary>
    /// Creates <see cref="Lucene41PostingsFormat"/> with custom
    /// values for <paramref name="minTermBlockSize"/> and
    /// <paramref name="maxTermBlockSize"/> passed to block terms dictionary. </summary>
    /// <param name="minTermBlockSize">Minimum term block size; asserted to be greater than 1.</param>
    /// <param name="maxTermBlockSize">Maximum term block size; asserted to be at least <paramref name="minTermBlockSize"/>.</param>
    /// <seealso cref="BlockTreeTermsWriter{TSubclassState}.BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int,TSubclassState)"/>
    public Lucene41PostingsFormat(int minTermBlockSize, int maxTermBlockSize)
        : base()
    {
        // The range checks below only run when Debugging.AssertsEnabled is true.
        this.minTermBlockSize = minTermBlockSize;
        if (Debugging.AssertsEnabled) Debugging.Assert(minTermBlockSize > 1);
        this.maxTermBlockSize = maxTermBlockSize;
        if (Debugging.AssertsEnabled) Debugging.Assert(minTermBlockSize <= maxTermBlockSize);
    }

    /// <summary>
    /// Returns the format name followed by the packed block size,
    /// in the form <c>Name(blocksize=N)</c>.
    /// </summary>
    public override string ToString()
    {
        return Name + "(blocksize=" + BLOCK_SIZE + ")";
    }

    /// <summary>
    /// Creates a <see cref="Lucene41PostingsWriter"/> for <paramref name="state"/> and wraps it in a
    /// block tree terms writer configured with this format's minimum and maximum term block sizes.
    /// </summary>
    /// <param name="state">Segment write state passed to both the postings writer and the terms writer.</param>
    /// <returns>The terms writer that was constructed around the postings writer.</returns>
    public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
    {
        PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state);

        bool success = false;
        try
        {
            FieldsConsumer ret = new BlockTreeTermsWriter<object>(state, postingsWriter, minTermBlockSize, maxTermBlockSize, subclassState: null);
            success = true;
            return ret;
        }
        finally
        {
            // Only reached with success == false when the terms writer constructor threw:
            // dispose the already-created postings writer before the exception propagates.
            if (!success)
            {
                IOUtils.DisposeWhileHandlingException(postingsWriter);
            }
        }
    }

    /// <summary>
    /// Creates a <see cref="Lucene41PostingsReader"/> for the segment described by <paramref name="state"/>
    /// and wraps it in a block tree terms reader.
    /// </summary>
    /// <param name="state">Segment read state supplying the directory, field infos, segment info,
    /// IO context, segment suffix and terms index divisor.</param>
    /// <returns>The terms reader that was constructed around the postings reader.</returns>
    public override FieldsProducer FieldsProducer(SegmentReadState state)
    {
        PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
        bool success = false;
        try
        {
            FieldsProducer ret = new BlockTreeTermsReader<object>(state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix, state.TermsIndexDivisor, subclassState: null);
            success = true;
            return ret;
        }
        finally
        {
            // Only reached with success == false when the terms reader constructor threw:
            // dispose the already-created postings reader before the exception propagates.
            if (!success)
            {
                IOUtils.DisposeWhileHandlingException(postingsReader);
            }
        }
    }
}
| } |