| Index: lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
|
| ===================================================================
|
| --- lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1069496)
|
| +++ lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy)
|
| @@ -53,20 +53,6 @@
|
| * tweaking this is rarely useful.*/ |
| public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC |
| |
| - /** Expert: The fraction of TermDocs entries stored in skip tables, |
| - * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in |
| - * smaller indexes, greater acceleration, but fewer accelerable cases, while |
| - * smaller values result in bigger indexes, less acceleration and more |
| - * accelerable cases. More detailed experiments would be useful here. */ |
| - public final int skipInterval = 16; |
| - |
| - /** Expert: The maximum number of skip levels. Smaller values result in |
| - * slightly smaller indexes, but slower skipping in big posting lists. |
| - */ |
| - public final int maxSkipLevels = 10; |
| - |
| - |
| - |
| public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, |
| int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) { |
| this.infoStream = infoStream; |
| Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
|
| ===================================================================
|
| --- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (revision 1069496)
|
| +++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (working copy)
|
| @@ -23,6 +23,7 @@
|
| import java.io.IOException; |
| |
| import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.index.DocsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentWriteState; |
| @@ -44,8 +45,22 @@
|
| final IndexOutput freqOut; |
| final IndexOutput proxOut; |
| final DefaultSkipListWriter skipListWriter; |
| - final int skipInterval; |
| - final int maxSkipLevels; |
| + /** Expert: The fraction of TermDocs entries stored in skip tables, |
| + * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in |
| + * smaller indexes, greater acceleration, but fewer accelerable cases, while |
| + * smaller values result in bigger indexes, less acceleration and more |
| + * accelerable cases. More detailed experiments would be useful here. */ |
| + final int skipInterval = 16; |
| + |
| + /** |
| + * Expert: minimum docFreq to write any skip data at all |
| + */ |
| + final int skipMinimum = skipInterval; |
| + |
| + /** Expert: The maximum number of skip levels. Smaller values result in |
| + * slightly smaller indexes, but slower skipping in big posting lists. |
| + */ |
| + final int maxSkipLevels = 10; |
| final int totalNumDocs; |
| IndexOutput termsOut; |
| |
| @@ -84,14 +99,11 @@
|
| |
| totalNumDocs = state.numDocs; |
| |
| - skipListWriter = new DefaultSkipListWriter(state.skipInterval, |
| - state.maxSkipLevels, |
| + skipListWriter = new DefaultSkipListWriter(skipInterval, |
| + maxSkipLevels, |
| state.numDocs, |
| freqOut, |
| proxOut); |
| - |
| - skipInterval = state.skipInterval; |
| - maxSkipLevels = state.maxSkipLevels; |
| } |
| |
| @Override |
| @@ -100,6 +112,7 @@
|
| CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT); |
| termsOut.writeInt(skipInterval); // write skipInterval |
| termsOut.writeInt(maxSkipLevels); // write maxSkipLevels |
| + termsOut.writeInt(skipMinimum); // write skipMinimum |
| } |
| |
| @Override |
| @@ -218,7 +231,7 @@
|
| } |
| lastFreqStart = freqStart; |
| |
| - if (df >= skipInterval) { |
| + if (df >= skipMinimum) { |
| bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart)); |
| } |
| |
| Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
|
| ===================================================================
|
| --- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (revision 1069496)
|
| +++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (working copy)
|
| @@ -47,6 +47,7 @@
|
| |
| int skipInterval; |
| int maxSkipLevels; |
| + int skipMinimum; |
| |
| //private String segment; |
| |
| @@ -86,6 +87,7 @@
|
| |
| skipInterval = termsIn.readInt(); |
| maxSkipLevels = termsIn.readInt(); |
| + skipMinimum = termsIn.readInt(); |
| } |
| |
| // Must keep final because we do non-standard clone |
| @@ -179,7 +181,7 @@
|
| //System.out.println(" freqFP=" + termState.freqOffset); |
| assert termState.freqOffset < freqIn.length(); |
| |
| - if (termState.docFreq >= skipInterval) { |
| + if (termState.docFreq >= skipMinimum) { |
| termState.skipOffset = termState.bytesReader.readVInt(); |
| //System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length()); |
| assert termState.freqOffset + termState.skipOffset < freqIn.length(); |
| @@ -378,7 +380,7 @@
|
| @Override |
| public int advance(int target) throws IOException { |
| |
| - if ((target - skipInterval) >= doc && limit >= skipInterval) { |
| + if ((target - skipInterval) >= doc && limit >= skipMinimum) { |
| |
| // There are enough docs in the posting to have |
| // skip data, and it isn't too close. |
| @@ -528,7 +530,7 @@
|
| |
| //System.out.println("StandardR.D&PE advance target=" + target); |
| |
| - if ((target - skipInterval) >= doc && limit >= skipInterval) { |
| + if ((target - skipInterval) >= doc && limit >= skipMinimum) { |
| |
| // There are enough docs in the posting to have |
| // skip data, and it isn't too close |
| @@ -725,7 +727,7 @@
|
| |
| //System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this); |
| |
| - if ((target - skipInterval) >= doc && limit >= skipInterval) { |
| + if ((target - skipInterval) >= doc && limit >= skipMinimum) { |
| |
| // There are enough docs in the posting to have |
| // skip data, and it isn't too close |
| Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
|
| ===================================================================
|
| --- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 1069496)
|
| +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy)
|
| @@ -209,7 +209,7 @@
|
| |
| /** Optimized implementation. */ |
| public boolean skipTo(int target) throws IOException { |
| - if (df >= skipInterval) { // optimized case |
| + if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case |
| if (skipListReader == null) |
| skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone |
| |
| Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
|
| ===================================================================
|
| --- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 1069496)
|
| +++ lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy)
|
| @@ -56,6 +56,7 @@
|
| |
| int skipInterval; |
| int maxSkipLevels; |
| + int skipMinimum; |
| |
| public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException { |
| |
| @@ -102,6 +103,7 @@
|
| SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START); |
| skipInterval = termsIn.readInt(); |
| maxSkipLevels = termsIn.readInt(); |
| + skipMinimum = termsIn.readInt(); |
| } |
| |
| @Override |
| @@ -231,7 +233,7 @@
|
| //System.out.println(" payloadFP=" + termState.payloadFP); |
| } |
| } |
| - if (termState.docFreq >= skipInterval) { |
| + if (termState.docFreq >= skipMinimum) { |
| //System.out.println(" readSkip @ " + termState.bytesReader.pos); |
| if (isFirstTerm) { |
| termState.skipFP = termState.bytesReader.readVLong(); |
| @@ -344,7 +346,7 @@
|
| } |
| |
| docFreq = termState.docFreq; |
| - // NOTE: unused if docFreq < skipInterval: |
| + // NOTE: unused if docFreq < skipMinimum: |
| skipFP = termState.skipFP; |
| count = 0; |
| doc = 0; |
| @@ -420,13 +422,10 @@
|
| @Override |
| public int advance(int target) throws IOException { |
| |
| - // TODO: jump right to next() if target is < X away |
| - // from where we are now? |
| + if ((target - skipInterval) >= doc && docFreq >= skipMinimum) { |
| |
| - if (docFreq >= skipInterval) { |
| - |
| // There are enough docs in the posting to have |
| - // skip data |
| + // skip data, and it's not too close |
| |
| if (skipper == null) { |
| // This DocsEnum has never done any skipping |
| @@ -599,13 +598,10 @@
|
| public int advance(int target) throws IOException { |
| //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this); |
| |
| - // TODO: jump right to next() if target is < X away |
| - // from where we are now? |
| + if ((target - skipInterval) >= doc && docFreq >= skipMinimum) { |
| |
| - if (docFreq >= skipInterval) { |
| - |
| // There are enough docs in the posting to have |
| - // skip data |
| + // skip data, and it's not too close |
| |
| if (skipper == null) { |
| //System.out.println(" create skipper"); |
| Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
|
| ===================================================================
|
| --- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (revision 1069496)
|
| +++ lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (working copy)
|
| @@ -21,6 +21,7 @@
|
| import java.util.Set; |
| |
| import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.index.DocsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentWriteState; |
| @@ -63,8 +64,23 @@
|
| IndexOutput termsOut; |
| |
| final SepSkipListWriter skipListWriter; |
| - final int skipInterval; |
| - final int maxSkipLevels; |
| + /** Expert: The fraction of TermDocs entries stored in skip tables, |
| + * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in |
| + * smaller indexes, greater acceleration, but fewer accelerable cases, while |
| + * smaller values result in bigger indexes, less acceleration and more |
| + * accelerable cases. More detailed experiments would be useful here. */ |
| + final int skipInterval = 16; |
| + |
| + /** |
| + * Expert: minimum docFreq to write any skip data at all |
| + */ |
| + final int skipMinimum = skipInterval; |
| + |
| + /** Expert: The maximum number of skip levels. Smaller values result in |
| + * slightly smaller indexes, but slower skipping in big posting lists. |
| + */ |
| + final int maxSkipLevels = 10; |
| + |
| final int totalNumDocs; |
| |
| boolean storePayloads; |
| @@ -118,15 +134,11 @@
|
| |
| totalNumDocs = state.numDocs; |
| |
| - // TODO: -- abstraction violation |
| - skipListWriter = new SepSkipListWriter(state.skipInterval, |
| - state.maxSkipLevels, |
| + skipListWriter = new SepSkipListWriter(skipInterval, |
| + maxSkipLevels, |
| state.numDocs, |
| freqOut, docOut, |
| posOut, payloadOut); |
| - |
| - skipInterval = state.skipInterval; |
| - maxSkipLevels = state.maxSkipLevels; |
| } |
| |
| @Override |
| @@ -136,6 +148,7 @@
|
| // TODO: -- just ask skipper to "start" here |
| termsOut.writeInt(skipInterval); // write skipInterval |
| termsOut.writeInt(maxSkipLevels); // write maxSkipLevels |
| + termsOut.writeInt(skipMinimum); // write skipMinimum |
| } |
| |
| @Override |
| @@ -264,7 +277,7 @@
|
| } |
| } |
| |
| - if (df >= skipInterval) { |
| + if (df >= skipMinimum) { |
| //System.out.println(" skipFP=" + skipStart); |
| final long skipFP = skipOut.getFilePointer(); |
| skipListWriter.writeSkip(skipOut); |