blob: bc8923d7f078987b72a41ed35a29b9987880188d [file] [log] [blame]
Index: lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1069496)
+++ lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy)
@@ -53,20 +53,6 @@
* tweaking this is rarely useful.*/
public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
- /** Expert: The fraction of TermDocs entries stored in skip tables,
- * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
- * smaller indexes, greater acceleration, but fewer accelerable cases, while
- * smaller values result in bigger indexes, less acceleration and more
- * accelerable cases. More detailed experiments would be useful here. */
- public final int skipInterval = 16;
-
- /** Expert: The maximum number of skip levels. Smaller values result in
- * slightly smaller indexes, but slower skipping in big posting lists.
- */
- public final int maxSkipLevels = 10;
-
-
-
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
this.infoStream = infoStream;
Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (revision 1069496)
+++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (working copy)
@@ -23,6 +23,7 @@
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -44,8 +45,22 @@
final IndexOutput freqOut;
final IndexOutput proxOut;
final DefaultSkipListWriter skipListWriter;
- final int skipInterval;
- final int maxSkipLevels;
+ /** Expert: The fraction of TermDocs entries stored in skip tables,
+ * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
+ * smaller indexes, greater acceleration, but fewer accelerable cases, while
+ * smaller values result in bigger indexes, less acceleration and more
+ * accelerable cases. More detailed experiments would be useful here. */
+ final int skipInterval = 16;
+
+ /**
+ * Expert: the minimum docFreq a term must have before any skip data is written for it.
+ */
+ final int skipMinimum = skipInterval;
+
+ /** Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ final int maxSkipLevels = 10;
final int totalNumDocs;
IndexOutput termsOut;
@@ -84,14 +99,11 @@
totalNumDocs = state.numDocs;
- skipListWriter = new DefaultSkipListWriter(state.skipInterval,
- state.maxSkipLevels,
+ skipListWriter = new DefaultSkipListWriter(skipInterval,
+ maxSkipLevels,
state.numDocs,
freqOut,
proxOut);
-
- skipInterval = state.skipInterval;
- maxSkipLevels = state.maxSkipLevels;
}
@Override
@@ -100,6 +112,7 @@
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@@ -218,7 +231,7 @@
}
lastFreqStart = freqStart;
- if (df >= skipInterval) {
+ if (df >= skipMinimum) {
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}
Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (revision 1069496)
+++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (working copy)
@@ -47,6 +47,7 @@
int skipInterval;
int maxSkipLevels;
+ int skipMinimum;
//private String segment;
@@ -86,6 +87,7 @@
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
+ skipMinimum = termsIn.readInt();
}
// Must keep final because we do non-standard clone
@@ -179,7 +181,7 @@
//System.out.println(" freqFP=" + termState.freqOffset);
assert termState.freqOffset < freqIn.length();
- if (termState.docFreq >= skipInterval) {
+ if (termState.docFreq >= skipMinimum) {
termState.skipOffset = termState.bytesReader.readVInt();
//System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
@@ -378,7 +380,7 @@
@Override
public int advance(int target) throws IOException {
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close.
@@ -528,7 +530,7 @@
//System.out.println("StandardR.D&PE advance target=" + target);
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
@@ -725,7 +727,7 @@
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 1069496)
+++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy)
@@ -209,7 +209,7 @@
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException {
- if (df >= skipInterval) { // optimized case
+ if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case
if (skipListReader == null)
skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 1069496)
+++ lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy)
@@ -56,6 +56,7 @@
int skipInterval;
int maxSkipLevels;
+ int skipMinimum;
public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
@@ -102,6 +103,7 @@
SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
+ skipMinimum = termsIn.readInt();
}
@Override
@@ -231,7 +233,7 @@
//System.out.println(" payloadFP=" + termState.payloadFP);
}
}
- if (termState.docFreq >= skipInterval) {
+ if (termState.docFreq >= skipMinimum) {
//System.out.println(" readSkip @ " + termState.bytesReader.pos);
if (isFirstTerm) {
termState.skipFP = termState.bytesReader.readVLong();
@@ -344,7 +346,7 @@
}
docFreq = termState.docFreq;
- // NOTE: unused if docFreq < skipInterval:
+ // NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = 0;
@@ -420,13 +422,10 @@
@Override
public int advance(int target) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
- if (docFreq >= skipInterval) {
-
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
// This DocsEnum has never done any skipping
@@ -599,13 +598,10 @@
public int advance(int target) throws IOException {
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
- // TODO: jump right to next() if target is < X away
- // from where we are now?
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
- if (docFreq >= skipInterval) {
-
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
//System.out.println(" create skipper");
Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (revision 1069496)
+++ lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (working copy)
@@ -21,6 +21,7 @@
import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -63,8 +64,23 @@
IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
- final int skipInterval;
- final int maxSkipLevels;
+ /** Expert: The fraction of TermDocs entries stored in skip tables,
+ * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
+ * smaller indexes, greater acceleration, but fewer accelerable cases, while
+ * smaller values result in bigger indexes, less acceleration and more
+ * accelerable cases. More detailed experiments would be useful here. */
+ final int skipInterval = 16;
+
+ /**
+ * Expert: the minimum docFreq a term must have before any skip data is written for it.
+ */
+ final int skipMinimum = skipInterval;
+
+ /** Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ final int maxSkipLevels = 10;
+
final int totalNumDocs;
boolean storePayloads;
@@ -118,15 +134,11 @@
totalNumDocs = state.numDocs;
- // TODO: -- abstraction violation
- skipListWriter = new SepSkipListWriter(state.skipInterval,
- state.maxSkipLevels,
+ skipListWriter = new SepSkipListWriter(skipInterval,
+ maxSkipLevels,
state.numDocs,
freqOut, docOut,
posOut, payloadOut);
-
- skipInterval = state.skipInterval;
- maxSkipLevels = state.maxSkipLevels;
}
@Override
@@ -136,6 +148,7 @@
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@@ -264,7 +277,7 @@
}
}
- if (df >= skipInterval) {
+ if (df >= skipMinimum) {
//System.out.println(" skipFP=" + skipStart);
final long skipFP = skipOut.getFilePointer();
skipListWriter.writeSkip(skipOut);