| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java |
| index 17024d8..a405ccb 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java |
| @@ -32,12 +32,14 @@ import org.apache.lucene.codecs.PostingsReaderBase; |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Accountable; |
| @@ -659,6 +661,12 @@ public class BlockTermsReader extends FieldsProducer { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + decodeMetaData(); |
| + return postingsReader.impacts(fieldInfo, state, scorer, flags); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef target, TermState otherState) { |
| //System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " + target + " this=" + this); |
| assert otherState != null && otherState instanceof BlockTermState; |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java |
| index f19cd2c..9ed87b5 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java |
| @@ -26,6 +26,7 @@ import java.util.List; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| import org.apache.lucene.codecs.TermStats; |
| import org.apache.lucene.index.IndexOptions; |
| @@ -127,7 +128,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| |
| for(String field : fields) { |
| |
| @@ -146,7 +147,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { |
| break; |
| } |
| |
| - termsWriter.write(term, termsEnum); |
| + termsWriter.write(term, termsEnum, norms); |
| } |
| |
| termsWriter.finish(); |
| @@ -232,9 +233,9 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { |
| |
| private final BytesRefBuilder lastPrevTerm = new BytesRefBuilder(); |
| |
| - void write(BytesRef text, TermsEnum termsEnum) throws IOException { |
| + void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException { |
| |
| - BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen); |
| + BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms); |
| if (state == null) { |
| // No docs for this term: |
| return; |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java |
| index b16bb15..bdc0738 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java |
| @@ -24,6 +24,7 @@ import java.util.List; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; // javadocs |
| import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; |
| @@ -213,7 +214,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| |
| String lastField = null; |
| for(String field : fields) { |
| @@ -233,7 +234,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { |
| if (term == null) { |
| break; |
| } |
| - termsWriter.write(term, termsEnum); |
| + termsWriter.write(term, termsEnum, norms); |
| } |
| |
| termsWriter.finish(); |
| @@ -771,7 +772,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { |
| } |
| |
| /** Writes one term's worth of postings. */ |
| - public void write(BytesRef text, TermsEnum termsEnum) throws IOException { |
| + public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException { |
| /* |
| if (DEBUG) { |
| int[] tmp = new int[lastTerm.length]; |
| @@ -780,7 +781,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { |
| } |
| */ |
| |
| - BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen); |
| + BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms); |
| if (state != null) { |
| assert state.docFreq != 0; |
| assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter; |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java |
| index 6c2d2bc..fdb54df 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java |
| @@ -20,9 +20,11 @@ package org.apache.lucene.codecs.blocktreeords; |
| import java.io.IOException; |
| |
| import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| @@ -205,6 +207,12 @@ final class OrdsIntersectTermsEnum extends TermsEnum { |
| return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, reuse, flags); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + currentFrame.decodeMetaData(); |
| + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags); |
| + } |
| + |
| private int getState() { |
| int state = currentFrame.state; |
| for(int idx=0;idx<currentFrame.suffix;idx++) { |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java |
| index 9a9d871..8d55a1d 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java |
| @@ -25,9 +25,11 @@ import java.io.PrintStream; |
| |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| @@ -934,6 +936,19 @@ public final class OrdsSegmentTermsEnum extends TermsEnum { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + assert !eof; |
| + //if (DEBUG) { |
| + //System.out.println("BTTR.docs seg=" + segment); |
| + //} |
| + currentFrame.decodeMetaData(); |
| + //if (DEBUG) { |
| + //System.out.println(" state=" + currentFrame.state); |
| + //} |
| + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef target, TermState otherState) { |
| // if (DEBUG) { |
| // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState); |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java |
| index b864bf0..b826102 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java |
| @@ -29,16 +29,19 @@ import java.util.Map.Entry; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.Fields; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.IndexOutput; |
| @@ -371,6 +374,10 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { |
| return delegate().postings(reuse, flags); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return delegate().impacts(scorer, flags); |
| + } |
| } |
| |
| @Override |
| @@ -416,7 +423,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| |
| // Delegate must write first: it may have opened files |
| // on creating the class |
| @@ -424,7 +431,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { |
| // close them; alternatively, if we delayed pulling |
| // the fields consumer until here, we could do it |
| // afterwards: |
| - delegateFieldsConsumer.write(fields); |
| + delegateFieldsConsumer.write(fields, norms); |
| |
| for(String field : fields) { |
| Terms terms = fields.terms(field); |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java |
| index 00f25cf..901e1ae 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java |
| @@ -29,14 +29,17 @@ import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.Fields; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.OrdTermState; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SegmentWriteState; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.RAMOutputStream; |
| import org.apache.lucene.util.Accountable; |
| @@ -944,6 +947,10 @@ public final class DirectPostingsFormat extends PostingsFormat { |
| } |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| } |
| |
| private final class DirectIntersectTermsEnum extends TermsEnum { |
| @@ -1496,6 +1503,11 @@ public final class DirectPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| + |
| + @Override |
| public SeekStatus seekCeil(BytesRef term) { |
| throw new UnsupportedOperationException(); |
| } |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java |
| index 5ba4c5f..4ecf4d6 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java |
| @@ -34,6 +34,7 @@ import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.SegmentInfo; |
| @@ -41,6 +42,7 @@ import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| @@ -432,6 +434,12 @@ public class FSTOrdTermsReader extends FieldsProducer { |
| return postingsReader.postings(fieldInfo, state, reuse, flags); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + decodeMetaData(); |
| + return postingsReader.impacts(fieldInfo, state, scorer, flags); |
| + } |
| + |
| // TODO: this can be achieved by making use of Util.getByOutput() |
| // and should have related tests |
| @Override |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java |
| index cbe6583..b59d41d 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java |
| @@ -24,6 +24,7 @@ import java.util.List; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| @@ -186,7 +187,7 @@ public class FSTOrdTermsWriter extends FieldsConsumer { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| for(String field : fields) { |
| Terms terms = fields.terms(field); |
| if (terms == null) { |
| @@ -205,7 +206,7 @@ public class FSTOrdTermsWriter extends FieldsConsumer { |
| if (term == null) { |
| break; |
| } |
| - BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen); |
| + BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms); |
| if (termState != null) { |
| termsWriter.finishTerm(term, termState); |
| sumTotalTermFreq += termState.totalTermFreq; |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java |
| index 8dda05c..b1b61e1 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java |
| @@ -34,6 +34,7 @@ import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.SegmentInfo; |
| @@ -41,6 +42,7 @@ import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Accountable; |
| @@ -299,6 +301,12 @@ public class FSTTermsReader extends FieldsProducer { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + decodeMetaData(); |
| + return postingsReader.impacts(fieldInfo, state, scorer, flags); |
| + } |
| + |
| + @Override |
| public void seekExact(long ord) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java |
| index 8284d74..8e55d41 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java |
| @@ -24,6 +24,7 @@ import java.util.List; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.FieldInfo; |
| @@ -158,7 +159,7 @@ public class FSTTermsWriter extends FieldsConsumer { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| for(String field : fields) { |
| Terms terms = fields.terms(field); |
| if (terms == null) { |
| @@ -179,7 +180,7 @@ public class FSTTermsWriter extends FieldsConsumer { |
| break; |
| } |
| |
| - BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen); |
| + BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms); |
| if (termState != null) { |
| termsWriter.finishTerm(term, termState); |
| sumTotalTermFreq += termState.totalTermFreq; |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java |
| index b81e56e..855002c 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java |
| @@ -29,7 +29,7 @@ import java.util.concurrent.atomic.AtomicLong; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.DocValuesProducer; |
| import org.apache.lucene.index.*; |
| -import org.apache.lucene.index.SortedDocValues; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| @@ -870,5 +870,9 @@ class MemoryDocValuesProducer extends DocValuesProducer { |
| throw new UnsupportedOperationException(); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| } |
| } |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java |
| index 21983c6..0df7d92 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java |
| @@ -27,6 +27,7 @@ import java.util.TreeMap; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.TermStats; |
| import org.apache.lucene.index.CorruptIndexException; |
| @@ -34,12 +35,15 @@ import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.Fields; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SegmentWriteState; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IOContext; |
| @@ -287,7 +291,7 @@ public final class MemoryPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| for(String field : fields) { |
| |
| Terms terms = fields.terms(field); |
| @@ -815,6 +819,11 @@ public final class MemoryPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| + |
| + @Override |
| public BytesRef term() { |
| return current.input; |
| } |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java |
| index f5504b3..22aeb5c 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java |
| @@ -29,11 +29,14 @@ import java.util.TreeMap; |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.SegmentReadState; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.BufferedChecksumIndexInput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| @@ -230,6 +233,10 @@ class SimpleTextFieldsReader extends FieldsProducer { |
| return docsEnum.reset(docsStart, indexOptions == IndexOptions.DOCS, docFreq); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| } |
| |
| private class SimpleTextDocsEnum extends PostingsEnum { |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java |
| index 2023552..d8299b5 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java |
| @@ -20,6 +20,7 @@ package org.apache.lucene.codecs.simpletext; |
| import java.io.IOException; |
| |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| @@ -56,7 +57,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| write(writeState.fieldInfos, fields); |
| } |
| |
| diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java |
| index 25f2a4d..ee0757d 100644 |
| --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java |
| +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java |
| @@ -27,10 +27,13 @@ import java.util.TreeMap; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.Fields; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.BufferedChecksumIndexInput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| @@ -410,6 +413,10 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { |
| return e; |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| } |
| |
| // note: these two enum classes are exactly like the Default impl... |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveFreqNormAccumulator.java b/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveFreqNormAccumulator.java |
| new file mode 100644 |
| index 0000000..3dd9d35 |
| --- /dev/null |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveFreqNormAccumulator.java |
| @@ -0,0 +1,163 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs; |
| + |
| +import java.util.Arrays; |
| +import java.util.Collections; |
| +import java.util.Comparator; |
| +import java.util.Iterator; |
| +import java.util.SortedSet; |
| +import java.util.TreeSet; |
| + |
| +/** |
| + * This class accumulates the (freq, norm) pairs that may produce competitive scores. |
| + */ |
| +public final class CompetitiveFreqNormAccumulator { |
| + |
| + // We speed up accumulation for common norm values by first computing |
| + // the max freq for all norms in -128..127 |
| + private final int[] maxFreqs; |
| + private boolean dirty; |
| + private final TreeSet<FreqAndNorm> freqNormPairs; |
| + |
| + /** Sole constructor. */ |
| + public CompetitiveFreqNormAccumulator() { |
| + maxFreqs = new int[256]; |
| + Comparator<FreqAndNorm> comparator = new Comparator<CompetitiveFreqNormAccumulator.FreqAndNorm>() { |
| + @Override |
| + public int compare(FreqAndNorm o1, FreqAndNorm o2) { |
| + // greater freqs compare greater |
| + int cmp = Integer.compare(o1.freq, o2.freq); |
| + if (cmp == 0) { |
| + // greater norms compare lower |
| + cmp = Long.compareUnsigned(o2.norm, o1.norm); |
| + } |
| + return cmp; |
| + } |
| + }; |
| + freqNormPairs = new TreeSet<>(comparator); |
| + } |
| + |
| + /** Reset to the same state it was in after creation. */ |
| + public void clear() { |
| + Arrays.fill(maxFreqs, 0); |
| + dirty = false; |
| + freqNormPairs.clear(); |
| + } |
| + |
| + /** |
| + * A (freq, norm) pair. |
| + */ |
| + public static class FreqAndNorm { |
| + /** Doc-term frequency. */ |
| + public final int freq; |
| + /** Normalization factor. */ |
| + public final long norm; |
| + |
| + /** Sole constructor. */ |
| + public FreqAndNorm(int freq, long norm) { |
| + this.freq = freq; |
| + this.norm = norm; |
| + } |
| + |
| + @Override |
| + public boolean equals(Object obj) { |
| + if (obj == null || obj instanceof FreqAndNorm == false) { |
| + return false; |
| + } |
| + FreqAndNorm that = (FreqAndNorm) obj; |
| + return freq == that.freq && norm == that.norm; |
| + } |
| + |
| + @Override |
| + public int hashCode() { |
| + int h = getClass().hashCode(); |
| + h = 31 * h + freq; |
| + h = 31 * h + Long.hashCode(norm); |
| + return h; |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return "{" + freq + "," + norm + "}"; |
| + } |
| + } |
| + |
| + /** Accumulate a (freq,norm) pair, updating this structure if there is no |
| + * equivalent or more competitive entry already. */ |
| + public void add(int freq, long norm) { |
| + if (norm >= Byte.MIN_VALUE && norm <= Byte.MAX_VALUE) { |
| + int index = Byte.toUnsignedInt((byte) norm); |
| + maxFreqs[index] = Math.max(maxFreqs[index], freq); |
| + dirty = true; |
| + } else { |
| + add(new FreqAndNorm(freq, norm)); |
| + } |
| + } |
| + |
| + /** Merge {@code acc} into this. */ |
| + public void addAll(CompetitiveFreqNormAccumulator acc) { |
| + for (FreqAndNorm entry : acc.getCompetitiveFreqNormPairs()) { |
| + add(entry); |
| + } |
| + } |
| + |
| +  /** Get the set of competitive freq and norm pairs, ordered by increasing freq and norm. */
| + public SortedSet<FreqAndNorm> getCompetitiveFreqNormPairs() { |
| + if (dirty) { |
| + for (int i = 0; i < maxFreqs.length; ++i) { |
| + if (maxFreqs[i] > 0) { |
| + add(new FreqAndNorm(maxFreqs[i], (byte) i)); |
| + maxFreqs[i] = 0; |
| + } |
| + } |
| + dirty = false; |
| + } |
| + return Collections.unmodifiableSortedSet(freqNormPairs); |
| + } |
| + |
| + private void add(FreqAndNorm newEntry) { |
| + FreqAndNorm next = freqNormPairs.ceiling(newEntry); |
| + if (next == null) { |
| + // nothing is more competitive |
| + freqNormPairs.add(newEntry); |
| + } else if (Long.compareUnsigned(next.norm, newEntry.norm) <= 0) { |
| + // we already have this entry or more competitive entries in the tree |
| + return; |
| + } else { |
| + // some entries have a greater freq but a less competitive norm, so we |
| + // don't know which one will trigger greater scores, still add to the tree |
| + freqNormPairs.add(newEntry); |
| + } |
| + |
| + for (Iterator<FreqAndNorm> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) { |
| + FreqAndNorm entry = it.next(); |
| + if (Long.compareUnsigned(entry.norm, newEntry.norm) >= 0) { |
| + // less competitive |
| + it.remove(); |
| + } else { |
| + // lesser freq but better norm, further entries are not comparable |
| + break; |
| + } |
| + } |
| + } |
| + |
| + @Override |
| + public String toString() { |
| + return getCompetitiveFreqNormPairs().toString(); |
| + } |
| +} |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java |
| index 28bae1d..f4fc9ac 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java |
| @@ -76,14 +76,14 @@ public abstract class FieldsConsumer implements Closeable { |
| * live docs when pulling docs/positions enums. |
| * </ul> |
| */ |
| - public abstract void write(Fields fields) throws IOException; |
| + public abstract void write(Fields fields, NormsProducer norms) throws IOException; |
| |
| /** Merges in the fields from the readers in |
| * <code>mergeState</code>. The default implementation skips |
| - * and maps around deleted documents, and calls {@link #write(Fields)}. |
| + * and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}. |
| * Implementations can override this method for more sophisticated |
| * merging (bulk-byte copying, etc). */ |
| - public void merge(MergeState mergeState) throws IOException { |
| + public void merge(MergeState mergeState, NormsProducer norms) throws IOException { |
| final List<Fields> fields = new ArrayList<>(); |
| final List<ReaderSlice> slices = new ArrayList<>(); |
| |
| @@ -102,7 +102,7 @@ public abstract class FieldsConsumer implements Closeable { |
| Fields mergedFields = new MappedMultiFields(mergeState, |
| new MultiFields(fields.toArray(Fields.EMPTY_ARRAY), |
| slices.toArray(ReaderSlice.EMPTY_ARRAY))); |
| - write(mergedFields); |
| + write(mergedFields, norms); |
| } |
| |
| // NOTE: strange but necessary so javadocs linting is happy: |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java |
| index c937886..517c731 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java |
| @@ -40,8 +40,8 @@ public abstract class MultiLevelSkipListReader implements Closeable { |
| /** the maximum number of skip levels possible for this index */ |
| protected int maxNumberOfSkipLevels; |
| |
| - // number of levels in this skip list |
| - private int numberOfSkipLevels; |
| + /** number of levels in this skip list */ |
| + protected int numberOfSkipLevels; |
| |
| // Expert: defines the number of top skip levels to buffer in memory. |
| // Reducing this number results in less memory usage, but possibly |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java |
| index 207b324..8e090be 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java |
| @@ -53,13 +53,13 @@ import org.apache.lucene.util.MathUtil; |
| |
| public abstract class MultiLevelSkipListWriter { |
| /** number of levels in this skip list */ |
| - protected int numberOfSkipLevels; |
| + protected final int numberOfSkipLevels; |
| |
| /** the skip interval in the list with level = 0 */ |
| - private int skipInterval; |
| + private final int skipInterval; |
| |
| /** skipInterval used for level > 0 */ |
| - private int skipMultiplier; |
| + private final int skipMultiplier; |
| |
| /** for every skip level a different buffer is used */ |
| private RAMOutputStream[] skipBuffer; |
| @@ -69,6 +69,7 @@ public abstract class MultiLevelSkipListWriter { |
| this.skipInterval = skipInterval; |
| this.skipMultiplier = skipMultiplier; |
| |
| + int numberOfSkipLevels; |
| // calculate the maximum number of skip levels for this document frequency |
| if (df <= skipInterval) { |
| numberOfSkipLevels = 1; |
| @@ -80,6 +81,7 @@ public abstract class MultiLevelSkipListWriter { |
| if (numberOfSkipLevels > maxSkipLevels) { |
| numberOfSkipLevels = maxSkipLevels; |
| } |
| + this.numberOfSkipLevels = numberOfSkipLevels; |
| } |
| |
| /** Creates a {@code MultiLevelSkipListWriter}, where |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java |
| index 56cbab5..ca403fa 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java |
| @@ -22,7 +22,9 @@ import java.io.IOException; |
| |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.SegmentReadState; |
| +import org.apache.lucene.search.similarities.Similarity; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Accountable; |
| @@ -65,7 +67,13 @@ public abstract class PostingsReaderBase implements Closeable, Accountable { |
| /** Must fully consume state, since after this call that |
| * TermState may be reused. */ |
| public abstract PostingsEnum postings(FieldInfo fieldInfo, BlockTermState state, PostingsEnum reuse, int flags) throws IOException; |
| - |
| + |
| + /** |
| + * Return a {@link ImpactsEnum} that computes impacts with {@code scorer}. |
| + * @see #postings(FieldInfo, BlockTermState, PostingsEnum, int) |
| + */ |
| + public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, Similarity.SimScorer scorer, int flags) throws IOException; |
| + |
| /** |
| * Checks consistency of this reader. |
| * <p> |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java |
| index b4f2d4e..48c6027 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java |
| @@ -60,7 +60,7 @@ public abstract class PostingsWriterBase implements Closeable { |
| * FixedBitSet} for every docID written. If no docs |
| * were written, this method should return null, and the |
| * terms dict will skip the term. */ |
| - public abstract BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen) throws IOException; |
| + public abstract BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms) throws IOException; |
| |
| /** |
| * Encode metadata as long[] and byte[]. {@code absolute} controls whether |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java |
| index 1fb83b9..98bdd91 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java |
| @@ -22,6 +22,7 @@ import java.io.IOException; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexOptions; |
| +import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.FixedBitSet; |
| @@ -74,7 +75,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { |
| /** Start a new term. Note that a matching call to {@link |
| * #finishTerm(BlockTermState)} is done, only if the term has at least one |
| * document. */ |
| - public abstract void startTerm() throws IOException; |
| + public abstract void startTerm(NumericDocValues norms) throws IOException; |
| |
| /** Finishes the current term. The provided {@link |
| * BlockTermState} contains the term's summary statistics, |
| @@ -117,8 +118,14 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { |
| } |
| |
| @Override |
| - public final BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen) throws IOException { |
| - startTerm(); |
| + public final BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms) throws IOException { |
| + NumericDocValues normValues; |
| + if (fieldInfo.hasNorms() == false) { |
| + normValues = null; |
| + } else { |
| + normValues = norms.getNorms(fieldInfo); |
| + } |
| + startTerm(normValues); |
| postingsEnum = termsEnum.postings(postingsEnum, enumFlags); |
| assert postingsEnum != null; |
| |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java |
| index ffc182f..4305c46 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java |
| @@ -17,9 +17,13 @@ |
| package org.apache.lucene.codecs.blocktree; |
| |
| |
| +import java.io.IOException; |
| + |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.BitSet; |
| import org.apache.lucene.util.BytesRef; |
| |
| @@ -80,4 +84,9 @@ class BitSetTermsEnum extends TermsEnum { |
| postingsEnum.reset(); |
| return postingsEnum; |
| } |
| + |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java |
| index bdacc22..ec3f6e6 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java |
| @@ -24,6 +24,7 @@ import java.util.List; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| @@ -315,7 +316,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment); |
| |
| String lastField = null; |
| @@ -340,7 +341,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { |
| } |
| |
| //if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term)); |
| - termsWriter.write(term, termsEnum); |
| + termsWriter.write(term, termsEnum, norms); |
| } |
| |
| termsWriter.finish(); |
| @@ -852,7 +853,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { |
| } |
| |
| /** Writes one term's worth of postings. */ |
| - public void write(BytesRef text, TermsEnum termsEnum) throws IOException { |
| + public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException { |
| /* |
| if (DEBUG) { |
| int[] tmp = new int[lastTerm.length]; |
| @@ -861,7 +862,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { |
| } |
| */ |
| |
| - BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen); |
| + BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms); |
| if (state != null) { |
| |
| assert state.docFreq != 0; |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java |
| index 7521763..6bccddc 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java |
| @@ -19,10 +19,12 @@ package org.apache.lucene.codecs.blocktree; |
| |
| import java.io.IOException; |
| |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| @@ -232,6 +234,12 @@ final class IntersectTermsEnum extends TermsEnum { |
| return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, reuse, flags); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + currentFrame.decodeMetaData(); |
| + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags); |
| + } |
| + |
| private int getState() { |
| int state = currentFrame.state; |
| for(int idx=0;idx<currentFrame.suffix;idx++) { |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java |
| index 73c32bb..ef83f49 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java |
| @@ -21,9 +21,11 @@ import java.io.IOException; |
| import java.io.PrintStream; |
| |
| import org.apache.lucene.codecs.BlockTermState; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| @@ -1003,6 +1005,19 @@ final class SegmentTermsEnum extends TermsEnum { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + assert !eof; |
| + //if (DEBUG) { |
| + //System.out.println("BTTR.docs seg=" + segment); |
| + //} |
| + currentFrame.decodeMetaData(); |
| + //if (DEBUG) { |
| + //System.out.println(" state=" + currentFrame.state); |
| + //} |
| + return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef target, TermState otherState) { |
| // if (DEBUG) { |
| // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState); |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java |
| index f5318ba..a0f5292 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java |
| @@ -27,14 +27,17 @@ import java.util.NoSuchElementException; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| import org.apache.lucene.index.CorruptIndexException; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.Fields; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| @@ -942,6 +945,13 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem |
| return docsEnum; |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + final PostingsEnum delegate = postings(null, PostingsEnum.FREQS); |
| + final float maxScore = scorer.score(Float.MAX_VALUE, 1); |
| + return new SlowImpactsEnum(delegate, maxScore); |
| + } |
| + |
| } |
| |
| private static class TVPostingsEnum extends PostingsEnum { |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java |
| index 3b0d5c2..a99894a 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java |
| @@ -384,7 +384,8 @@ public final class Lucene50PostingsFormat extends PostingsFormat { |
| |
| // Increment version to change it |
| final static int VERSION_START = 0; |
| - final static int VERSION_CURRENT = VERSION_START; |
| + final static int VERSION_IMPACT_SKIP_DATA = 1; |
| + final static int VERSION_CURRENT = VERSION_IMPACT_SKIP_DATA; |
| |
| private final int minTermBlockSize; |
| private final int maxTermBlockSize; |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java |
| index 0dde774..fea0e208 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java |
| @@ -19,16 +19,20 @@ package org.apache.lucene.codecs.lucene50; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| +import java.util.Objects; |
| |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.PostingsReaderBase; |
| import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState; |
| import org.apache.lucene.index.FieldInfo; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.SegmentReadState; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| @@ -234,6 +238,16 @@ public final class Lucene50PostingsReader extends PostingsReaderBase { |
| } |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException { |
| + Objects.requireNonNull(scorer); |
| + if (state.docFreq <= BLOCK_SIZE || version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { |
| + // no skip data |
| + return new SlowImpactsEnum(postings(fieldInfo, state, null, flags), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| + return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, scorer, flags); |
| + } |
| + |
| final class BlockDocsEnum extends PostingsEnum { |
| private final byte[] encoded; |
| |
| @@ -401,7 +415,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase { |
| |
| if (skipper == null) { |
| // Lazy init: first time this enum has ever been used for skipping |
| - skipper = new Lucene50SkipReader(docIn.clone(), |
| + skipper = new Lucene50SkipReader(version, |
| + docIn.clone(), |
| MAX_SKIP_LEVELS, |
| indexHasPos, |
| indexHasOffsets, |
| @@ -666,7 +681,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase { |
| if (target > nextSkipDoc) { |
| if (skipper == null) { |
| // Lazy init: first time this enum has ever been used for skipping |
| - skipper = new Lucene50SkipReader(docIn.clone(), |
| + skipper = new Lucene50SkipReader(version, |
| + docIn.clone(), |
| MAX_SKIP_LEVELS, |
| true, |
| indexHasOffsets, |
| @@ -1082,7 +1098,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase { |
| if (target > nextSkipDoc) { |
| if (skipper == null) { |
| // Lazy init: first time this enum has ever been used for skipping |
| - skipper = new Lucene50SkipReader(docIn.clone(), |
| + skipper = new Lucene50SkipReader(version, |
| + docIn.clone(), |
| MAX_SKIP_LEVELS, |
| true, |
| indexHasOffsets, |
| @@ -1272,6 +1289,469 @@ public final class Lucene50PostingsReader extends PostingsReaderBase { |
| } |
| } |
| |
| + final class BlockImpactsEverythingEnum extends ImpactsEnum { |
| + |
| + private final byte[] encoded; |
| + |
| + private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE]; |
| + private final int[] freqBuffer = new int[MAX_DATA_SIZE]; |
| + private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE]; |
| + |
| + private final int[] payloadLengthBuffer; |
| + private final int[] offsetStartDeltaBuffer; |
| + private final int[] offsetLengthBuffer; |
| + |
| + private byte[] payloadBytes; |
| + private int payloadByteUpto; |
| + private int payloadLength; |
| + |
| + private int lastStartOffset; |
| + private int startOffset = -1; |
| + private int endOffset = -1; |
| + |
| + private int docBufferUpto; |
| + private int posBufferUpto; |
| + |
| + private final Lucene50ScoreSkipReader skipper; |
| + |
| + IndexInput docIn; |
| + final IndexInput posIn; |
| + final IndexInput payIn; |
| + final BytesRef payload; |
| + |
| + final boolean indexHasFreq; |
| + final boolean indexHasPos; |
| + final boolean indexHasOffsets; |
| + final boolean indexHasPayloads; |
| + |
| + private int docFreq; // number of docs in this posting list |
| + private long totalTermFreq; // number of positions in this posting list |
| + private int docUpto; // how many docs we've read |
| + private int doc; // doc we last read |
| + private int accum; // accumulator for doc deltas |
| + private int freq; // freq we last read |
| + private int position; // current position |
| + |
| + // how many positions "behind" we are; nextPosition must |
| + // skip these to "catch up": |
| + private int posPendingCount; |
| + |
| + // Lazy pos seek: if != -1 then we must seek to this FP |
| + // before reading positions: |
| + private long posPendingFP; |
| + |
| + // Lazy pay seek: if != -1 then we must seek to this FP |
| + // before reading payloads/offsets: |
| + private long payPendingFP; |
| + |
| + // Where this term's postings start in the .doc file: |
| + private long docTermStartFP; |
| + |
| + // Where this term's postings start in the .pos file: |
| + private long posTermStartFP; |
| + |
| + // Where this term's payloads/offsets start in the .pay |
| + // file: |
| + private long payTermStartFP; |
| + |
| + // File pointer where the last (vInt encoded) pos delta |
| + // block is. We need this to know whether to bulk |
| + // decode vs vInt decode the block: |
| + private long lastPosBlockFP; |
| + |
| + private int nextSkipDoc = -1; |
| + |
| + private final boolean needsPositions; |
| + private final boolean needsOffsets; // true if we actually need offsets |
| + private final boolean needsPayloads; // true if we actually need payloads |
| + |
| + private long seekTo = -1; |
| + |
| + public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, SimScorer scorer, int flags) throws IOException { |
| + indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; |
| + indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; |
| + indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| + indexHasPayloads = fieldInfo.hasPayloads(); |
| + |
| + needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS); |
| + needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS); |
| + needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); |
| + |
| + this.docIn = Lucene50PostingsReader.this.docIn.clone(); |
| + |
| + encoded = new byte[MAX_ENCODED_SIZE]; |
| + |
| + if (indexHasPos && needsPositions) { |
| + this.posIn = Lucene50PostingsReader.this.posIn.clone(); |
| + } else { |
| + this.posIn = null; |
| + } |
| + |
| + if ((indexHasOffsets && needsOffsets) || (indexHasPayloads && needsPayloads)) { |
| + this.payIn = Lucene50PostingsReader.this.payIn.clone(); |
| + } else { |
| + this.payIn = null; |
| + } |
| + |
| + if (indexHasOffsets) { |
| + offsetStartDeltaBuffer = new int[MAX_DATA_SIZE]; |
| + offsetLengthBuffer = new int[MAX_DATA_SIZE]; |
| + } else { |
| + offsetStartDeltaBuffer = null; |
| + offsetLengthBuffer = null; |
| + startOffset = -1; |
| + endOffset = -1; |
| + } |
| + |
| + if (indexHasPayloads) { |
| + payloadLengthBuffer = new int[MAX_DATA_SIZE]; |
| + payloadBytes = new byte[128]; |
| + payload = new BytesRef(); |
| + } else { |
| + payloadLengthBuffer = null; |
| + payloadBytes = null; |
| + payload = null; |
| + } |
| + |
| + docFreq = termState.docFreq; |
| + docTermStartFP = termState.docStartFP; |
| + posTermStartFP = termState.posStartFP; |
| + payTermStartFP = termState.payStartFP; |
| + totalTermFreq = termState.totalTermFreq; |
| + docIn.seek(docTermStartFP); |
| + posPendingFP = posTermStartFP; |
| + payPendingFP = payTermStartFP; |
| + posPendingCount = 0; |
| + if (termState.totalTermFreq < BLOCK_SIZE) { |
| + lastPosBlockFP = posTermStartFP; |
| + } else if (termState.totalTermFreq == BLOCK_SIZE) { |
| + lastPosBlockFP = -1; |
| + } else { |
| + lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; |
| + } |
| + |
| + doc = -1; |
| + accum = 0; |
| + docUpto = 0; |
| + docBufferUpto = BLOCK_SIZE; |
| + |
| + skipper = new Lucene50ScoreSkipReader(version, |
| + docIn.clone(), |
| + MAX_SKIP_LEVELS, |
| + indexHasPos, |
| + indexHasOffsets, |
| + indexHasPayloads, |
| + scorer); |
| + skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); |
| + |
| + if (indexHasFreq == false) { |
| + Arrays.fill(freqBuffer, 1); |
| + } |
| + } |
| + |
| + @Override |
| + public int freq() throws IOException { |
| + return freq; |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return doc; |
| + } |
| + |
| + private void refillDocs() throws IOException { |
| + final int left = docFreq - docUpto; |
| + assert left > 0; |
| + |
| + if (left >= BLOCK_SIZE) { |
| + forUtil.readBlock(docIn, encoded, docDeltaBuffer); |
| + if (indexHasFreq) { |
| + forUtil.readBlock(docIn, encoded, freqBuffer); |
| + } |
| + } else { |
| + readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq); |
| + } |
| + docBufferUpto = 0; |
| + } |
| + |
| + private void refillPositions() throws IOException { |
| + if (posIn.getFilePointer() == lastPosBlockFP) { |
| + final int count = (int) (totalTermFreq % BLOCK_SIZE); |
| + int payloadLength = 0; |
| + int offsetLength = 0; |
| + payloadByteUpto = 0; |
| + for(int i=0;i<count;i++) { |
| + int code = posIn.readVInt(); |
| + if (indexHasPayloads) { |
| + if ((code & 1) != 0) { |
| + payloadLength = posIn.readVInt(); |
| + } |
| + payloadLengthBuffer[i] = payloadLength; |
| + posDeltaBuffer[i] = code >>> 1; |
| + if (payloadLength != 0) { |
| + if (payloadByteUpto + payloadLength > payloadBytes.length) { |
| + payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength); |
| + } |
| + posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength); |
| + payloadByteUpto += payloadLength; |
| + } |
| + } else { |
| + posDeltaBuffer[i] = code; |
| + } |
| + |
| + if (indexHasOffsets) { |
| + int deltaCode = posIn.readVInt(); |
| + if ((deltaCode & 1) != 0) { |
| + offsetLength = posIn.readVInt(); |
| + } |
| + offsetStartDeltaBuffer[i] = deltaCode >>> 1; |
| + offsetLengthBuffer[i] = offsetLength; |
| + } |
| + } |
| + payloadByteUpto = 0; |
| + } else { |
| + forUtil.readBlock(posIn, encoded, posDeltaBuffer); |
| + |
| + if (indexHasPayloads && payIn != null) { |
| + if (needsPayloads) { |
| + forUtil.readBlock(payIn, encoded, payloadLengthBuffer); |
| + int numBytes = payIn.readVInt(); |
| + |
| + if (numBytes > payloadBytes.length) { |
| + payloadBytes = ArrayUtil.grow(payloadBytes, numBytes); |
| + } |
| + payIn.readBytes(payloadBytes, 0, numBytes); |
| + } else { |
| + // this works, because when writing a vint block we always force the first length to be written |
| + forUtil.skipBlock(payIn); // skip over lengths |
| + int numBytes = payIn.readVInt(); // read length of payloadBytes |
| + payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes |
| + } |
| + payloadByteUpto = 0; |
| + } |
| + |
| + if (indexHasOffsets && payIn != null) { |
| + if (needsOffsets) { |
| + forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer); |
| + forUtil.readBlock(payIn, encoded, offsetLengthBuffer); |
| + } else { |
| + // this works, because when writing a vint block we always force the first length to be written |
| + forUtil.skipBlock(payIn); // skip over starts |
| + forUtil.skipBlock(payIn); // skip over lengths |
| + } |
| + } |
| + } |
| + } |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + return advance(doc + 1); |
| + } |
| + |
| + @Override |
| + public float getMaxScore(int upTo) throws IOException { |
| + return skipper.getMaxScore(upTo); |
| + } |
| + |
| + @Override |
| + public int advanceShallow(int target) throws IOException { |
| + if (target > nextSkipDoc) { |
| + // always plus one to fix the result, since skip position in Lucene50SkipReader |
| + // is a little different from MultiLevelSkipListReader |
| + final int newDocUpto = skipper.skipTo(target) + 1; |
| + |
| + if (newDocUpto > docUpto) { |
| + // Skipper moved |
| + assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; |
| + docUpto = newDocUpto; |
| + |
| + // Force to read next block |
| + docBufferUpto = BLOCK_SIZE; |
| + accum = skipper.getDoc(); |
| + posPendingFP = skipper.getPosPointer(); |
| + payPendingFP = skipper.getPayPointer(); |
| + posPendingCount = skipper.getPosBufferUpto(); |
| + lastStartOffset = 0; // new document |
| + payloadByteUpto = skipper.getPayloadByteUpto(); // actually, this is just lastSkipEntry |
| + seekTo = skipper.getDocPointer(); // delay the seek |
| + } |
| + // next time we call advance, this is used to |
| + // foresee whether skipper is necessary. |
| + nextSkipDoc = skipper.getNextSkipDoc(); |
| + } |
| + assert nextSkipDoc >= target; |
| + return nextSkipDoc; |
| + } |
| + |
| + @Override |
| + public int advance(int target) throws IOException { |
| + if (target > nextSkipDoc) { |
| + advanceShallow(target); |
| + } |
| + if (docUpto == docFreq) { |
| + return doc = NO_MORE_DOCS; |
| + } |
| + if (docBufferUpto == BLOCK_SIZE) { |
| + if (seekTo >= 0) { |
| + docIn.seek(seekTo); |
| + seekTo = -1; |
| + } |
| + refillDocs(); |
| + } |
| + |
| + // Now scan: |
| + while (true) { |
| + accum += docDeltaBuffer[docBufferUpto]; |
| + freq = freqBuffer[docBufferUpto]; |
| + posPendingCount += freq; |
| + docBufferUpto++; |
| + docUpto++; |
| + |
| + if (accum >= target) { |
| + break; |
| + } |
| + if (docUpto == docFreq) { |
| + return doc = NO_MORE_DOCS; |
| + } |
| + } |
| + position = 0; |
| + lastStartOffset = 0; |
| + |
| + return doc = accum; |
| + } |
| + |
| + // TODO: in theory we could avoid loading frq block |
| + // when not needed, ie, use skip data to load how far to |
| + // seek the pos pointer ... instead of having to load frq |
| + // blocks only to sum up how many positions to skip |
| + private void skipPositions() throws IOException { |
| + // Skip positions now: |
| + int toSkip = posPendingCount - freq; |
| + // if (DEBUG) { |
| + // System.out.println(" FPR.skipPositions: toSkip=" + toSkip); |
| + // } |
| + |
| + final int leftInBlock = BLOCK_SIZE - posBufferUpto; |
| + if (toSkip < leftInBlock) { |
| + int end = posBufferUpto + toSkip; |
| + while(posBufferUpto < end) { |
| + if (indexHasPayloads) { |
| + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; |
| + } |
| + posBufferUpto++; |
| + } |
| + } else { |
| + toSkip -= leftInBlock; |
| + while(toSkip >= BLOCK_SIZE) { |
| + assert posIn.getFilePointer() != lastPosBlockFP; |
| + forUtil.skipBlock(posIn); |
| + |
| + if (indexHasPayloads && payIn != null) { |
| + // Skip payloadLength block: |
| + forUtil.skipBlock(payIn); |
| + |
| + // Skip payloadBytes block: |
| + int numBytes = payIn.readVInt(); |
| + payIn.seek(payIn.getFilePointer() + numBytes); |
| + } |
| + |
| + if (indexHasOffsets && payIn != null) { |
| + forUtil.skipBlock(payIn); |
| + forUtil.skipBlock(payIn); |
| + } |
| + toSkip -= BLOCK_SIZE; |
| + } |
| + refillPositions(); |
| + payloadByteUpto = 0; |
| + posBufferUpto = 0; |
| + while(posBufferUpto < toSkip) { |
| + if (indexHasPayloads) { |
| + payloadByteUpto += payloadLengthBuffer[posBufferUpto]; |
| + } |
| + posBufferUpto++; |
| + } |
| + } |
| + |
| + position = 0; |
| + lastStartOffset = 0; |
| + } |
| + |
| + @Override |
| + public int nextPosition() throws IOException { |
| + if (indexHasPos == false || needsPositions == false) { |
| + return -1; |
| + } |
| + assert posPendingCount > 0; |
| + |
| + if (posPendingFP != -1) { |
| + posIn.seek(posPendingFP); |
| + posPendingFP = -1; |
| + |
| + if (payPendingFP != -1 && payIn != null) { |
| + payIn.seek(payPendingFP); |
| + payPendingFP = -1; |
| + } |
| + |
| + // Force buffer refill: |
| + posBufferUpto = BLOCK_SIZE; |
| + } |
| + |
| + if (posPendingCount > freq) { |
| + skipPositions(); |
| + posPendingCount = freq; |
| + } |
| + |
| + if (posBufferUpto == BLOCK_SIZE) { |
| + refillPositions(); |
| + posBufferUpto = 0; |
| + } |
| + position += posDeltaBuffer[posBufferUpto]; |
| + |
| + if (indexHasPayloads) { |
| + payloadLength = payloadLengthBuffer[posBufferUpto]; |
| + payload.bytes = payloadBytes; |
| + payload.offset = payloadByteUpto; |
| + payload.length = payloadLength; |
| + payloadByteUpto += payloadLength; |
| + } |
| + |
| + if (indexHasOffsets && needsOffsets) { |
| + startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto]; |
| + endOffset = startOffset + offsetLengthBuffer[posBufferUpto]; |
| + lastStartOffset = startOffset; |
| + } |
| + |
| + posBufferUpto++; |
| + posPendingCount--; |
| + return position; |
| + } |
| + |
| + @Override |
| + public int startOffset() { |
| + return startOffset; |
| + } |
| + |
| + @Override |
| + public int endOffset() { |
| + return endOffset; |
| + } |
| + |
| + @Override |
| + public BytesRef getPayload() { |
| + if (payloadLength == 0) { |
| + return null; |
| + } else { |
| + return payload; |
| + } |
| + } |
| + |
| + @Override |
| + public long cost() { |
| + return docFreq; |
| + } |
| + |
| + } |
| + |
| @Override |
| public long ramBytesUsed() { |
| return BASE_RAM_BYTES_USED; |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java |
| index 6d24a4c..06b9a0c 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java |
| @@ -31,12 +31,14 @@ import java.io.IOException; |
| |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| +import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator; |
| import org.apache.lucene.codecs.PushPostingsWriterBase; |
| import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState; |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexWriter; |
| +import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.IndexOutput; |
| @@ -96,7 +98,11 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { |
| |
| private final ForUtil forUtil; |
| private final Lucene50SkipWriter skipWriter; |
| - |
| + |
| + private boolean fieldHasNorms; |
| + private NumericDocValues norms; |
| + private final CompetitiveFreqNormAccumulator competitiveFreqNormAccumulator = new CompetitiveFreqNormAccumulator(); |
| + |
| /** Creates a postings writer */ |
| public Lucene50PostingsWriter(SegmentWriteState state) throws IOException { |
| final float acceptableOverheadRatio = PackedInts.COMPACT; |
| @@ -185,6 +191,7 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { |
| super.setField(fieldInfo); |
| skipWriter.setField(writePositions, writeOffsets, writePayloads); |
| lastState = emptyState; |
| + fieldHasNorms = fieldInfo.hasNorms(); |
| if (writePositions) { |
| if (writePayloads || writeOffsets) { |
| return 3; // doc + pos + pay FP |
| @@ -197,7 +204,7 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { |
| } |
| |
| @Override |
| - public void startTerm() { |
| + public void startTerm(NumericDocValues norms) { |
| docStartFP = docOut.getFilePointer(); |
| if (writePositions) { |
| posStartFP = posOut.getFilePointer(); |
| @@ -208,6 +215,8 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { |
| lastDocID = 0; |
| lastBlockDocID = -1; |
| skipWriter.resetSkip(); |
| + this.norms = norms; |
| + competitiveFreqNormAccumulator.clear(); |
| } |
| |
| @Override |
| @@ -216,7 +225,9 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { |
| // Should write skip data as well as postings list for |
| // current block. |
| if (lastBlockDocID != -1 && docBufferUpto == 0) { |
| - skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto); |
| + skipWriter.bufferSkip(lastBlockDocID, competitiveFreqNormAccumulator, docCount, |
| + lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto); |
| + competitiveFreqNormAccumulator.clear(); |
| } |
| |
| final int docDelta = docID - lastDocID; |
| @@ -247,6 +258,24 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { |
| lastDocID = docID; |
| lastPosition = 0; |
| lastStartOffset = 0; |
| + |
| + long norm; |
| + if (fieldHasNorms) { |
| + boolean found = norms.advanceExact(docID); |
| + if (found == false) { |
| + // This can happen if indexing hits a problem after adding a doc to the |
| + // postings but before buffering the norm. Such documents are written |
| + // deleted and will go away on the first merge. |
| + norm = 1L; |
| + } else { |
| + norm = norms.longValue(); |
| + assert norm != 0 : docID; |
| + } |
| + } else { |
| + norm = 1L; |
| + } |
| + |
| + competitiveFreqNormAccumulator.add(writeFreqs ? termDocFreq : 1, norm); |
| } |
| |
| @Override |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java |
| new file mode 100644 |
| index 0000000..cb1f54a |
| --- /dev/null |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java |
| @@ -0,0 +1,106 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs.lucene50; |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| +import java.util.Objects; |
| + |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| +import org.apache.lucene.store.ByteArrayDataInput; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.util.ArrayUtil; |
| + |
| +final class Lucene50ScoreSkipReader extends Lucene50SkipReader { |
| + |
| + private final SimScorer scorer; |
| + private final float[] maxScore; |
| + private final byte[][] impacts; |
| + private final int[] impactsLength; |
| + private final float globalMaxScore; |
| + private final ByteArrayDataInput badi = new ByteArrayDataInput(); |
| + |
| + public Lucene50ScoreSkipReader(int version, IndexInput skipStream, int maxSkipLevels, |
| + boolean hasPos, boolean hasOffsets, boolean hasPayloads, SimScorer scorer) { |
| + super(version, skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads); |
| + if (version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { |
| + throw new IllegalStateException("Cannot skip based on scores if impacts are not indexed"); |
| + } |
| + this.scorer = Objects.requireNonNull(scorer); |
| + this.maxScore = new float[maxSkipLevels]; |
| + this.impacts = new byte[maxSkipLevels][]; |
| + Arrays.fill(impacts, new byte[0]); |
| + this.impactsLength = new int[maxSkipLevels]; |
| + this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1); |
| + } |
| + |
| + @Override |
| + public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) throws IOException { |
| + super.init(skipPointer, docBasePointer, posBasePointer, payBasePointer, df); |
| + Arrays.fill(impactsLength, 0); |
| + Arrays.fill(maxScore, globalMaxScore); |
| + } |
| + |
| + /** Upper bound of scores up to {@code upTo} included. */ |
| + public float getMaxScore(int upTo) throws IOException { |
| + for (int level = 0; level < numberOfSkipLevels; ++level) { |
| + if (upTo <= skipDoc[level]) { |
| + return maxScore(level); |
| + } |
| + } |
| + return globalMaxScore; |
| + } |
| + |
| + private float maxScore(int level) throws IOException { |
| + assert level < numberOfSkipLevels; |
| + if (impactsLength[level] > 0) { |
| + badi.reset(impacts[level], 0, impactsLength[level]); |
| + maxScore[level] = readImpacts(badi, scorer); |
| + impactsLength[level] = 0; |
| + } |
| + return maxScore[level]; |
| + } |
| + |
| + @Override |
| + protected void readImpacts(int level, IndexInput skipStream) throws IOException { |
| + int length = skipStream.readVInt(); |
| + if (impacts[level].length < length) { |
| + impacts[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)]; |
| + } |
| + skipStream.readBytes(impacts[level], 0, length); |
| + impactsLength[level] = length; |
| + } |
| + |
| + static float readImpacts(ByteArrayDataInput in, SimScorer scorer) throws IOException { |
| + int freq = 0; |
| + long norm = 0; |
| + float maxScore = 0; |
| + while (in.getPosition() < in.length()) { |
| + int freqDelta = in.readVInt(); |
| + if ((freqDelta & 0x01) != 0) { |
| + freq += 1 + (freqDelta >>> 1); |
| + norm += 1 + in.readZLong(); |
| + } else { |
| + freq += 1 + (freqDelta >>> 1); |
| + norm++; |
| + } |
| + maxScore = Math.max(maxScore, scorer.score(freq, norm)); |
| + } |
| + return maxScore; |
| + } |
| + |
| +} |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java |
| index 8c037c5..b92cd42 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java |
| @@ -52,7 +52,8 @@ import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZ |
| * Therefore, we'll trim df before passing it to the interface. see trim(int) |
| * |
| */ |
| -final class Lucene50SkipReader extends MultiLevelSkipListReader { |
| +class Lucene50SkipReader extends MultiLevelSkipListReader { |
| + private final int version; |
| private long docPointer[]; |
| private long posPointer[]; |
| private long payPointer[]; |
| @@ -65,8 +66,11 @@ final class Lucene50SkipReader extends MultiLevelSkipListReader { |
| private long lastDocPointer; |
| private int lastPosBufferUpto; |
| |
| - public Lucene50SkipReader(IndexInput skipStream, int maxSkipLevels, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { |
| + public Lucene50SkipReader(int version, |
| + IndexInput skipStream, int maxSkipLevels, |
| + boolean hasPos, boolean hasOffsets, boolean hasPayloads) { |
| super(skipStream, maxSkipLevels, BLOCK_SIZE, 8); |
| + this.version = version; |
| docPointer = new long[maxSkipLevels]; |
| if (hasPos) { |
| posPointer = new long[maxSkipLevels]; |
| @@ -192,6 +196,17 @@ final class Lucene50SkipReader extends MultiLevelSkipListReader { |
| payPointer[level] += skipStream.readVLong(); |
| } |
| } |
| + readImpacts(level, skipStream); |
| return delta; |
| } |
| + |
| + // The default impl skips impacts since they are only useful if we have a SimScorer |
| + // to compute the scores that impacts map to. |
| + protected void readImpacts(int level, IndexInput skipStream) throws IOException { |
| + if (version >= Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) { |
| + // The base implementation skips impacts, they are not used |
| + skipStream.skipBytes(skipStream.readVInt()); |
| + } |
| + } |
| + |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java |
| index a4556c6..cc94ed0 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java |
| @@ -19,9 +19,14 @@ package org.apache.lucene.codecs.lucene50; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| +import java.util.Set; |
| +import java.util.SortedSet; |
| |
| -import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator; |
| +import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm; |
| import org.apache.lucene.codecs.MultiLevelSkipListWriter; |
| +import org.apache.lucene.store.IndexOutput; |
| +import org.apache.lucene.store.RAMOutputStream; |
| |
| /** |
| * Write skip lists with multiple levels, and support skip within block ints. |
| @@ -60,6 +65,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter { |
| private long curPayPointer; |
| private int curPosBufferUpto; |
| private int curPayloadByteUpto; |
| + private CompetitiveFreqNormAccumulator[] curCompetitiveFreqNorms; |
| private boolean fieldHasPositions; |
| private boolean fieldHasOffsets; |
| private boolean fieldHasPayloads; |
| @@ -79,6 +85,10 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter { |
| } |
| lastPayloadByteUpto = new int[maxSkipLevels]; |
| } |
| + curCompetitiveFreqNorms = new CompetitiveFreqNormAccumulator[maxSkipLevels]; |
| + for (int i = 0; i < maxSkipLevels; ++i) { |
| + curCompetitiveFreqNorms[i] = new CompetitiveFreqNormAccumulator(); |
| + } |
| } |
| |
| public void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) { |
| @@ -105,10 +115,15 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter { |
| lastPayFP = payOut.getFilePointer(); |
| } |
| } |
| + if (initialized) { |
| + for (CompetitiveFreqNormAccumulator acc : curCompetitiveFreqNorms) { |
| + acc.clear(); |
| + } |
| + } |
| initialized = false; |
| } |
| |
| - public void initSkip() { |
| + private void initSkip() { |
| if (!initialized) { |
| super.resetSkip(); |
| Arrays.fill(lastSkipDoc, 0); |
| @@ -122,6 +137,11 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter { |
| Arrays.fill(lastSkipPayPointer, lastPayFP); |
| } |
| } |
| + // sets of competitive freq,norm pairs should be empty at this point |
| + assert Arrays.stream(curCompetitiveFreqNorms) |
| + .map(CompetitiveFreqNormAccumulator::getCompetitiveFreqNormPairs) |
| + .mapToInt(Set::size) |
| + .sum() == 0; |
| initialized = true; |
| } |
| } |
| @@ -129,7 +149,8 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter { |
| /** |
| * Sets the values for the current skip data. |
| */ |
| - public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException { |
| + public void bufferSkip(int doc, CompetitiveFreqNormAccumulator competitiveFreqNorms, |
| + int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException { |
| initSkip(); |
| this.curDoc = doc; |
| this.curDocPointer = docOut.getFilePointer(); |
| @@ -137,11 +158,15 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter { |
| this.curPayPointer = payFP; |
| this.curPosBufferUpto = posBufferUpto; |
| this.curPayloadByteUpto = payloadByteUpto; |
| + this.curCompetitiveFreqNorms[0].addAll(competitiveFreqNorms); |
| bufferSkip(numDocs); |
| } |
| - |
| + |
| + private final RAMOutputStream freqNormOut = new RAMOutputStream(); |
| + |
| @Override |
| protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException { |
| + |
| int delta = curDoc - lastSkipDoc[level]; |
| |
| skipBuffer.writeVInt(delta); |
| @@ -165,5 +190,35 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter { |
| lastSkipPayPointer[level] = curPayPointer; |
| } |
| } |
| + |
| + CompetitiveFreqNormAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level]; |
| + assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0; |
| + if (level + 1 < numberOfSkipLevels) { |
| + curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms); |
| + } |
| + writeImpacts(competitiveFreqNorms, freqNormOut); |
| + skipBuffer.writeVInt(Math.toIntExact(freqNormOut.getFilePointer())); |
| + freqNormOut.writeTo(skipBuffer); |
| + freqNormOut.reset(); |
| + competitiveFreqNorms.clear(); |
| + } |
| + |
| + static void writeImpacts(CompetitiveFreqNormAccumulator acc, IndexOutput out) throws IOException { |
| + SortedSet<FreqAndNorm> freqAndNorms = acc.getCompetitiveFreqNormPairs(); |
| + FreqAndNorm previous = new FreqAndNorm(0, 0); |
| + for (FreqAndNorm freqAndNorm : freqAndNorms) { |
| + assert freqAndNorm.freq > previous.freq; |
| + assert Long.compareUnsigned(freqAndNorm.norm, previous.norm) > 0; |
| + int freqDelta = freqAndNorm.freq - previous.freq - 1; |
| + long normDelta = freqAndNorm.norm - previous.norm - 1; |
| + if (normDelta == 0) { |
| + // most of time, norm only increases by 1, so we can fold everything in a single byte |
| + out.writeVInt(freqDelta << 1); |
| + } else { |
| + out.writeVInt((freqDelta << 1) | 1); |
| + out.writeZLong(normDelta); |
| + } |
| + previous = freqAndNorm; |
| + } |
| } |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java |
| index 24eaf7a..6138896 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java |
| @@ -100,7 +100,13 @@ final class IndexedDISI extends DocIdSetIterator { |
| private final long cost; |
| |
| IndexedDISI(IndexInput in, long offset, long length, long cost) throws IOException { |
| - this.slice = in.slice("docs", offset, length); |
| + this(in.slice("docs", offset, length), cost); |
| + } |
| + |
| + // This constructor allows to pass the slice directly in case it helps reuse |
| + // see eg. Lucene70 norms producer's merge instance |
| + IndexedDISI(IndexInput slice, long cost) throws IOException { |
| + this.slice = slice; |
| this.cost = cost; |
| } |
| |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java |
| index 386655e..7bea274 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java |
| @@ -28,6 +28,7 @@ import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.PostingsEnum; |
| @@ -37,6 +38,7 @@ import org.apache.lucene.index.SortedNumericDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.index.TermsEnum.SeekStatus; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.RandomAccessInput; |
| @@ -1158,6 +1160,11 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| + @Override |
| public int docFreq() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java |
| index eb7c41a..c7310e8 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java |
| @@ -40,11 +40,14 @@ import org.apache.lucene.util.IOUtils; |
| /** |
| * Reader for {@link Lucene70NormsFormat} |
| */ |
| -final class Lucene70NormsProducer extends NormsProducer { |
| +final class Lucene70NormsProducer extends NormsProducer implements Cloneable { |
| // metadata maps (just file pointers and minimal stuff) |
| private final Map<Integer,NormsEntry> norms = new HashMap<>(); |
| - private final IndexInput data; |
| private final int maxDoc; |
| + private IndexInput data; |
| + private boolean merging; |
| + private Map<Integer, IndexInput> disiInputs; |
| + private Map<Integer, RandomAccessInput> dataInputs; |
| |
| Lucene70NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { |
| maxDoc = state.segmentInfo.maxDoc(); |
| @@ -87,6 +90,22 @@ final class Lucene70NormsProducer extends NormsProducer { |
| } |
| } |
| |
| + @Override |
| + public NormsProducer getMergeInstance() throws IOException { |
| + Lucene70NormsProducer clone; |
| + try { |
| + clone = (Lucene70NormsProducer) super.clone(); |
| + } catch (CloneNotSupportedException e) { |
| + // cannot happen |
| + throw new RuntimeException(e); |
| + } |
| + clone.data = data.clone(); |
| + clone.dataInputs = new HashMap<>(); |
| + clone.disiInputs = new HashMap<>(); |
| + clone.merging = true; |
| + return clone; |
| + } |
| + |
| static class NormsEntry { |
| byte bytesPerNorm; |
| long docsWithFieldOffset; |
| @@ -193,6 +212,34 @@ final class Lucene70NormsProducer extends NormsProducer { |
| } |
| } |
| |
| + private RandomAccessInput getDataInput(FieldInfo field, NormsEntry entry) throws IOException { |
| + RandomAccessInput slice = null; |
| + if (merging) { |
| + slice = dataInputs.get(field.number); |
| + } |
| + if (slice == null) { |
| + slice = data.randomAccessSlice(entry.normsOffset, entry.numDocsWithField * (long) entry.bytesPerNorm); |
| + if (merging) { |
| + dataInputs.put(field.number, slice); |
| + } |
| + } |
| + return slice; |
| + } |
| + |
| + private IndexInput getDisiInput(FieldInfo field, NormsEntry entry) throws IOException { |
| + IndexInput slice = null; |
| + if (merging) { |
| + slice = disiInputs.get(field.number); |
| + } |
| + if (slice == null) { |
| + slice = data.slice("docs", entry.docsWithFieldOffset, entry.docsWithFieldLength); |
| + if (merging) { |
| + disiInputs.put(field.number, slice); |
| + } |
| + } |
| + return slice; |
| + } |
| + |
| @Override |
| public NumericDocValues getNorms(FieldInfo field) throws IOException { |
| final NormsEntry entry = norms.get(field.number); |
| @@ -209,7 +256,7 @@ final class Lucene70NormsProducer extends NormsProducer { |
| } |
| }; |
| } |
| - final RandomAccessInput slice = data.randomAccessSlice(entry.normsOffset, entry.numDocsWithField * (long) entry.bytesPerNorm); |
| + final RandomAccessInput slice = getDataInput(field, entry); |
| switch (entry.bytesPerNorm) { |
| case 1: |
| return new DenseNormsIterator(maxDoc) { |
| @@ -245,7 +292,8 @@ final class Lucene70NormsProducer extends NormsProducer { |
| } |
| } else { |
| // sparse |
| - final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField); |
| + final IndexInput disiInput = getDisiInput(field, entry); |
| + final IndexedDISI disi = new IndexedDISI(disiInput, entry.numDocsWithField); |
| if (entry.bytesPerNorm == 0) { |
| return new SparseNormsIterator(disi) { |
| @Override |
| diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java |
| index 281b08f..36f0358 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java |
| +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java |
| @@ -34,6 +34,7 @@ import java.util.TreeSet; |
| |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.Fields; |
| @@ -117,7 +118,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields); |
| |
| // Write postings |
| @@ -137,7 +138,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { |
| |
| FieldsConsumer consumer = format.fieldsConsumer(group.state); |
| toClose.add(consumer); |
| - consumer.write(maskedFields); |
| + consumer.write(maskedFields, norms); |
| } |
| success = true; |
| } finally { |
| @@ -148,7 +149,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| - public void merge(MergeState mergeState) throws IOException { |
| + public void merge(MergeState mergeState, NormsProducer norms) throws IOException { |
| Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(new MultiFields(mergeState.fieldsProducers, null)); |
| |
| // Merge postings |
| @@ -161,7 +162,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { |
| |
| FieldsConsumer consumer = format.fieldsConsumer(group.state); |
| toClose.add(consumer); |
| - consumer.merge(pfMergeState.apply(group.fields)); |
| + consumer.merge(pfMergeState.apply(group.fields), norms); |
| } |
| success = true; |
| } finally { |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java |
| index c676568..7dd1aa9 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java |
| @@ -48,6 +48,7 @@ import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.LeafFieldComparator; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.AlreadyClosedException; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| @@ -1598,8 +1599,109 @@ public final class CheckIndex implements Closeable { |
| } |
| } |
| } |
| + |
| + // Test score blocks |
| + // We only score on freq to keep things simple and not pull norms |
| + SimScorer scorer = new SimScorer(field) { |
| + @Override |
| + public float score(float freq, long norm) { |
| + return freq; |
| + } |
| + }; |
| + |
| + // First check max scores and block uptos |
| + int max = -1; |
| + float maxScore = 0; |
| + ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS); |
| + postings = termsEnum.postings(postings, PostingsEnum.FREQS); |
| + for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) { |
| + if (postings.nextDoc() != doc) { |
| + throw new RuntimeException("Wrong next doc: " + doc + ", expected " + postings.docID()); |
| + } |
| + if (doc == DocIdSetIterator.NO_MORE_DOCS) { |
| + break; |
| + } |
| + if (postings.freq() != impacts.freq()) { |
| + throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq()); |
| + } |
| + if (doc > max) { |
| + max = impacts.advanceShallow(doc); |
| + if (max < doc) { |
| + throw new RuntimeException("max block doc id " + max + " must be greater than the target: " + doc); |
| + } |
| + maxScore = impacts.getMaxScore(max); |
| + } |
| + int max2 = impacts.advanceShallow(doc); |
| + if (max != max2) { |
| + throw new RuntimeException("max is not stable, initially had " + max + " but now " + max2); |
| + } |
| + float score = scorer.score(impacts.freq(), 1); |
| + if (score > maxScore) { |
| + throw new RuntimeException("score " + score + " is greater than the max score " + maxScore); |
| + } |
| + } |
| + |
| + // Now check advancing |
| + impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS); |
| + postings = termsEnum.postings(postings, PostingsEnum.FREQS); |
| + |
| + max = -1; |
| + while (true) { |
| + int doc = impacts.docID(); |
| + boolean advance; |
| + int target; |
| + if (((field.hashCode() + doc) & 1) == 1) { |
| + advance = false; |
| + target = doc + 1; |
| + } else { |
| + advance = true; |
| + int delta = Math.min(1 + ((31 * field.hashCode() + doc) & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc); |
| + target = impacts.docID() + delta; |
| + } |
| + |
| + if (target > max && target % 2 == 1) { |
| + int delta = Math.min((31 * field.hashCode() + target) & 0x1ff, DocIdSetIterator.NO_MORE_DOCS - target); |
| + max = target + delta; |
| + int m = impacts.advanceShallow(target); |
| + if (m < target) { |
| + throw new RuntimeException("Block max doc: " + m + " is less than the target " + target); |
| + } |
| + maxScore = impacts.getMaxScore(max); |
| + } |
| + |
| + if (advance) { |
| + doc = impacts.advance(target); |
| + } else { |
| + doc = impacts.nextDoc(); |
| + } |
| + |
| + if (postings.advance(target) != doc) { |
| + throw new RuntimeException("Impacts do not advance to the same document as postings for target " + target + ", postings: " + postings.docID() + ", impacts: " + doc); |
| + } |
| + if (doc == DocIdSetIterator.NO_MORE_DOCS) { |
| + break; |
| + } |
| + if (postings.freq() != impacts.freq()) { |
| + throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq()); |
| + } |
| + |
| + if (doc >= max) { |
| + int delta = Math.min((31 * field.hashCode() + target & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc); |
| + max = doc + delta; |
| + int m = impacts.advanceShallow(doc); |
| + if (m < doc) { |
| + throw new RuntimeException("Block max doc: " + m + " is less than the target " + doc); |
| + } |
| + maxScore = impacts.getMaxScore(max); |
| + } |
| + |
| + float score = scorer.score(impacts.freq(), 1); |
| + if (score > maxScore) { |
| + throw new RuntimeException("score " + score + " is greater than the max score " + maxScore); |
| + } |
| + } |
| } |
| - |
| + |
| if (minTerm != null && status.termCount + status.delTermCount == 0) { |
| throw new RuntimeException("field=\"" + field + "\": minTerm is non-null yet we saw no terms: " + minTerm); |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java |
| index fd24105..4e05aa6 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java |
| @@ -31,6 +31,7 @@ import org.apache.lucene.codecs.DocValuesConsumer; |
| import org.apache.lucene.codecs.DocValuesFormat; |
| import org.apache.lucene.codecs.NormsConsumer; |
| import org.apache.lucene.codecs.NormsFormat; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PointsFormat; |
| import org.apache.lucene.codecs.PointsWriter; |
| import org.apache.lucene.document.FieldType; |
| @@ -126,6 +127,7 @@ final class DefaultIndexingChain extends DocConsumer { |
| if (docState.infoStream.isEnabled("IW")) { |
| docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write norms"); |
| } |
| + SegmentReadState readState = new SegmentReadState(state.directory, state.segmentInfo, state.fieldInfos, IOContext.READ, state.segmentSuffix); |
| |
| t0 = System.nanoTime(); |
| writeDocValues(state, sortMap); |
| @@ -159,7 +161,16 @@ final class DefaultIndexingChain extends DocConsumer { |
| } |
| } |
| |
| - termsHash.flush(fieldsToFlush, state, sortMap); |
| + try (NormsProducer norms = readState.fieldInfos.hasNorms() |
| + ? state.segmentInfo.getCodec().normsFormat().normsProducer(readState) |
| + : null) { |
| + NormsProducer normsMergeInstance = null; |
| + if (norms != null) { |
| + // Use the merge instance in order to reuse the same IndexInput for all terms |
| + normsMergeInstance = norms.getMergeInstance(); |
| + } |
| + termsHash.flush(fieldsToFlush, state, sortMap, normsMergeInstance); |
| + } |
| if (docState.infoStream.isEnabled("IW")) { |
| docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write postings and finish vectors"); |
| } |
| @@ -693,6 +704,9 @@ final class DefaultIndexingChain extends DocConsumer { |
| normValue = 0; |
| } else { |
| normValue = similarity.computeNorm(invertState); |
| + if (normValue == 0) { |
| + throw new IllegalStateException("Similarity " + similarity + " return 0 for non-empty field"); |
| + } |
| } |
| norms.addValue(docState.docID, normValue); |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java |
| index 0450038..4a9b660 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java |
| @@ -20,6 +20,7 @@ package org.apache.lucene.index; |
| import java.io.IOException; |
| import java.util.Iterator; |
| |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -214,6 +215,10 @@ public abstract class FilterLeafReader extends LeafReader { |
| return in.postings(reuse, flags); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return in.impacts(scorer, flags); |
| + } |
| } |
| |
| /** Base class for filtering {@link PostingsEnum} implementations. */ |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java |
| index 6498dc0..411b435 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java |
| @@ -20,6 +20,7 @@ package org.apache.lucene.index; |
| import java.io.IOException; |
| |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.AttributeSource; |
| |
| /** |
| @@ -181,7 +182,12 @@ public abstract class FilteredTermsEnum extends TermsEnum { |
| public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { |
| return tenum.postings(reuse, flags); |
| } |
| - |
| + |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return tenum.impacts(scorer, flags); |
| + } |
| + |
| /** This enum does not support seeking! |
| * @throws UnsupportedOperationException In general, subclasses do not |
| * support seeking. |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java |
| index fb78a92..c3e7d71 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java |
| @@ -24,6 +24,7 @@ import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| @@ -273,6 +274,11 @@ class FreqProxFields extends Fields { |
| return docsEnum; |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| /** |
| * Expert: Returns the TermsEnums internal state to position the TermsEnum |
| * without re-seeking the term dictionary. |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java |
| index d953f8d..ac70669 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java |
| @@ -24,6 +24,7 @@ import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.util.CollectionUtil; |
| import org.apache.lucene.util.IOUtils; |
| |
| @@ -78,8 +79,9 @@ final class FreqProxTermsWriter extends TermsHash { |
| } |
| |
| @Override |
| - public void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException { |
| - super.flush(fieldsToFlush, state, sortMap); |
| + public void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, |
| + Sorter.DocMap sortMap, NormsProducer norms) throws IOException { |
| + super.flush(fieldsToFlush, state, sortMap, norms); |
| |
| // Gather all fields that saw any postings: |
| List<FreqProxTermsWriterPerField> allFields = new ArrayList<>(); |
| @@ -105,7 +107,7 @@ final class FreqProxTermsWriter extends TermsHash { |
| FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state); |
| boolean success = false; |
| try { |
| - consumer.write(fields); |
| + consumer.write(fields, norms); |
| success = true; |
| } finally { |
| if (success) { |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java b/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java |
| new file mode 100644 |
| index 0000000..8deccff |
| --- /dev/null |
| +++ b/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java |
| @@ -0,0 +1,53 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.index; |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.search.DocIdSetIterator; |
| + |
| +/** |
| + * Extension of {@link PostingsEnum} which also provides information about the |
| + * produced scores. |
| + * @lucene.experimental |
| + */ |
| +public abstract class ImpactsEnum extends PostingsEnum { |
| + |
| + /** Sole constructor. */ |
| + protected ImpactsEnum() {} |
| + |
| + /** |
| + * Advance to the block of documents that contains {@code target} in order to |
| + * get scoring information about this block. This method is implicitly called |
| + * by {@link DocIdSetIterator#advance(int)} and |
| + * {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the |
| + * current {@link DocIdSetIterator#docID()}. |
| + * It returns a number that is greater than or equal to the doc ID of every |
| + * document in the current block, but less than the doc ID of any document in the next block. |
| + * {@code target} must be >= {@link #docID()} as well as all targets that |
| + * have been passed to {@link #advanceShallow(int)} so far. |
| + */ |
| + public abstract int advanceShallow(int target) throws IOException; |
| + |
| + /** |
| + * Return the maximum score of documents between the last {@code target} |
| + * that this iterator was {@link #advanceShallow(int) shallow-advanced} to, |
| + * inclusive, and {@code upTo}, inclusive. |
| + */ |
| + public abstract float getMaxScore(int upTo) throws IOException; |
| + |
| +} |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java |
| index 7db838b..7de8427 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java |
| @@ -21,6 +21,7 @@ import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.Comparator; |
| |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| @@ -367,6 +368,11 @@ public final class MultiTermsEnum extends TermsEnum { |
| return docsEnum.reset(subDocs, upto); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| final static class TermsEnumWithSlice { |
| private final ReaderSlice subSlice; |
| TermsEnum terms; |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java |
| index c67b92d..ad60a94 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java |
| @@ -24,6 +24,7 @@ import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.DocValuesConsumer; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.NormsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PointsWriter; |
| import org.apache.lucene.codecs.StoredFieldsWriter; |
| import org.apache.lucene.codecs.TermVectorsWriter; |
| @@ -109,10 +110,33 @@ final class SegmentMerger { |
| |
| final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo, |
| mergeState.mergeFieldInfos, null, context); |
| + final SegmentReadState segmentReadState = new SegmentReadState(directory, mergeState.segmentInfo, mergeState.mergeFieldInfos, |
| + IOContext.READ, segmentWriteState.segmentSuffix); |
| + |
| + if (mergeState.mergeFieldInfos.hasNorms()) { |
| + if (mergeState.infoStream.isEnabled("SM")) { |
| + t0 = System.nanoTime(); |
| + } |
| + mergeNorms(segmentWriteState); |
| + if (mergeState.infoStream.isEnabled("SM")) { |
| + long t1 = System.nanoTime(); |
| + mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge norms [" + numMerged + " docs]"); |
| + } |
| + } |
| + |
| if (mergeState.infoStream.isEnabled("SM")) { |
| t0 = System.nanoTime(); |
| } |
| - mergeTerms(segmentWriteState); |
| + try (NormsProducer norms = mergeState.mergeFieldInfos.hasNorms() |
| + ? codec.normsFormat().normsProducer(segmentReadState) |
| + : null) { |
| + NormsProducer normsMergeInstance = null; |
| + if (norms != null) { |
| + // Use the merge instance in order to reuse the same IndexInput for all terms |
| + normsMergeInstance = norms.getMergeInstance(); |
| + } |
| + mergeTerms(segmentWriteState, normsMergeInstance); |
| + } |
| if (mergeState.infoStream.isEnabled("SM")) { |
| long t1 = System.nanoTime(); |
| mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge postings [" + numMerged + " docs]"); |
| @@ -139,17 +163,6 @@ final class SegmentMerger { |
| long t1 = System.nanoTime(); |
| mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge points [" + numMerged + " docs]"); |
| } |
| - |
| - if (mergeState.mergeFieldInfos.hasNorms()) { |
| - if (mergeState.infoStream.isEnabled("SM")) { |
| - t0 = System.nanoTime(); |
| - } |
| - mergeNorms(segmentWriteState); |
| - if (mergeState.infoStream.isEnabled("SM")) { |
| - long t1 = System.nanoTime(); |
| - mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge norms [" + numMerged + " docs]"); |
| - } |
| - } |
| |
| if (mergeState.mergeFieldInfos.hasVectors()) { |
| if (mergeState.infoStream.isEnabled("SM")) { |
| @@ -225,9 +238,9 @@ final class SegmentMerger { |
| } |
| } |
| |
| - private void mergeTerms(SegmentWriteState segmentWriteState) throws IOException { |
| + private void mergeTerms(SegmentWriteState segmentWriteState, NormsProducer norms) throws IOException { |
| try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState)) { |
| - consumer.merge(mergeState); |
| + consumer.merge(mergeState, norms); |
| } |
| } |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java |
| new file mode 100644 |
| index 0000000..9ba27e2 |
| --- /dev/null |
| +++ b/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java |
| @@ -0,0 +1,94 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.index; |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.util.BytesRef; |
| + |
| +/** |
| + * {@link ImpactsEnum} that doesn't index impacts but implements the API in a |
| + * legal way. This should typically be used for short postings that do not need |
| + * skipping. |
| + */ |
| +public final class SlowImpactsEnum extends ImpactsEnum { |
| + |
| + private final PostingsEnum delegate; |
| + private final float maxScore; |
| + |
| + /** Wrap the given {@link PostingsEnum}. */ |
| + public SlowImpactsEnum(PostingsEnum delegate, float maxScore) { |
| + this.delegate = delegate; |
| + this.maxScore = maxScore; |
| + } |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + return delegate.nextDoc(); |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return delegate.docID(); |
| + } |
| + |
| + @Override |
| + public long cost() { |
| + return delegate.cost(); |
| + } |
| + |
| + @Override |
| + public int advance(int target) throws IOException { |
| + return delegate.advance(target); |
| + } |
| + |
| + @Override |
| + public int startOffset() throws IOException { |
| + return delegate.startOffset(); |
| + } |
| + |
| + @Override |
| + public int nextPosition() throws IOException { |
| + return delegate.nextPosition(); |
| + } |
| + |
| + @Override |
| + public BytesRef getPayload() throws IOException { |
| + return delegate.getPayload(); |
| + } |
| + |
| + @Override |
| + public int freq() throws IOException { |
| + return delegate.freq(); |
| + } |
| + |
| + @Override |
| + public int endOffset() throws IOException { |
| + return delegate.endOffset(); |
| + } |
| + |
| + @Override |
| + public int advanceShallow(int target) { |
| + return NO_MORE_DOCS; |
| + } |
| + |
| + @Override |
| + public float getMaxScore(int maxDoc) { |
| + return maxScore; |
| + } |
| + |
| +} |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java |
| index ccee7a3..70d4387 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java |
| @@ -19,6 +19,7 @@ package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| |
| @@ -110,6 +111,11 @@ class SortedDocValuesTermsEnum extends TermsEnum { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef term, TermState state) throws IOException { |
| assert state != null && state instanceof OrdTermState; |
| this.seekExact(((OrdTermState)state).ord); |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java |
| index eba95c9..9099ac8 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java |
| @@ -17,6 +17,7 @@ |
| package org.apache.lucene.index; |
| |
| |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| |
| @@ -110,6 +111,11 @@ class SortedSetDocValuesTermsEnum extends TermsEnum { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef term, TermState state) throws IOException { |
| assert state != null && state instanceof OrdTermState; |
| this.seekExact(((OrdTermState)state).ord); |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java |
| index dff808e..054ca50 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java |
| @@ -21,6 +21,7 @@ import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.Map; |
| |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| import org.apache.lucene.codecs.TermVectorsWriter; |
| import org.apache.lucene.search.DocIdSetIterator; |
| @@ -37,8 +38,8 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer { |
| } |
| |
| @Override |
| - void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException { |
| - super.flush(fieldsToFlush, state, sortMap); |
| + void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException { |
| + super.flush(fieldsToFlush, state, sortMap, norms); |
| if (tmpDirectory != null) { |
| if (sortMap == null) { |
| // we're lucky the index is already sorted, just rename the temporary file and return |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java |
| index 46dc63c..1ac20dd 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java |
| @@ -21,6 +21,7 @@ import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.Map; |
| |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.TermVectorsWriter; |
| import org.apache.lucene.store.FlushInfo; |
| import org.apache.lucene.store.IOContext; |
| @@ -53,7 +54,7 @@ class TermVectorsConsumer extends TermsHash { |
| } |
| |
| @Override |
| - void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException { |
| + void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException { |
| if (writer != null) { |
| int numDocs = state.segmentInfo.maxDoc(); |
| assert numDocs > 0; |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java |
| index 4b5755a..7bbb3f7 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java |
| @@ -19,6 +19,7 @@ package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefIterator; |
| @@ -171,6 +172,12 @@ public abstract class TermsEnum implements BytesRefIterator { |
| public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException; |
| |
| /** |
| + * Return a {@link ImpactsEnum} that computes impacts with {@code scorer}. |
| + * @see #postings(PostingsEnum, int) |
| + */ |
| + public abstract ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException; |
| + |
| + /** |
| * Expert: Returns the TermsEnums internal state to position the TermsEnum |
| * without re-seeking the term dictionary. |
| * <p> |
| @@ -228,7 +235,12 @@ public abstract class TermsEnum implements BytesRefIterator { |
| public PostingsEnum postings(PostingsEnum reuse, int flags) { |
| throw new IllegalStateException("this method should never be called"); |
| } |
| - |
| + |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new IllegalStateException("this method should never be called"); |
| + } |
| + |
| @Override |
| public BytesRef next() { |
| return null; |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHash.java b/lucene/core/src/java/org/apache/lucene/index/TermsHash.java |
| index bede2f8..f420aca 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/TermsHash.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/TermsHash.java |
| @@ -21,6 +21,7 @@ import java.io.IOException; |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.util.ByteBlockPool; |
| import org.apache.lucene.util.Counter; |
| import org.apache.lucene.util.IntBlockPool; |
| @@ -76,13 +77,14 @@ abstract class TermsHash { |
| bytePool.reset(false, false); |
| } |
| |
| - void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException { |
| + void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, |
| + Sorter.DocMap sortMap, NormsProducer norms) throws IOException { |
| if (nextTermsHash != null) { |
| Map<String,TermsHashPerField> nextChildFields = new HashMap<>(); |
| for (final Map.Entry<String,TermsHashPerField> entry : fieldsToFlush.entrySet()) { |
| nextChildFields.put(entry.getKey(), entry.getValue().nextPerField); |
| } |
| - nextTermsHash.flush(nextChildFields, state, sortMap); |
| + nextTermsHash.flush(nextChildFields, state, sortMap, norms); |
| } |
| } |
| |
| diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java |
| index 881c5dd..72f9473 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java |
| +++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java |
| @@ -17,11 +17,13 @@ |
| package org.apache.lucene.search; |
| |
| |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.Attribute; |
| import org.apache.lucene.util.AttributeImpl; |
| import org.apache.lucene.util.AttributeReflector; |
| @@ -273,6 +275,11 @@ public final class FuzzyTermsEnum extends TermsEnum { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return actualEnum.impacts(scorer, flags); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef term, TermState state) throws IOException { |
| actualEnum.seekExact(term, state); |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java |
| index 5de8295..f3dc5ea 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java |
| +++ b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java |
| @@ -40,6 +40,11 @@ public final class LeafSimScorer { |
| maxScore = needsScores ? scorer.score(maxFreq, 1) : Float.MAX_VALUE; |
| } |
| |
| + /** Return the wrapped {@link SimScorer}. */ |
| + public SimScorer getSimScorer() { |
| + return scorer; |
| + } |
| + |
| private long getNormValue(int doc) throws IOException { |
| if (norms != null) { |
| boolean found = norms.advanceExact(doc); |
| diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java |
| index d9335cf..1eba910 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java |
| +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java |
| @@ -29,7 +29,6 @@ import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermStates; |
| import org.apache.lucene.index.TermState; |
| @@ -208,9 +207,8 @@ public final class SynonymQuery extends Query { |
| termsEnum.seekExact(terms[i].bytes(), state); |
| long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); |
| totalMaxFreq += termMaxFreq; |
| - PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS); |
| LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq); |
| - subScorers.add(new TermScorer(this, postings, simScorer)); |
| + subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer)); |
| } |
| } |
| if (subScorers.isEmpty()) { |
| diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java |
| index d629acd..f1f4415 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java |
| +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java |
| @@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.IndexReaderContext; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.ReaderUtil; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermStates; |
| @@ -46,21 +45,21 @@ public class TermQuery extends Query { |
| private final Similarity similarity; |
| private final Similarity.SimScorer simScorer; |
| private final TermStates termStates; |
| - private final boolean needsScores; |
| + private final ScoreMode scoreMode; |
| |
| - public TermWeight(IndexSearcher searcher, boolean needsScores, |
| + public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, |
| float boost, TermStates termStates) throws IOException { |
| super(TermQuery.this); |
| - if (needsScores && termStates == null) { |
| + if (scoreMode.needsScores() && termStates == null) { |
| throw new IllegalStateException("termStates are required when scores are needed"); |
| } |
| - this.needsScores = needsScores; |
| + this.scoreMode = scoreMode; |
| this.termStates = termStates; |
| this.similarity = searcher.getSimilarity(); |
| |
| final CollectionStatistics collectionStats; |
| final TermStatistics termStats; |
| - if (needsScores) { |
| + if (scoreMode.needsScores()) { |
| collectionStats = searcher.collectionStatistics(term.field()); |
| termStats = searcher.termStatistics(term, termStates); |
| } else { |
| @@ -97,10 +96,9 @@ public class TermQuery extends Query { |
| .getFieldInfos() |
| .fieldInfo(getTerm().field()) |
| .getIndexOptions(); |
| - PostingsEnum docs = termsEnum.postings(null, needsScores ? PostingsEnum.FREQS : PostingsEnum.NONE); |
| - assert docs != null; |
| float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq()); |
| - return new TermScorer(this, docs, new LeafSimScorer(simScorer, context.reader(), needsScores, maxFreq)); |
| + LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq); |
| + return new TermScorer(this, termsEnum, scoreMode, scorer); |
| } |
| |
| private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) { |
| @@ -198,7 +196,7 @@ public class TermQuery extends Query { |
| termState = this.perReaderTermState; |
| } |
| |
| - return new TermWeight(searcher, scoreMode.needsScores(), boost, termState); |
| + return new TermWeight(searcher, scoreMode, boost, termState); |
| } |
| |
| /** Prints a user-readable version of this query. */ |
| diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java |
| index 653a60e..fc426da 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java |
| +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java |
| @@ -19,29 +19,92 @@ package org.apache.lucene.search; |
| |
| import java.io.IOException; |
| |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| +import org.apache.lucene.index.TermsEnum; |
| |
| /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>. |
| */ |
| final class TermScorer extends Scorer { |
| private final PostingsEnum postingsEnum; |
| + private final DocIdSetIterator iterator; |
| private final LeafSimScorer docScorer; |
| + private float minCompetitiveScore; |
| |
| /** |
| * Construct a <code>TermScorer</code>. |
| * |
| * @param weight |
| * The weight of the <code>Term</code> in the query. |
| - * @param td |
| - * An iterator over the documents matching the <code>Term</code>. |
| + * @param te |
| + * A {@link TermsEnum} positioned on the expected term. |
| * @param docScorer |
| - * The <code>Similarity.SimScorer</code> implementation |
| - * to be used for score computations. |
| + * A {@link LeafSimScorer} for the appropriate field. |
| */ |
| - TermScorer(Weight weight, PostingsEnum td, LeafSimScorer docScorer) { |
| + TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException { |
| super(weight); |
| this.docScorer = docScorer; |
| - this.postingsEnum = td; |
| + if (scoreMode == ScoreMode.TOP_SCORES) { |
| + ImpactsEnum impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS); |
| + postingsEnum = impactsEnum; |
| + iterator = new DocIdSetIterator() { |
| + |
| + int upTo = -1; |
| + float maxScore; |
| + |
| + private int advanceTarget(int target) throws IOException { |
| + if (minCompetitiveScore == 0) { |
| + // no potential for skipping |
| + return target; |
| + } |
| + |
| + if (target > upTo) { |
| + upTo = impactsEnum.advanceShallow(target); |
| + maxScore = impactsEnum.getMaxScore(upTo); |
| + } |
| + |
| + while (true) { |
| + assert upTo >= target; |
| + |
| + if (maxScore >= minCompetitiveScore) { |
| + return target; |
| + } |
| + |
| + if (upTo == NO_MORE_DOCS) { |
| + return NO_MORE_DOCS; |
| + } |
| + |
| + target = upTo + 1; |
| + |
| + upTo = impactsEnum.advanceShallow(target); |
| + maxScore = impactsEnum.getMaxScore(upTo); |
| + } |
| + } |
| + |
| + @Override |
| + public int advance(int target) throws IOException { |
| + return impactsEnum.advance(advanceTarget(target)); |
| + } |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + return advance(impactsEnum.docID() + 1); |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return impactsEnum.docID(); |
| + } |
| + |
| + @Override |
| + public long cost() { |
| + return impactsEnum.cost(); |
| + } |
| + }; |
| + } else { |
| + postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE); |
| + iterator = postingsEnum; |
| + } |
| } |
| |
| @Override |
| @@ -55,7 +118,7 @@ final class TermScorer extends Scorer { |
| |
| @Override |
| public DocIdSetIterator iterator() { |
| - return postingsEnum; |
| + return iterator; |
| } |
| |
| @Override |
| @@ -69,6 +132,11 @@ final class TermScorer extends Scorer { |
| return docScorer.maxScore(); |
| } |
| |
| + @Override |
| + public void setMinCompetitiveScore(float minScore) { |
| + this.minCompetitiveScore = minScore; |
| + } |
| + |
| /** Returns a string representation of this <code>TermScorer</code>. */ |
| @Override |
| public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; } |
| diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java |
| new file mode 100644 |
| index 0000000..5743e64 |
| --- /dev/null |
| +++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java |
| @@ -0,0 +1,104 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.codecs; |
| + |
| +import java.util.Collections; |
| +import java.util.HashSet; |
| +import java.util.Set; |
| + |
| +import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +public class TestCompetitiveFreqNormAccumulator extends LuceneTestCase { |
| + |
| + public void testBasics() { |
| + CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator(); |
| + Set<FreqAndNorm> expected = new HashSet<>(); |
| + |
| + acc.add(3, 5); |
| + expected.add(new FreqAndNorm(3, 5)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(6, 11); |
| + expected.add(new FreqAndNorm(6, 11)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(10, 13); |
| + expected.add(new FreqAndNorm(10, 13)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(1, 2); |
| + expected.add(new FreqAndNorm(1, 2)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(7, 9); |
| + expected.remove(new FreqAndNorm(6, 11)); |
| + expected.add(new FreqAndNorm(7, 9)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(8, 2); |
| + expected.clear(); |
| + expected.add(new FreqAndNorm(10, 13)); |
| + expected.add(new FreqAndNorm(8, 2)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + } |
| + |
| + public void testExtremeNorms() { |
| + CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator(); |
| + Set<FreqAndNorm> expected = new HashSet<>(); |
| + |
| + acc.add(3, 5); |
| + expected.add(new FreqAndNorm(3, 5)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(10, 10000); |
| + expected.add(new FreqAndNorm(10, 10000)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(5, 200); |
| + expected.add(new FreqAndNorm(5, 200)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(20, -100); |
| + expected.add(new FreqAndNorm(20, -100)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + |
| + acc.add(30, -3); |
| + expected.add(new FreqAndNorm(30, -3)); |
| + assertEquals(expected, acc.getCompetitiveFreqNormPairs()); |
| + } |
| + |
| + public void testOmitFreqs() { |
| + CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator(); |
| + |
| + acc.add(1, 5); |
| + acc.add(1, 7); |
| + acc.add(1, 4); |
| + |
| + assertEquals(Collections.singleton(new FreqAndNorm(1, 4)), acc.getCompetitiveFreqNormPairs()); |
| + } |
| + |
| + public void testOmitNorms() { |
| + CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator(); |
| + |
| + acc.add(5, 1); |
| + acc.add(7, 1); |
| + acc.add(4, 1); |
| + |
| + assertEquals(Collections.singleton(new FreqAndNorm(7, 1)), acc.getCompetitiveFreqNormPairs()); |
| + } |
| +} |
| diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java |
| index f2ed86c..d507b7b 100644 |
| --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java |
| +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java |
| @@ -17,8 +17,11 @@ |
| package org.apache.lucene.codecs.lucene50; |
| |
| |
| +import java.io.IOException; |
| + |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.codecs.Codec; |
| +import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator; |
| import org.apache.lucene.codecs.blocktree.FieldReader; |
| import org.apache.lucene.codecs.blocktree.Stats; |
| import org.apache.lucene.document.Document; |
| @@ -27,7 +30,12 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| +import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.IOContext; |
| +import org.apache.lucene.store.IndexInput; |
| +import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.TestUtil; |
| |
| /** |
| @@ -78,4 +86,56 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase { |
| shouldFail(10, -1); |
| shouldFail(10, 12); |
| } |
| + |
| + public void testImpactSerialization() throws IOException { |
| + // omit norms and omit freqs |
| + doTestImpactSerialization(new int[] { 1 }, new long[] { 1L }); |
| + |
| + // omit freqs |
| + doTestImpactSerialization(new int[] { 1 }, new long[] { 42L }); |
| + // omit freqs with very large norms |
| + doTestImpactSerialization(new int[] { 1 }, new long[] { -100L }); |
| + |
| + // omit norms |
| + doTestImpactSerialization(new int[] { 30 }, new long[] { 1L }); |
| + // omit norms with large freq |
| + doTestImpactSerialization(new int[] { 500 }, new long[] { 1L }); |
| + |
| + // freqs and norms, basic |
| + doTestImpactSerialization( |
| + new int[] { 1, 3, 7, 15, 20, 28 }, |
| + new long[] { 7L, 9L, 10L, 11L, 13L, 14L }); |
| + |
| + // freqs and norms, high values |
| + doTestImpactSerialization( |
| + new int[] { 2, 10, 12, 50, 1000, 1005 }, |
| + new long[] { 2L, 10L, 50L, -100L, -80L, -3L }); |
| + } |
| + |
| + private void doTestImpactSerialization(int[] freqs, long[] norms) throws IOException { |
| + CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator(); |
| + for (int i = 0; i < freqs.length; ++i) { |
| + acc.add(freqs[i], norms[i]); |
| + } |
| + try(Directory dir = newDirectory()) { |
| + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { |
| + Lucene50SkipWriter.writeImpacts(acc, out); |
| + } |
| + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { |
| + byte[] b = new byte[Math.toIntExact(in.length())]; |
| + in.readBytes(b, 0, b.length); |
| + Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new SimScorer("") { |
| + int i = 0; |
| + |
| + @Override |
| + public float score(float freq, long norm) { |
| + assert freq == freqs[i]; |
| + assert norm == norms[i]; |
| + i++; |
| + return 0; |
| + } |
| + }); |
| + } |
| + } |
| + } |
| } |
| diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java |
| index 804f507..84544bc 100644 |
| --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java |
| +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java |
| @@ -28,6 +28,7 @@ import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.asserting.AssertingCodec; |
| import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval; |
| @@ -407,17 +408,17 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { |
| final FieldsConsumer consumer = delegate.fieldsConsumer(state); |
| return new FieldsConsumer() { |
| @Override |
| - public void write(Fields fields) throws IOException { |
| - consumer.write(fields); |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| + consumer.write(fields, norms); |
| } |
| |
| @Override |
| - public void merge(MergeState mergeState) throws IOException { |
| + public void merge(MergeState mergeState, NormsProducer norms) throws IOException { |
| nbMergeCalls++; |
| for (FieldInfo fi : mergeState.mergeFieldInfos) { |
| fieldNames.add(fi.name); |
| } |
| - consumer.merge(mergeState); |
| + consumer.merge(mergeState, norms); |
| } |
| |
| @Override |
| diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java |
| index 4625f73..efe4587 100644 |
| --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java |
| +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java |
| @@ -17,6 +17,7 @@ |
| package org.apache.lucene.index; |
| |
| |
| +import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| @@ -28,10 +29,12 @@ import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| @@ -676,6 +679,10 @@ public class TestCodecs extends LuceneTestCase { |
| return new DataPostingsEnum(fieldData.terms[upto]); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| } |
| |
| private static class DataPostingsEnum extends PostingsEnum { |
| @@ -752,9 +759,65 @@ public class TestCodecs extends LuceneTestCase { |
| |
| Arrays.sort(fields); |
| FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); |
| + NormsProducer fakeNorms = new NormsProducer() { |
| + |
| + @Override |
| + public long ramBytesUsed() { |
| + return 0; |
| + } |
| + |
| + @Override |
| + public void close() throws IOException {} |
| + |
| + @Override |
| + public NumericDocValues getNorms(FieldInfo field) throws IOException { |
| + return new NumericDocValues() { |
| + |
| + int doc = -1; |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + return advance(doc + 1); |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return doc; |
| + } |
| + |
| + @Override |
| + public long cost() { |
| + return si.maxDoc(); |
| + } |
| + |
| + @Override |
| + public int advance(int target) throws IOException { |
| + if (target >= si.maxDoc()) { |
| + return doc = NO_MORE_DOCS; |
| + } else { |
| + return doc = target; |
| + } |
| + } |
| + |
| + @Override |
| + public boolean advanceExact(int target) throws IOException { |
| + doc = target; |
| + return true; |
| + } |
| + |
| + @Override |
| + public long longValue() throws IOException { |
| + return 1; |
| + } |
| + }; |
| + } |
| + |
| + @Override |
| + public void checkIntegrity() throws IOException {} |
| + }; |
| boolean success = false; |
| try { |
| - consumer.write(new DataFields(fields)); |
| + consumer.write(new DataFields(fields), fakeNorms); |
| success = true; |
| } finally { |
| if (success) { |
| diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java |
| index 2758c96..73d3e6a 100644 |
| --- a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java |
| +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java |
| @@ -24,12 +24,18 @@ import java.util.List; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| +import org.apache.lucene.document.StringField; |
| +import org.apache.lucene.document.Field.Store; |
| +import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.FilterLeafReader; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.BooleanClause.Occur; |
| import org.apache.lucene.search.similarities.ClassicSimilarity; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase; |
| @@ -202,4 +208,57 @@ public class TestTermScorer extends LuceneTestCase { |
| // should not fail this time since norms are not necessary |
| weight2.scorer(forbiddenNorms.getContext()).iterator().nextDoc(); |
| } |
| + |
| + public void testRandomTopDocs() throws IOException { |
| + Directory dir = newDirectory(); |
| + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); |
| + int numDocs = atLeast(128 * 8 * 8 * 3); // make sure some terms have skip data |
| + for (int i = 0; i < numDocs; ++i) { |
| + Document doc = new Document(); |
| + int numValues = random().nextInt(1 << random().nextInt(5)); |
| + int start = random().nextInt(10); |
| + for (int j = 0; j < numValues; ++j) { |
| + doc.add(new StringField("foo", Integer.toString(start + j), Store.NO)); |
| + } |
| + w.addDocument(doc); |
| + } |
| + IndexReader reader = DirectoryReader.open(w); |
| + w.close(); |
| + IndexSearcher searcher = newSearcher(reader); |
| + |
| + for (int iter = 0; iter < 15; ++iter) { |
| + Query query = new TermQuery(new Term("foo", Integer.toString(iter))); |
| + |
| + TopScoreDocCollector collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE |
| + TopScoreDocCollector collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES |
| + |
| + searcher.search(query, collector1); |
| + searcher.search(query, collector2); |
| + assertTopDocsEquals(collector1.topDocs(), collector2.topDocs()); |
| + |
| + int filterTerm = random().nextInt(15); |
| + Query filteredQuery = new BooleanQuery.Builder() |
| + .add(query, Occur.MUST) |
| + .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER) |
| + .build(); |
| + |
| + collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE |
| + collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES |
| + searcher.search(filteredQuery, collector1); |
| + searcher.search(filteredQuery, collector2); |
| + assertTopDocsEquals(collector1.topDocs(), collector2.topDocs()); |
| + } |
| + reader.close(); |
| + dir.close(); |
| + } |
| + |
| + private static void assertTopDocsEquals(TopDocs td1, TopDocs td2) { |
| + assertEquals(td1.scoreDocs.length, td2.scoreDocs.length); |
| + for (int i = 0; i < td1.scoreDocs.length; ++i) { |
| + ScoreDoc sd1 = td1.scoreDocs[i]; |
| + ScoreDoc sd2 = td2.scoreDocs[i]; |
| + assertEquals(sd1.doc, sd2.doc); |
| + assertEquals(sd1.score, sd2.score, 0f); |
| + } |
| + } |
| } |
| diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java |
| index 0d8d949..4014e8c 100644 |
| --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java |
| +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java |
| @@ -42,6 +42,7 @@ import org.apache.lucene.search.ScoreMode; |
| import org.apache.lucene.search.Scorer; |
| import org.apache.lucene.search.SimpleCollector; |
| import org.apache.lucene.search.similarities.Similarity; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.Bits; |
| @@ -1425,6 +1426,11 @@ public class MemoryIndex { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1L)); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef term, TermState state) throws IOException { |
| assert state != null; |
| this.seekExact(((OrdTermState)state).ord); |
| diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java |
| index 54f4aa4..4203e07 100644 |
| --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java |
| +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java |
| @@ -23,7 +23,9 @@ import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.PostingsReaderBase; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.SegmentReadState; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.IndexInput; |
| |
| @@ -88,6 +90,11 @@ final class IDVersionPostingsReader extends PostingsReaderBase { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException("Should never be called, IDVersionSegmentTermsEnum implements impacts directly"); |
| + } |
| + |
| + @Override |
| public long ramBytesUsed() { |
| return 0; |
| } |
| diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java |
| index fc643d2..30e1980 100644 |
| --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java |
| +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java |
| @@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.PushPostingsWriterBase; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexOptions; |
| +import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.IndexOutput; |
| @@ -78,7 +79,7 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase { |
| } |
| |
| @Override |
| - public void startTerm() { |
| + public void startTerm(NumericDocValues norms) { |
| lastDocID = -1; |
| } |
| |
| diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java |
| index 0af64d9..d5f51e0 100644 |
| --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java |
| +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java |
| @@ -20,9 +20,12 @@ import java.io.IOException; |
| import java.io.PrintStream; |
| |
| import org.apache.lucene.codecs.BlockTermState; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.PostingsEnum; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| @@ -1006,6 +1009,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + // Each ID term has exactly one posting, so the generic slow impl is sufficient here |
| + // We could throw UnsupportedOperationException instead, but that would make CheckIndex fail |
| + return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| + |
| + @Override |
| public void seekExact(BytesRef target, TermState otherState) { |
| // if (DEBUG) { |
| // System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState); |
| diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java |
| index e9187af..078ca8b 100644 |
| --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java |
| +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java |
| @@ -23,6 +23,7 @@ import java.util.List; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsWriterBase; |
| import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; |
| import org.apache.lucene.index.FieldInfo; |
| @@ -221,7 +222,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| |
| String lastField = null; |
| for(String field : fields) { |
| @@ -241,7 +242,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { |
| if (term == null) { |
| break; |
| } |
| - termsWriter.write(term, termsEnum); |
| + termsWriter.write(term, termsEnum, norms); |
| } |
| |
| termsWriter.finish(); |
| @@ -729,8 +730,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { |
| } |
| |
| /** Writes one term's worth of postings. */ |
| - public void write(BytesRef text, TermsEnum termsEnum) throws IOException { |
| - BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen); |
| + public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException { |
| + BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms); |
| // TODO: LUCENE-5693: we don't need this check if we fix IW to not send deleted docs to us on flush: |
| if (state != null && ((IDVersionPostingsWriter) postingsWriter).lastDocID != -1) { |
| assert state.docFreq != 0; |
| diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java |
| index 9df9d60..5ab16a9 100644 |
| --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java |
| +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java |
| @@ -22,6 +22,7 @@ import java.util.Map; |
| |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.Fields; |
| @@ -44,7 +45,7 @@ import static org.apache.lucene.search.suggest.document.CompletionPostingsFormat |
| |
| /** |
| * <p> |
| - * Weighted FSTs for any indexed {@link SuggestField} is built on {@link #write(Fields)}. |
| + * Weighted FSTs for any indexed {@link SuggestField} are built on {@link #write(Fields,NormsProducer)}. |
| * A weighted FST maps the analyzed forms of a field to its |
| * surface form and document id. FSTs are stored in the CompletionDictionary (.lkp). |
| * </p> |
| @@ -80,8 +81,8 @@ final class CompletionFieldsConsumer extends FieldsConsumer { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| - delegateFieldsConsumer.write(fields); |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| + delegateFieldsConsumer.write(fields, norms); |
| |
| for (String field : fields) { |
| CompletionTermWriter termWriter = new CompletionTermWriter(); |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java |
| index a89b508..e71903d 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java |
| @@ -22,6 +22,7 @@ import java.util.Iterator; |
| |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.index.AssertingLeafReader; |
| import org.apache.lucene.index.FieldInfo; |
| @@ -133,8 +134,8 @@ public final class AssertingPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| - in.write(fields); |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| + in.write(fields, norms); |
| |
| // TODO: more asserts? can we somehow run a |
| // "limited" CheckIndex here??? Or ... can we improve |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java |
| index 2ca1bc7..00e168b 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java |
| @@ -21,6 +21,7 @@ import java.util.Random; |
| |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.index.Fields; |
| import org.apache.lucene.index.SegmentReadState; |
| @@ -61,11 +62,11 @@ class CrankyPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| if (random.nextInt(100) == 0) { |
| throw new IOException("Fake IOException from FieldsConsumer.write()"); |
| } |
| - delegate.write(fields); |
| + delegate.write(fields, norms); |
| } |
| |
| @Override |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java |
| index 4b85f13..37c078f 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java |
| @@ -31,17 +31,21 @@ import java.util.concurrent.atomic.AtomicInteger; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.codecs.TermStats; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.Fields; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SegmentWriteState; |
| +import org.apache.lucene.index.SlowImpactsEnum; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.Accountable; |
| @@ -227,7 +231,7 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { |
| } |
| |
| @Override |
| - public void write(Fields fields) throws IOException { |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| for(String field : fields) { |
| |
| Terms terms = fields.terms(field); |
| @@ -472,6 +476,10 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { |
| return new RAMDocsEnum(ramField.termToDocs.get(current)); |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1)); |
| + } |
| } |
| |
| private static class RAMDocsEnum extends PostingsEnum { |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java |
| index c87697b..dfec1db 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java |
| @@ -23,6 +23,7 @@ import java.util.Objects; |
| import org.apache.lucene.index.PointValues.IntersectVisitor; |
| import org.apache.lucene.index.PointValues.Relation; |
| import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.StringHelper; |
| @@ -209,6 +210,15 @@ public class AssertingLeafReader extends FilterLeafReader { |
| } |
| } |
| |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + assertThread("Terms enums", creationThread); |
| + assert state == State.POSITIONED: "impacts(...) called on unpositioned TermsEnum"; |
| + assert (flags & PostingsEnum.FREQS) != 0 : "Freqs should be requested on impacts"; |
| + |
| + return new AssertingImpactsEnum(super.impacts(scorer, flags)); |
| + } |
| + |
| // TODO: we should separately track if we are 'at the end' ? |
| // someone should not call next() after it returns null!!!! |
| @Override |
| @@ -439,6 +449,84 @@ public class AssertingLeafReader extends FilterLeafReader { |
| } |
| } |
| |
| + /** Wraps a {@link ImpactsEnum} with additional checks */ |
| + public static class AssertingImpactsEnum extends ImpactsEnum { |
| + |
| + private final AssertingPostingsEnum assertingPostings; |
| + private final ImpactsEnum in; |
| + private int lastShallowTarget; |
| + |
| + AssertingImpactsEnum(ImpactsEnum impacts) { |
| + in = impacts; |
| + // inherit checks from AssertingPostingsEnum |
| + assertingPostings = new AssertingPostingsEnum(impacts); |
| + } |
| + |
| + @Override |
| + public int advanceShallow(int target) throws IOException { |
| + assert target >= lastShallowTarget : "called on decreasing targets: target = " + target + " < last target = " + lastShallowTarget; |
| + assert target >= docID() : "target = " + target + " < docID = " + docID(); |
| + int upTo = in.advanceShallow(target); |
| + assert upTo >= target : "upTo = " + upTo + " < target = " + target; |
| + lastShallowTarget = target; |
| + return upTo; |
| + } |
| + |
| + @Override |
| + public float getMaxScore(int upTo) throws IOException { |
| + assert upTo >= lastShallowTarget : "upTo = " + upTo + " < last shallow target = " + lastShallowTarget; |
| + float maxScore = in.getMaxScore(upTo); |
| + return maxScore; |
| + } |
| + |
| + @Override |
| + public int freq() throws IOException { |
| + return assertingPostings.freq(); |
| + } |
| + |
| + @Override |
| + public int nextPosition() throws IOException { |
| + return assertingPostings.nextPosition(); |
| + } |
| + |
| + @Override |
| + public int startOffset() throws IOException { |
| + return assertingPostings.startOffset(); |
| + } |
| + |
| + @Override |
| + public int endOffset() throws IOException { |
| + return assertingPostings.endOffset(); |
| + } |
| + |
| + @Override |
| + public BytesRef getPayload() throws IOException { |
| + return assertingPostings.getPayload(); |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return assertingPostings.docID(); |
| + } |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + assert docID() + 1 >= lastShallowTarget : "target = " + (docID() + 1) + " < last shallow target = " + lastShallowTarget; |
| + return assertingPostings.nextDoc(); |
| + } |
| + |
| + @Override |
| + public int advance(int target) throws IOException { |
| + assert target >= lastShallowTarget : "target = " + target + " < last shallow target = " + lastShallowTarget; |
| + return assertingPostings.advance(target); |
| + } |
| + |
| + @Override |
| + public long cost() { |
| + return assertingPostings.cost(); |
| + } |
| + } |
| + |
| /** Wraps a NumericDocValues but with additional asserts */ |
| public static class AssertingNumericDocValues extends NumericDocValues { |
| private final Thread creationThread = Thread.currentThread(); |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java |
| index ab92946..f5b5223 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java |
| @@ -334,8 +334,30 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase { |
| SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ); |
| |
| // PostingsFormat |
| + NormsProducer fakeNorms = new NormsProducer() { |
| + |
| + @Override |
| + public void close() throws IOException {} |
| + |
| + @Override |
| + public long ramBytesUsed() { |
| + return 0; |
| + } |
| + |
| + @Override |
| + public NumericDocValues getNorms(FieldInfo field) throws IOException { |
| + if (field.hasNorms() == false) { |
| + return null; |
| + } |
| + return oneDocReader.getNormValues(field.name); |
| + } |
| + |
| + @Override |
| + public void checkIntegrity() throws IOException {} |
| + |
| + }; |
| try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) { |
| - consumer.write(MultiFields.getFields(oneDocReader)); |
| + consumer.write(MultiFields.getFields(oneDocReader), fakeNorms); |
| IOUtils.close(consumer); |
| IOUtils.close(consumer); |
| } |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java |
| index 617a721..6b301f9 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java |
| @@ -450,9 +450,10 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas |
| } |
| |
| Directory dir = applyCreatedVersionMajor(newDirectory()); |
| - Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); |
| - IndexWriterConfig conf = newIndexWriterConfig(analyzer);conf.setMergePolicy(NoMergePolicy.INSTANCE); |
| - conf.setSimilarity(new CannedNormSimilarity(norms)); |
| + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); |
| + IndexWriterConfig conf = newIndexWriterConfig(analyzer).setMergePolicy(NoMergePolicy.INSTANCE); |
| + CannedNormSimilarity sim = new CannedNormSimilarity(norms); |
| + conf.setSimilarity(sim); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); |
| Document doc = new Document(); |
| Field idField = new StringField("id", "", Field.Store.NO); |
| @@ -471,7 +472,8 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas |
| } else { |
| long value = norms[j++]; |
| dvField.setLongValue(value); |
| - indexedField.setStringValue(Long.toString(value)); |
| + // only empty fields may have 0 as a norm |
| + indexedField.setStringValue(value == 0 ? "" : "a"); |
| writer.addDocument(doc); |
| } |
| if (random().nextInt(31) == 0) { |
| @@ -530,7 +532,13 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas |
| |
| @Override |
| public long computeNorm(FieldInvertState state) { |
| - return norms[index++]; |
| + assert state.length > 0; |
| + while (true) { |
| + long norm = norms[index++]; |
| + if (norm != 0) { |
| + return norm; |
| + } |
| + } |
| } |
| |
| @Override |
| @@ -642,7 +650,7 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas |
| } |
| |
| Directory dir = applyCreatedVersionMajor(newDirectory()); |
| - Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); |
| + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); |
| IndexWriterConfig conf = newIndexWriterConfig(analyzer);conf.setMergePolicy(NoMergePolicy.INSTANCE); |
| conf.setSimilarity(new CannedNormSimilarity(norms)); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf); |
| @@ -663,7 +671,7 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas |
| } else { |
| long value = norms[j++]; |
| dvField.setLongValue(value); |
| - indexedField.setStringValue(Long.toString(value)); |
| + indexedField.setStringValue(value == 0 ? "" : "a"); |
| writer.addDocument(doc); |
| } |
| if (random().nextInt(31) == 0) { |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java |
| index f69ca55..51b418e 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java |
| @@ -32,6 +32,7 @@ import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.codecs.FilterCodec; |
| +import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| @@ -398,8 +399,8 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest |
| |
| return new FieldsConsumer() { |
| @Override |
| - public void write(Fields fields) throws IOException { |
| - fieldsConsumer.write(fields); |
| + public void write(Fields fields, NormsProducer norms) throws IOException { |
| + fieldsConsumer.write(fields, norms); |
| |
| boolean isMerge = state.context.context == IOContext.Context.MERGE; |
| |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java |
| index d5eb105..278f4b2 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java |
| @@ -32,10 +32,14 @@ import java.util.Random; |
| import java.util.Set; |
| import java.util.SortedMap; |
| import java.util.TreeMap; |
| +import java.util.function.IntToLongFunction; |
| |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| +import org.apache.lucene.codecs.NormsProducer; |
| +import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FlushInfo; |
| import org.apache.lucene.store.IOContext; |
| @@ -60,6 +64,9 @@ import static org.junit.Assert.assertTrue; |
| /** Helper class extracted from BasePostingsFormatTestCase to exercise a postings format. */ |
| public class RandomPostingsTester { |
| |
| + private static final IntToLongFunction DOC_TO_NORM = doc -> 1 + (doc & 0x0f); |
| + private static final long MAX_NORM = 0x10; |
| + |
| /** Which features to test. */ |
| public enum Option { |
| // Sometimes use .advance(): |
| @@ -599,6 +606,11 @@ public class RandomPostingsTester { |
| } |
| return getSeedPostings(current.getKey().utf8ToString(), current.getValue().seed, maxAllowed, allowPayloads); |
| } |
| + |
| + @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| } |
| |
| private static class ThreadState { |
| @@ -653,10 +665,70 @@ public class RandomPostingsTester { |
| |
| Fields seedFields = new SeedFields(fields, newFieldInfos, maxAllowed, allowPayloads); |
| |
| + NormsProducer fakeNorms = new NormsProducer() { |
| + |
| + @Override |
| + public void close() throws IOException {} |
| + |
| + @Override |
| + public long ramBytesUsed() { |
| + return 0; |
| + } |
| + |
| + @Override |
| + public NumericDocValues getNorms(FieldInfo field) throws IOException { |
| + if (newFieldInfos.fieldInfo(field.number).hasNorms()) { |
| + return new NumericDocValues() { |
| + |
| + int doc = -1; |
| + |
| + @Override |
| + public int nextDoc() throws IOException { |
| + if (++doc == segmentInfo.maxDoc()) { |
| + return doc = NO_MORE_DOCS; |
| + } |
| + return doc; |
| + } |
| + |
| + @Override |
| + public int docID() { |
| + return doc; |
| + } |
| + |
| + @Override |
| + public long cost() { |
| + return segmentInfo.maxDoc(); |
| + } |
| + |
| + @Override |
| + public int advance(int target) throws IOException { |
| + return doc = target >= segmentInfo.maxDoc() ? DocIdSetIterator.NO_MORE_DOCS : target; |
| + } |
| + |
| + @Override |
| + public boolean advanceExact(int target) throws IOException { |
| + doc = target; |
| + return true; |
| + } |
| + |
| + @Override |
| + public long longValue() throws IOException { |
| + return DOC_TO_NORM.applyAsLong(doc); |
| + } |
| + }; |
| + } else { |
| + return null; |
| + } |
| + } |
| + |
| + @Override |
| + public void checkIntegrity() throws IOException {} |
| + |
| + }; |
| FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState); |
| boolean success = false; |
| try { |
| - consumer.write(seedFields); |
| + consumer.write(seedFields, fakeNorms); |
| success = true; |
| } finally { |
| if (success) { |
| @@ -975,6 +1047,136 @@ public class RandomPostingsTester { |
| } |
| } |
| } |
| + |
| + if (options.contains(Option.SKIPPING)) { |
| + final IntToLongFunction docToNorm; |
| + if (fieldInfo.hasNorms()) { |
| + docToNorm = DOC_TO_NORM; |
| + } else { |
| + docToNorm = doc -> 1L; |
| + } |
| + for (int s = 0; s < 3; ++s) { |
| + final int scoreMode = s; |
| + SimScorer scorer = new SimScorer(field) { |
| + @Override |
| + public float score(float freq, long norm) { |
| + switch (scoreMode) { |
| + case 0: |
| + return freq; // make sure the postings record the best freq |
| + case 1: |
| + return 1f / norm; // make sure the postings record the best norm |
| + default: |
| + return freq - norm + MAX_NORM; // now a combination that could make intermediate pairs more competitive |
| + } |
| + } |
| + }; |
| + |
| + // First check max scores and block uptos |
| + int max = -1; |
| + float maxScore = 0; |
| + int flags = PostingsEnum.FREQS; |
| + if (doCheckPositions) { |
| + flags |= PostingsEnum.POSITIONS; |
| + if (doCheckOffsets) { |
| + flags |= PostingsEnum.OFFSETS; |
| + } |
| + if (doCheckPayloads) { |
| + flags |= PostingsEnum.PAYLOADS; |
| + } |
| + } |
| + |
| + ImpactsEnum impacts = termsEnum.impacts(scorer, flags); |
| + PostingsEnum postings = termsEnum.postings(null, flags); |
| + for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) { |
| + assertEquals(postings.nextDoc(), doc); |
| + if (doc == DocIdSetIterator.NO_MORE_DOCS) { |
| + break; |
| + } |
| + int freq = postings.freq(); |
| + assertEquals("freq is wrong", freq, impacts.freq()); |
| + for (int i = 0; i < freq; ++i) { |
| + int pos = postings.nextPosition(); |
| + assertEquals("position is wrong", pos, impacts.nextPosition()); |
| + if (doCheckOffsets) { |
| + assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset()); |
| + assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset()); |
| + } |
| + if (doCheckPayloads) { |
| + assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload()); |
| + } |
| + } |
| + if (doc > max) { |
| + max = impacts.advanceShallow(doc); |
| + assertTrue(max >= doc); |
| + maxScore = impacts.getMaxScore(max); |
| + } |
| + assertEquals(max, impacts.advanceShallow(doc)); |
| + assertTrue(scorer.score(impacts.freq(), docToNorm.applyAsLong(doc)) <= maxScore); |
| + } |
| + |
| + // Now check advancing |
| + impacts = termsEnum.impacts(scorer, flags); |
| + postings = termsEnum.postings(postings, flags); |
| + |
| + max = -1; |
| + while (true) { |
| + int doc = impacts.docID(); |
| + boolean advance; |
| + int target; |
| + if (random.nextBoolean()) { |
| + advance = false; |
| + target = doc + 1; |
| + } else { |
| + advance = true; |
| + int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc); |
| + target = impacts.docID() + delta; |
| + } |
| + |
| + if (target > max && random.nextBoolean()) { |
| + int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target); |
| + max = target + delta; |
| + int m = impacts.advanceShallow(target); |
| + assertTrue(m >= target); |
| + maxScore = impacts.getMaxScore(max); |
| + } |
| + |
| + if (advance) { |
| + doc = impacts.advance(target); |
| + } else { |
| + doc = impacts.nextDoc(); |
| + } |
| + |
| + assertEquals(postings.advance(target), doc); |
| + if (doc == DocIdSetIterator.NO_MORE_DOCS) { |
| + break; |
| + } |
| + int freq = postings.freq(); |
| + assertEquals("freq is wrong", freq, impacts.freq()); |
| + for (int i = 0; i < postings.freq(); ++i) { |
| + int pos = postings.nextPosition(); |
| + assertEquals("position is wrong", pos, impacts.nextPosition()); |
| + if (doCheckOffsets) { |
| + assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset()); |
| + assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset()); |
| + } |
| + if (doCheckPayloads) { |
| + assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload()); |
| + } |
| + } |
| + |
| + if (doc > max) { |
| + int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc); |
| + max = doc + delta; |
| + int m = impacts.advanceShallow(doc); |
| + assertTrue(m >= doc); |
| + maxScore = impacts.getMaxScore(max); |
| + } |
| + |
| + float score = scorer.score(impacts.freq(), docToNorm.applyAsLong(doc)); |
| + assertTrue(score <= maxScore); |
| + } |
| + } |
| + } |
| } |
| |
| private static class TestThread extends Thread { |
| diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java |
| index 7e54f8d..1b81c7f 100644 |
| --- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java |
| +++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java |
| @@ -20,6 +20,7 @@ import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.PostingsEnum; |
| @@ -42,6 +43,7 @@ import org.apache.lucene.search.ScoreMode; |
| import org.apache.lucene.search.Scorer; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.Weight; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.DocIdSetBuilder; |
| @@ -239,6 +241,11 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return te.impacts(scorer, flags); |
| + } |
| + |
| + @Override |
| public BytesRef next() throws IOException { |
| if (positioned) { |
| positioned = false; |
| diff --git a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java |
| index b56d462..daaf00d 100644 |
| --- a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java |
| +++ b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java |
| @@ -26,6 +26,7 @@ import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.FieldInfo; |
| +import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.LegacySortedSetDocValues; |
| import org.apache.lucene.index.LegacySortedSetDocValuesWrapper; |
| @@ -34,6 +35,7 @@ import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.DocIdSetIterator; |
| +import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.util.Accountable; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| @@ -607,6 +609,11 @@ public class DocTermOrds implements Accountable { |
| } |
| |
| @Override |
| + public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException { |
| + return termsEnum.impacts(scorer, flags); |
| + } |
| + |
| + @Override |
| public BytesRef term() { |
| return term; |
| } |