diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
index 17024d8..a405ccb 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
@@ -32,12 +32,14 @@ import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@@ -659,6 +661,12 @@ public class BlockTermsReader extends FieldsProducer {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ decodeMetaData();
+ return postingsReader.impacts(fieldInfo, state, scorer, flags);
+ }
+
+ @Override
public void seekExact(BytesRef target, TermState otherState) {
//System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " + target + " this=" + this);
assert otherState != null && otherState instanceof BlockTermState;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
index f19cd2c..9ed87b5 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
@@ -26,6 +26,7 @@ import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.IndexOptions;
@@ -127,7 +128,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
@@ -146,7 +147,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
break;
}
- termsWriter.write(term, termsEnum);
+ termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
@@ -232,9 +233,9 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
private final BytesRefBuilder lastPrevTerm = new BytesRefBuilder();
- void write(BytesRef text, TermsEnum termsEnum) throws IOException {
+ void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException {
- BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
+ BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms);
if (state == null) {
// No docs for this term:
return;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
index b16bb15..bdc0738 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
@@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter; // javadocs
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
@@ -213,7 +214,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
String lastField = null;
for(String field : fields) {
@@ -233,7 +234,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
if (term == null) {
break;
}
- termsWriter.write(term, termsEnum);
+ termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
@@ -771,7 +772,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
}
/** Writes one term's worth of postings. */
- public void write(BytesRef text, TermsEnum termsEnum) throws IOException {
+ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException {
/*
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
@@ -780,7 +781,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
}
*/
- BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
+ BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms);
if (state != null) {
assert state.docFreq != 0;
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
index 6c2d2bc..fdb54df 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java
@@ -20,9 +20,11 @@ package org.apache.lucene.codecs.blocktreeords;
import java.io.IOException;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -205,6 +207,12 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, reuse, flags);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ currentFrame.decodeMetaData();
+ return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
+ }
+
private int getState() {
int state = currentFrame.state;
for(int idx=0;idx<currentFrame.suffix;idx++) {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java
index 9a9d871..8d55a1d 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java
@@ -25,9 +25,11 @@ import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -934,6 +936,19 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ assert !eof;
+ //if (DEBUG) {
+ //System.out.println("BTTR.docs seg=" + segment);
+ //}
+ currentFrame.decodeMetaData();
+ //if (DEBUG) {
+ //System.out.println(" state=" + currentFrame.state);
+ //}
+ return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
+ }
+
+ @Override
public void seekExact(BytesRef target, TermState otherState) {
// if (DEBUG) {
// System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
index b864bf0..b826102 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
@@ -29,16 +29,19 @@ import java.util.Map.Entry;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
@@ -371,6 +374,10 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
return delegate().postings(reuse, flags);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return delegate().impacts(scorer, flags);
+ }
}
@Override
@@ -416,7 +423,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
// Delegate must write first: it may have opened files
// on creating the class
@@ -424,7 +431,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
// close them; alternatively, if we delayed pulling
// the fields consumer until here, we could do it
// afterwards:
- delegateFieldsConsumer.write(fields);
+ delegateFieldsConsumer.write(fields, norms);
for(String field : fields) {
Terms terms = fields.terms(field);
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
index 00f25cf..901e1ae 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
@@ -29,14 +29,17 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.Accountable;
@@ -944,6 +947,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
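+    // scorer.score(Float.MAX_VALUE, 1) is an upper bound on the score of any
+    // real (freq, norm) pair, so SlowImpactsEnum can report it as the max
+    // score everywhere without needing impact skip data.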
+ return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+ }
}
private final class DirectIntersectTermsEnum extends TermsEnum {
@@ -1496,6 +1503,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+ }
+
+ @Override
public SeekStatus seekCeil(BytesRef term) {
throw new UnsupportedOperationException();
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
index 5ba4c5f..4ecf4d6 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
@@ -34,6 +34,7 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
@@ -41,6 +42,7 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
@@ -432,6 +434,12 @@ public class FSTOrdTermsReader extends FieldsProducer {
return postingsReader.postings(fieldInfo, state, reuse, flags);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ decodeMetaData();
+ return postingsReader.impacts(fieldInfo, state, scorer, flags);
+ }
+
// TODO: this can be achieved by making use of Util.getByOutput()
// and should have related tests
@Override
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
index cbe6583..b59d41d 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
@@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -186,7 +187,7 @@ public class FSTOrdTermsWriter extends FieldsConsumer {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
@@ -205,7 +206,7 @@ public class FSTOrdTermsWriter extends FieldsConsumer {
if (term == null) {
break;
}
- BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen);
+ BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
if (termState != null) {
termsWriter.finishTerm(term, termState);
sumTotalTermFreq += termState.totalTermFreq;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
index 8dda05c..b1b61e1 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
@@ -34,6 +34,7 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
@@ -41,6 +42,7 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@@ -299,6 +301,12 @@ public class FSTTermsReader extends FieldsProducer {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ decodeMetaData();
+ return postingsReader.impacts(fieldInfo, state, scorer, flags);
+ }
+
+ @Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
index 8284d74..8e55d41 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
@@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.FieldInfo;
@@ -158,7 +159,7 @@ public class FSTTermsWriter extends FieldsConsumer {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
@@ -179,7 +180,7 @@ public class FSTTermsWriter extends FieldsConsumer {
break;
}
- BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen);
+ BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
if (termState != null) {
termsWriter.finishTerm(term, termState);
sumTotalTermFreq += termState.totalTermFreq;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
index b81e56e..855002c 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
@@ -29,7 +29,7 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.*;
-import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
@@ -870,5 +870,9 @@ class MemoryDocValuesProducer extends DocValuesProducer {
throw new UnsupportedOperationException();
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
}
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
index 21983c6..0df7d92 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@@ -27,6 +27,7 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.CorruptIndexException;
@@ -34,12 +35,15 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IOContext;
@@ -287,7 +291,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
@@ -815,6 +819,11 @@ public final class MemoryPostingsFormat extends PostingsFormat {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+ }
+
+ @Override
public BytesRef term() {
return current.input;
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
index f5504b3..22aeb5c 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
@@ -29,11 +29,14 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
@@ -230,6 +233,10 @@ class SimpleTextFieldsReader extends FieldsProducer {
return docsEnum.reset(docsStart, indexOptions == IndexOptions.DOCS, docFreq);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
+ }
}
private class SimpleTextDocsEnum extends PostingsEnum {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
index 2023552..d8299b5 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs.simpletext;
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -56,7 +57,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
write(writeState.fieldInfos, fields);
}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
index 25f2a4d..ee0757d 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
@@ -27,10 +27,13 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
@@ -410,6 +413,10 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
return e;
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
+ }
}
// note: these two enum classes are exactly like the Default impl...
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveFreqNormAccumulator.java b/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveFreqNormAccumulator.java
new file mode 100644
index 0000000..3dd9d35
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CompetitiveFreqNormAccumulator.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * This class accumulates the (freq, norm) pairs that may produce competitive scores.
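+ * <p>
+ * A usage sketch (illustrative only, not part of this patch):
+ * <pre class="prettyprint">
+ * CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+ * acc.add(3, 24L);  // freq=3 in a doc whose encoded norm is 24
+ * acc.add(7, 64L);  // greater freq but greater norm: incomparable, kept
+ * acc.add(2, 64L);  // dominated by (7, 64L): never surfaces
+ * acc.getCompetitiveFreqNormPairs(); // {3,24}, {7,64}, ascending freq/norm
+ * acc.clear();      // reset before accumulating the next block
+ * </pre>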
+ */
+public final class CompetitiveFreqNormAccumulator {
+
+ // We speed up accumulation for common norm values by first computing
+ // the max freq for all norms in -128..127
+ private final int[] maxFreqs;
+ private boolean dirty;
+ private final TreeSet<FreqAndNorm> freqNormPairs;
+
+ /** Sole constructor. */
+ public CompetitiveFreqNormAccumulator() {
+ maxFreqs = new int[256];
+ Comparator<FreqAndNorm> comparator = new Comparator<CompetitiveFreqNormAccumulator.FreqAndNorm>() {
+ @Override
+ public int compare(FreqAndNorm o1, FreqAndNorm o2) {
+ // greater freqs compare greater
+ int cmp = Integer.compare(o1.freq, o2.freq);
+ if (cmp == 0) {
+ // greater norms compare lower
+ cmp = Long.compareUnsigned(o2.norm, o1.norm);
+ }
+ return cmp;
+ }
+ };
+ freqNormPairs = new TreeSet<>(comparator);
+ }
+
+ /** Reset to the same state it was in after creation. */
+ public void clear() {
+ Arrays.fill(maxFreqs, 0);
+ dirty = false;
+ freqNormPairs.clear();
+ }
+
+ /**
+ * A (freq, norm) pair.
+ */
+ public static class FreqAndNorm {
+ /** Doc-term frequency. */
+ public final int freq;
+ /** Normalization factor. */
+ public final long norm;
+
+ /** Sole constructor. */
+ public FreqAndNorm(int freq, long norm) {
+ this.freq = freq;
+ this.norm = norm;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || obj instanceof FreqAndNorm == false) {
+ return false;
+ }
+ FreqAndNorm that = (FreqAndNorm) obj;
+ return freq == that.freq && norm == that.norm;
+ }
+
+ @Override
+ public int hashCode() {
+ int h = getClass().hashCode();
+ h = 31 * h + freq;
+ h = 31 * h + Long.hashCode(norm);
+ return h;
+ }
+
+ @Override
+ public String toString() {
+ return "{" + freq + "," + norm + "}";
+ }
+ }
+
+  /** Accumulate a (freq, norm) pair, updating this structure unless an
+   *  equivalent or more competitive entry is already present. */
+ public void add(int freq, long norm) {
+ if (norm >= Byte.MIN_VALUE && norm <= Byte.MAX_VALUE) {
+ int index = Byte.toUnsignedInt((byte) norm);
+ maxFreqs[index] = Math.max(maxFreqs[index], freq);
+ dirty = true;
+ } else {
+ add(new FreqAndNorm(freq, norm));
+ }
+ }
+
+ /** Merge {@code acc} into this. */
+ public void addAll(CompetitiveFreqNormAccumulator acc) {
+ for (FreqAndNorm entry : acc.getCompetitiveFreqNormPairs()) {
+ add(entry);
+ }
+ }
+
+  /** Get the set of competitive freq and norm pairs, ordered by increasing freq and norm. */
+ public SortedSet<FreqAndNorm> getCompetitiveFreqNormPairs() {
+ if (dirty) {
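+      // flush the dense byte-norm buckets into the tree before exposing it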
+ for (int i = 0; i < maxFreqs.length; ++i) {
+ if (maxFreqs[i] > 0) {
+ add(new FreqAndNorm(maxFreqs[i], (byte) i));
+ maxFreqs[i] = 0;
+ }
+ }
+ dirty = false;
+ }
+ return Collections.unmodifiableSortedSet(freqNormPairs);
+ }
+
+ private void add(FreqAndNorm newEntry) {
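+    // Invariant: freqNormPairs is a Pareto frontier on which freq and
+    // (unsigned) norm both strictly increase; a lower-freq entry survives
+    // only because its smaller norm may still yield a greater score.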
+ FreqAndNorm next = freqNormPairs.ceiling(newEntry);
+ if (next == null) {
+ // nothing is more competitive
+ freqNormPairs.add(newEntry);
+ } else if (Long.compareUnsigned(next.norm, newEntry.norm) <= 0) {
+ // we already have this entry or more competitive entries in the tree
+ return;
+ } else {
+      // some entries have a greater freq but a less competitive norm, so we
+      // can't tell which one will trigger greater scores; add to the tree anyway
+ freqNormPairs.add(newEntry);
+ }
+
+ for (Iterator<FreqAndNorm> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
+ FreqAndNorm entry = it.next();
+ if (Long.compareUnsigned(entry.norm, newEntry.norm) >= 0) {
+ // less competitive
+ it.remove();
+ } else {
+        // lesser freq but better norm; this and all remaining entries are incomparable, so stop
+ break;
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return getCompetitiveFreqNormPairs().toString();
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java
index 28bae1d..f4fc9ac 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java
@@ -76,14 +76,14 @@ public abstract class FieldsConsumer implements Closeable {
* live docs when pulling docs/positions enums.
* </ul>
*/
- public abstract void write(Fields fields) throws IOException;
+ public abstract void write(Fields fields, NormsProducer norms) throws IOException;
/** Merges in the fields from the readers in
* <code>mergeState</code>. The default implementation skips
- * and maps around deleted documents, and calls {@link #write(Fields)}.
+ * and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}.
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
- public void merge(MergeState mergeState) throws IOException {
+ public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
final List<Fields> fields = new ArrayList<>();
final List<ReaderSlice> slices = new ArrayList<>();
@@ -102,7 +102,7 @@ public abstract class FieldsConsumer implements Closeable {
Fields mergedFields = new MappedMultiFields(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
slices.toArray(ReaderSlice.EMPTY_ARRAY)));
- write(mergedFields);
+ write(mergedFields, norms);
}
// NOTE: strange but necessary so javadocs linting is happy:
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
index c937886..517c731 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
@@ -40,8 +40,8 @@ public abstract class MultiLevelSkipListReader implements Closeable {
/** the maximum number of skip levels possible for this index */
protected int maxNumberOfSkipLevels;
- // number of levels in this skip list
- private int numberOfSkipLevels;
+ /** number of levels in this skip list */
+ protected int numberOfSkipLevels;
// Expert: defines the number of top skip levels to buffer in memory.
// Reducing this number results in less memory usage, but possibly
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
index 207b324..8e090be 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
@@ -53,13 +53,13 @@ import org.apache.lucene.util.MathUtil;
public abstract class MultiLevelSkipListWriter {
/** number of levels in this skip list */
- protected int numberOfSkipLevels;
+ protected final int numberOfSkipLevels;
/** the skip interval in the list with level = 0 */
- private int skipInterval;
+ private final int skipInterval;
/** skipInterval used for level &gt; 0 */
- private int skipMultiplier;
+ private final int skipMultiplier;
/** for every skip level a different buffer is used */
private RAMOutputStream[] skipBuffer;
@@ -69,6 +69,7 @@ public abstract class MultiLevelSkipListWriter {
this.skipInterval = skipInterval;
this.skipMultiplier = skipMultiplier;
+ int numberOfSkipLevels;
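+    // computed into a local first so that the field can be final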
// calculate the maximum number of skip levels for this document frequency
if (df <= skipInterval) {
numberOfSkipLevels = 1;
@@ -80,6 +81,7 @@ public abstract class MultiLevelSkipListWriter {
if (numberOfSkipLevels > maxSkipLevels) {
numberOfSkipLevels = maxSkipLevels;
}
+ this.numberOfSkipLevels = numberOfSkipLevels;
}
/** Creates a {@code MultiLevelSkipListWriter}, where
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
index 56cbab5..ca403fa 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
@@ -22,7 +22,9 @@ import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@@ -65,7 +67,13 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
/** Must fully consume state, since after this call that
* TermState may be reused. */
public abstract PostingsEnum postings(FieldInfo fieldInfo, BlockTermState state, PostingsEnum reuse, int flags) throws IOException;
-
+
+ /**
+   * Return an {@link ImpactsEnum} that computes impacts with {@code scorer}.
+ * @see #postings(FieldInfo, BlockTermState, PostingsEnum, int)
+ */
+ public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, Similarity.SimScorer scorer, int flags) throws IOException;
+
/**
* Checks consistency of this reader.
* <p>
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
index b4f2d4e..48c6027 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
@@ -60,7 +60,7 @@ public abstract class PostingsWriterBase implements Closeable {
* FixedBitSet} for every docID written. If no docs
* were written, this method should return null, and the
* terms dict will skip the term. */
- public abstract BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen) throws IOException;
+ public abstract BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms) throws IOException;
/**
* Encode metadata as long[] and byte[]. {@code absolute} controls whether
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
index 1fb83b9..98bdd91 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@@ -74,7 +75,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
/** Start a new term. Note that a matching call to {@link
* #finishTerm(BlockTermState)} is made only if the term has at least one
* document. */
- public abstract void startTerm() throws IOException;
+ public abstract void startTerm(NumericDocValues norms) throws IOException;
/** Finishes the current term. The provided {@link
* BlockTermState} contains the term's summary statistics,
@@ -117,8 +118,14 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
}
@Override
- public final BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen) throws IOException {
- startTerm();
+ public final BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms) throws IOException {
+ NumericDocValues normValues;
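+    // fields that index no norms hand a null iterator to startTerm(), so
+    // implementations must handle the norm-less case themselves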
+ if (fieldInfo.hasNorms() == false) {
+ normValues = null;
+ } else {
+ normValues = norms.getNorms(fieldInfo);
+ }
+ startTerm(normValues);
postingsEnum = termsEnum.postings(postingsEnum, enumFlags);
assert postingsEnum != null;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java
index ffc182f..4305c46 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java
@@ -17,9 +17,13 @@
package org.apache.lucene.codecs.blocktree;
+import java.io.IOException;
+
import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BytesRef;
@@ -80,4 +84,9 @@ class BitSetTermsEnum extends TermsEnum {
postingsEnum.reset();
return postingsEnum;
}
+
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
index bdacc22..ec3f6e6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
@@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -315,7 +316,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
//if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);
String lastField = null;
@@ -340,7 +341,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
//if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term));
- termsWriter.write(term, termsEnum);
+ termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
@@ -852,7 +853,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
/** Writes one term's worth of postings. */
- public void write(BytesRef text, TermsEnum termsEnum) throws IOException {
+ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException {
/*
if (DEBUG) {
int[] tmp = new int[lastTerm.length];
@@ -861,7 +862,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
*/
- BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
+ BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms);
if (state != null) {
assert state.docFreq != 0;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
index 7521763..6bccddc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
@@ -19,10 +19,12 @@ package org.apache.lucene.codecs.blocktree;
import java.io.IOException;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -232,6 +234,12 @@ final class IntersectTermsEnum extends TermsEnum {
return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, reuse, flags);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ currentFrame.decodeMetaData();
+ return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
+ }
+
private int getState() {
int state = currentFrame.state;
for(int idx=0;idx<currentFrame.suffix;idx++) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
index 73c32bb..ef83f49 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
@@ -21,9 +21,11 @@ import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -1003,6 +1005,19 @@ final class SegmentTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ assert !eof;
+ //if (DEBUG) {
+ //System.out.println("BTTR.docs seg=" + segment);
+ //}
+ currentFrame.decodeMetaData();
+ //if (DEBUG) {
+ //System.out.println(" state=" + currentFrame.state);
+ //}
+ return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
+ }
+
+ @Override
public void seekExact(BytesRef target, TermState otherState) {
// if (DEBUG) {
// System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
index f5318ba..a0f5292 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
@@ -27,14 +27,17 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
@@ -942,6 +945,13 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
return docsEnum;
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ final PostingsEnum delegate = postings(null, PostingsEnum.FREQS);
+ final float maxScore = scorer.score(Float.MAX_VALUE, 1);
+ return new SlowImpactsEnum(delegate, maxScore);
+ }
+
}
private static class TVPostingsEnum extends PostingsEnum {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java
index 3b0d5c2..a99894a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java
@@ -384,7 +384,8 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
// Increment version to change it
final static int VERSION_START = 0;
- final static int VERSION_CURRENT = VERSION_START;
+ final static int VERSION_IMPACT_SKIP_DATA = 1;
+ final static int VERSION_CURRENT = VERSION_IMPACT_SKIP_DATA;
private final int minTermBlockSize;
private final int maxTermBlockSize;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java
index 0dde774..fea0e208 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java
@@ -19,16 +19,20 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Objects;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SlowImpactsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -234,6 +238,16 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}
}
+ @Override
+ public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
+ Objects.requireNonNull(scorer);
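+    // impacts live in the skip data, which is written only for posting lists
+    // longer than one block and only since VERSION_IMPACT_SKIP_DATA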
+ if (state.docFreq <= BLOCK_SIZE || version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
+ // no skip data
+ return new SlowImpactsEnum(postings(fieldInfo, state, null, flags), scorer.score(Float.MAX_VALUE, 1));
+ }
+ return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, scorer, flags);
+ }
+
final class BlockDocsEnum extends PostingsEnum {
private final byte[] encoded;
@@ -401,7 +415,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
- skipper = new Lucene50SkipReader(docIn.clone(),
+ skipper = new Lucene50SkipReader(version,
+ docIn.clone(),
MAX_SKIP_LEVELS,
indexHasPos,
indexHasOffsets,
@@ -666,7 +681,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
if (target > nextSkipDoc) {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
- skipper = new Lucene50SkipReader(docIn.clone(),
+ skipper = new Lucene50SkipReader(version,
+ docIn.clone(),
MAX_SKIP_LEVELS,
true,
indexHasOffsets,
@@ -1082,7 +1098,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
if (target > nextSkipDoc) {
if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping
- skipper = new Lucene50SkipReader(docIn.clone(),
+ skipper = new Lucene50SkipReader(version,
+ docIn.clone(),
MAX_SKIP_LEVELS,
true,
indexHasOffsets,
@@ -1272,6 +1289,469 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}
}
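+  // Postings enum that also exposes score upper bounds read from the skip
+  // data via Lucene50ScoreSkipReader; "everything" because it can surface
+  // freqs, positions, offsets and payloads depending on the flags.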
+ final class BlockImpactsEverythingEnum extends ImpactsEnum {
+
+ private final byte[] encoded;
+
+ private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
+ private final int[] freqBuffer = new int[MAX_DATA_SIZE];
+ private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
+
+ private final int[] payloadLengthBuffer;
+ private final int[] offsetStartDeltaBuffer;
+ private final int[] offsetLengthBuffer;
+
+ private byte[] payloadBytes;
+ private int payloadByteUpto;
+ private int payloadLength;
+
+ private int lastStartOffset;
+ private int startOffset = -1;
+ private int endOffset = -1;
+
+ private int docBufferUpto;
+ private int posBufferUpto;
+
+ private final Lucene50ScoreSkipReader skipper;
+
+ IndexInput docIn;
+ final IndexInput posIn;
+ final IndexInput payIn;
+ final BytesRef payload;
+
+ final boolean indexHasFreq;
+ final boolean indexHasPos;
+ final boolean indexHasOffsets;
+ final boolean indexHasPayloads;
+
+ private int docFreq; // number of docs in this posting list
+ private long totalTermFreq; // number of positions in this posting list
+ private int docUpto; // how many docs we've read
+ private int doc; // doc we last read
+ private int accum; // accumulator for doc deltas
+ private int freq; // freq we last read
+ private int position; // current position
+
+ // how many positions "behind" we are; nextPosition must
+ // skip these to "catch up":
+ private int posPendingCount;
+
+ // Lazy pos seek: if != -1 then we must seek to this FP
+ // before reading positions:
+ private long posPendingFP;
+
+ // Lazy pay seek: if != -1 then we must seek to this FP
+ // before reading payloads/offsets:
+ private long payPendingFP;
+
+ // Where this term's postings start in the .doc file:
+ private long docTermStartFP;
+
+ // Where this term's postings start in the .pos file:
+ private long posTermStartFP;
+
+ // Where this term's payloads/offsets start in the .pay
+ // file:
+ private long payTermStartFP;
+
+ // File pointer where the last (vInt encoded) pos delta
+ // block is. We need this to know whether to bulk
+ // decode vs vInt decode the block:
+ private long lastPosBlockFP;
+
+ private int nextSkipDoc = -1;
+
+ private final boolean needsPositions;
+ private final boolean needsOffsets; // true if we actually need offsets
+ private final boolean needsPayloads; // true if we actually need payloads
+
+ private long seekTo = -1;
+
+ public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, SimScorer scorer, int flags) throws IOException {
+ indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ indexHasPayloads = fieldInfo.hasPayloads();
+
+ needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
+ needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
+ needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
+
+ this.docIn = Lucene50PostingsReader.this.docIn.clone();
+
+ encoded = new byte[MAX_ENCODED_SIZE];
+
+ if (indexHasPos && needsPositions) {
+ this.posIn = Lucene50PostingsReader.this.posIn.clone();
+ } else {
+ this.posIn = null;
+ }
+
+ if ((indexHasOffsets && needsOffsets) || (indexHasPayloads && needsPayloads)) {
+ this.payIn = Lucene50PostingsReader.this.payIn.clone();
+ } else {
+ this.payIn = null;
+ }
+
+ if (indexHasOffsets) {
+ offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
+ offsetLengthBuffer = new int[MAX_DATA_SIZE];
+ } else {
+ offsetStartDeltaBuffer = null;
+ offsetLengthBuffer = null;
+ startOffset = -1;
+ endOffset = -1;
+ }
+
+ if (indexHasPayloads) {
+ payloadLengthBuffer = new int[MAX_DATA_SIZE];
+ payloadBytes = new byte[128];
+ payload = new BytesRef();
+ } else {
+ payloadLengthBuffer = null;
+ payloadBytes = null;
+ payload = null;
+ }
+
+ docFreq = termState.docFreq;
+ docTermStartFP = termState.docStartFP;
+ posTermStartFP = termState.posStartFP;
+ payTermStartFP = termState.payStartFP;
+ totalTermFreq = termState.totalTermFreq;
+ docIn.seek(docTermStartFP);
+ posPendingFP = posTermStartFP;
+ payPendingFP = payTermStartFP;
+ posPendingCount = 0;
+ if (termState.totalTermFreq < BLOCK_SIZE) {
+ lastPosBlockFP = posTermStartFP;
+ } else if (termState.totalTermFreq == BLOCK_SIZE) {
+ lastPosBlockFP = -1;
+ } else {
+ lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
+ }
+
+ doc = -1;
+ accum = 0;
+ docUpto = 0;
+ docBufferUpto = BLOCK_SIZE;
+
+ skipper = new Lucene50ScoreSkipReader(version,
+ docIn.clone(),
+ MAX_SKIP_LEVELS,
+ indexHasPos,
+ indexHasOffsets,
+ indexHasPayloads,
+ scorer);
+ skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
+
+ if (indexHasFreq == false) {
+ Arrays.fill(freqBuffer, 1);
+ }
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return freq;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ private void refillDocs() throws IOException {
+ final int left = docFreq - docUpto;
+ assert left > 0;
+
+ if (left >= BLOCK_SIZE) {
+ forUtil.readBlock(docIn, encoded, docDeltaBuffer);
+ if (indexHasFreq) {
+ forUtil.readBlock(docIn, encoded, freqBuffer);
+ }
+ } else {
+ readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq);
+ }
+ docBufferUpto = 0;
+ }
+
+ private void refillPositions() throws IOException {
+ if (posIn.getFilePointer() == lastPosBlockFP) {
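+      // the final partial block is vInt-encoded rather than packed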
+ final int count = (int) (totalTermFreq % BLOCK_SIZE);
+ int payloadLength = 0;
+ int offsetLength = 0;
+ payloadByteUpto = 0;
+ for(int i=0;i<count;i++) {
+ int code = posIn.readVInt();
+ if (indexHasPayloads) {
+ if ((code & 1) != 0) {
+ payloadLength = posIn.readVInt();
+ }
+ payloadLengthBuffer[i] = payloadLength;
+ posDeltaBuffer[i] = code >>> 1;
+ if (payloadLength != 0) {
+ if (payloadByteUpto + payloadLength > payloadBytes.length) {
+ payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
+ }
+ posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
+ payloadByteUpto += payloadLength;
+ }
+ } else {
+ posDeltaBuffer[i] = code;
+ }
+
+ if (indexHasOffsets) {
+ int deltaCode = posIn.readVInt();
+ if ((deltaCode & 1) != 0) {
+ offsetLength = posIn.readVInt();
+ }
+ offsetStartDeltaBuffer[i] = deltaCode >>> 1;
+ offsetLengthBuffer[i] = offsetLength;
+ }
+ }
+ payloadByteUpto = 0;
+ } else {
+ forUtil.readBlock(posIn, encoded, posDeltaBuffer);
+
+ if (indexHasPayloads && payIn != null) {
+ if (needsPayloads) {
+ forUtil.readBlock(payIn, encoded, payloadLengthBuffer);
+ int numBytes = payIn.readVInt();
+
+ if (numBytes > payloadBytes.length) {
+ payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
+ }
+ payIn.readBytes(payloadBytes, 0, numBytes);
+ } else {
+ // this works, because when writing a vint block we always force the first length to be written
+ forUtil.skipBlock(payIn); // skip over lengths
+ int numBytes = payIn.readVInt(); // read length of payloadBytes
+ payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
+ }
+ payloadByteUpto = 0;
+ }
+
+ if (indexHasOffsets && payIn != null) {
+ if (needsOffsets) {
+ forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer);
+ forUtil.readBlock(payIn, encoded, offsetLengthBuffer);
+ } else {
+ // this works, because when writing a vint block we always force the first length to be written
+ forUtil.skipBlock(payIn); // skip over starts
+ forUtil.skipBlock(payIn); // skip over lengths
+ }
+ }
+ }
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ return skipper.getMaxScore(upTo);
+ }
+
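+ // Moves only the skipper, not the doc iterator: the skipped-to file
+ // pointers are buffered, and the actual seek is deferred via seekTo until
+ // advance() really needs the new block.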
+ @Override
+ public int advanceShallow(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ // add one to correct the result: skip positions in Lucene50SkipReader
+ // are off by one relative to MultiLevelSkipListReader
+ final int newDocUpto = skipper.skipTo(target) + 1;
+
+ if (newDocUpto > docUpto) {
+ // Skipper moved
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
+
+ // Force to read next block
+ docBufferUpto = BLOCK_SIZE;
+ accum = skipper.getDoc();
+ posPendingFP = skipper.getPosPointer();
+ payPendingFP = skipper.getPayPointer();
+ posPendingCount = skipper.getPosBufferUpto();
+ lastStartOffset = 0; // new document
+ payloadByteUpto = skipper.getPayloadByteUpto(); // i.e. the payloadByteUpto of the last skip entry
+ seekTo = skipper.getDocPointer(); // delay the seek
+ }
+ // on the next call to advance, this is used to
+ // decide whether the skipper needs to be consulted.
+ nextSkipDoc = skipper.getNextSkipDoc();
+ }
+ assert nextSkipDoc >= target;
+ return nextSkipDoc;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ advanceShallow(target);
+ }
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ if (seekTo >= 0) {
+ docIn.seek(seekTo);
+ seekTo = -1;
+ }
+ refillDocs();
+ }
+
+ // Now scan:
+ while (true) {
+ accum += docDeltaBuffer[docBufferUpto];
+ freq = freqBuffer[docBufferUpto];
+ posPendingCount += freq;
+ docBufferUpto++;
+ docUpto++;
+
+ if (accum >= target) {
+ break;
+ }
+ if (docUpto == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ position = 0;
+ lastStartOffset = 0;
+
+ return doc = accum;
+ }
+
+ // TODO: in theory we could avoid loading frq block
+ // when not needed, ie, use skip data to load how far to
+ // seek the pos pointer ... instead of having to load frq
+ // blocks only to sum up how many positions to skip
+ private void skipPositions() throws IOException {
+ // Skip positions now:
+ int toSkip = posPendingCount - freq;
+ // if (DEBUG) {
+ // System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
+ // }
+
+ final int leftInBlock = BLOCK_SIZE - posBufferUpto;
+ if (toSkip < leftInBlock) {
+ int end = posBufferUpto + toSkip;
+ while(posBufferUpto < end) {
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLengthBuffer[posBufferUpto];
+ }
+ posBufferUpto++;
+ }
+ } else {
+ toSkip -= leftInBlock;
+ while(toSkip >= BLOCK_SIZE) {
+ assert posIn.getFilePointer() != lastPosBlockFP;
+ forUtil.skipBlock(posIn);
+
+ if (indexHasPayloads && payIn != null) {
+ // Skip payloadLength block:
+ forUtil.skipBlock(payIn);
+
+ // Skip payloadBytes block:
+ int numBytes = payIn.readVInt();
+ payIn.seek(payIn.getFilePointer() + numBytes);
+ }
+
+ if (indexHasOffsets && payIn != null) {
+ forUtil.skipBlock(payIn);
+ forUtil.skipBlock(payIn);
+ }
+ toSkip -= BLOCK_SIZE;
+ }
+ refillPositions();
+ payloadByteUpto = 0;
+ posBufferUpto = 0;
+ while(posBufferUpto < toSkip) {
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLengthBuffer[posBufferUpto];
+ }
+ posBufferUpto++;
+ }
+ }
+
+ position = 0;
+ lastStartOffset = 0;
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ if (indexHasPos == false || needsPositions == false) {
+ return -1;
+ }
+ assert posPendingCount > 0;
+
+ if (posPendingFP != -1) {
+ posIn.seek(posPendingFP);
+ posPendingFP = -1;
+
+ if (payPendingFP != -1 && payIn != null) {
+ payIn.seek(payPendingFP);
+ payPendingFP = -1;
+ }
+
+ // Force buffer refill:
+ posBufferUpto = BLOCK_SIZE;
+ }
+
+ if (posPendingCount > freq) {
+ skipPositions();
+ posPendingCount = freq;
+ }
+
+ if (posBufferUpto == BLOCK_SIZE) {
+ refillPositions();
+ posBufferUpto = 0;
+ }
+ position += posDeltaBuffer[posBufferUpto];
+
+ if (indexHasPayloads) {
+ payloadLength = payloadLengthBuffer[posBufferUpto];
+ payload.bytes = payloadBytes;
+ payload.offset = payloadByteUpto;
+ payload.length = payloadLength;
+ payloadByteUpto += payloadLength;
+ }
+
+ if (indexHasOffsets && needsOffsets) {
+ startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
+ endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
+ lastStartOffset = startOffset;
+ }
+
+ posBufferUpto++;
+ posPendingCount--;
+ return position;
+ }
+
+ @Override
+ public int startOffset() {
+ return startOffset;
+ }
+
+ @Override
+ public int endOffset() {
+ return endOffset;
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ if (payloadLength == 0) {
+ return null;
+ } else {
+ return payload;
+ }
+ }
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
+
+ }
+
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java
index 6d24a4c..06b9a0c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java
@@ -31,12 +31,14 @@ import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
@@ -96,7 +98,11 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
private final ForUtil forUtil;
private final Lucene50SkipWriter skipWriter;
-
+
+ private boolean fieldHasNorms;
+ private NumericDocValues norms;
+ private final CompetitiveFreqNormAccumulator competitiveFreqNormAccumulator = new CompetitiveFreqNormAccumulator();
+
/** Creates a postings writer */
public Lucene50PostingsWriter(SegmentWriteState state) throws IOException {
final float acceptableOverheadRatio = PackedInts.COMPACT;
@@ -185,6 +191,7 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
super.setField(fieldInfo);
skipWriter.setField(writePositions, writeOffsets, writePayloads);
lastState = emptyState;
+ fieldHasNorms = fieldInfo.hasNorms();
if (writePositions) {
if (writePayloads || writeOffsets) {
return 3; // doc + pos + pay FP
@@ -197,7 +204,7 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
}
@Override
- public void startTerm() {
+ public void startTerm(NumericDocValues norms) {
docStartFP = docOut.getFilePointer();
if (writePositions) {
posStartFP = posOut.getFilePointer();
@@ -208,6 +215,8 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
lastDocID = 0;
lastBlockDocID = -1;
skipWriter.resetSkip();
+ this.norms = norms;
+ competitiveFreqNormAccumulator.clear();
}
@Override
@@ -216,7 +225,9 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
// Should write skip data as well as postings list for
// current block.
if (lastBlockDocID != -1 && docBufferUpto == 0) {
- skipWriter.bufferSkip(lastBlockDocID, docCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto);
+ skipWriter.bufferSkip(lastBlockDocID, competitiveFreqNormAccumulator, docCount,
+ lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockPayloadByteUpto);
+ competitiveFreqNormAccumulator.clear();
}
final int docDelta = docID - lastDocID;
@@ -247,6 +258,24 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
lastDocID = docID;
lastPosition = 0;
lastStartOffset = 0;
+
+ long norm;
+ if (fieldHasNorms) {
+ boolean found = norms.advanceExact(docID);
+ if (found == false) {
+ // This can happen if indexing hits a problem after adding a doc to the
+ // postings but before buffering the norm. Such documents are written
+ // deleted and will go away on the first merge.
+ norm = 1L;
+ } else {
+ norm = norms.longValue();
+ assert norm != 0 : docID;
+ }
+ } else {
+ norm = 1L;
+ }
+
+ competitiveFreqNormAccumulator.add(writeFreqs ? termDocFreq : 1, norm);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java
new file mode 100644
index 0000000..cb1f54a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50ScoreSkipReader.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene50;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+
+final class Lucene50ScoreSkipReader extends Lucene50SkipReader {
+
+ private final SimScorer scorer;
+ private final float[] maxScore;
+ private final byte[][] impacts;
+ private final int[] impactsLength;
+ private final float globalMaxScore;
+ private final ByteArrayDataInput badi = new ByteArrayDataInput();
+
+ public Lucene50ScoreSkipReader(int version, IndexInput skipStream, int maxSkipLevels,
+ boolean hasPos, boolean hasOffsets, boolean hasPayloads, SimScorer scorer) {
+ super(version, skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads);
+ if (version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
+ throw new IllegalStateException("Cannot skip based on scores if impacts are not indexed");
+ }
+ this.scorer = Objects.requireNonNull(scorer);
+ this.maxScore = new float[maxSkipLevels];
+ this.impacts = new byte[maxSkipLevels][];
+ Arrays.fill(impacts, new byte[0]);
+ this.impactsLength = new int[maxSkipLevels];
+ this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1);
+ }
+
+ @Override
+ public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) throws IOException {
+ super.init(skipPointer, docBasePointer, posBasePointer, payBasePointer, df);
+ Arrays.fill(impactsLength, 0);
+ Arrays.fill(maxScore, globalMaxScore);
+ }
+
+ /** Upper bound of scores up to {@code upTo} included. */
+ public float getMaxScore(int upTo) throws IOException {
+ for (int level = 0; level < numberOfSkipLevels; ++level) {
+ if (upTo <= skipDoc[level]) {
+ return maxScore(level);
+ }
+ }
+ return globalMaxScore;
+ }
+
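+ // Impacts are decoded lazily: the serialized bytes are buffered per level
+ // and only turned into a score the first time that level's max is asked for.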
+ private float maxScore(int level) throws IOException {
+ assert level < numberOfSkipLevels;
+ if (impactsLength[level] > 0) {
+ badi.reset(impacts[level], 0, impactsLength[level]);
+ maxScore[level] = readImpacts(badi, scorer);
+ impactsLength[level] = 0;
+ }
+ return maxScore[level];
+ }
+
+ @Override
+ protected void readImpacts(int level, IndexInput skipStream) throws IOException {
+ int length = skipStream.readVInt();
+ if (impacts[level].length < length) {
+ impacts[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
+ }
+ skipStream.readBytes(impacts[level], 0, length);
+ impactsLength[level] = length;
+ }
+
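+ // Decodes the pairs written by Lucene50SkipWriter#writeImpacts: each vInt
+ // holds the freq delta minus one, shifted left by one, with the low bit set
+ // when a non-trivial norm delta follows as a zlong. For example, the pairs
+ // (freq=1, norm=1), (freq=3, norm=2) encode as the two vInts 0 and 2.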
+ static float readImpacts(ByteArrayDataInput in, SimScorer scorer) throws IOException {
+ int freq = 0;
+ long norm = 0;
+ float maxScore = 0;
+ while (in.getPosition() < in.length()) {
+ int freqDelta = in.readVInt();
+ if ((freqDelta & 0x01) != 0) {
+ freq += 1 + (freqDelta >>> 1);
+ norm += 1 + in.readZLong();
+ } else {
+ freq += 1 + (freqDelta >>> 1);
+ norm++;
+ }
+ maxScore = Math.max(maxScore, scorer.score(freq, norm));
+ }
+ return maxScore;
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java
index 8c037c5..b92cd42 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipReader.java
@@ -52,7 +52,8 @@ import static org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.BLOCK_SIZ
* Therefore, we'll trim df before passing it to the interface. see trim(int)
*
*/
-final class Lucene50SkipReader extends MultiLevelSkipListReader {
+class Lucene50SkipReader extends MultiLevelSkipListReader {
+ private final int version;
private long docPointer[];
private long posPointer[];
private long payPointer[];
@@ -65,8 +66,11 @@ final class Lucene50SkipReader extends MultiLevelSkipListReader {
private long lastDocPointer;
private int lastPosBufferUpto;
- public Lucene50SkipReader(IndexInput skipStream, int maxSkipLevels, boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
+ public Lucene50SkipReader(int version,
+ IndexInput skipStream, int maxSkipLevels,
+ boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
super(skipStream, maxSkipLevels, BLOCK_SIZE, 8);
+ this.version = version;
docPointer = new long[maxSkipLevels];
if (hasPos) {
posPointer = new long[maxSkipLevels];
@@ -192,6 +196,17 @@ final class Lucene50SkipReader extends MultiLevelSkipListReader {
payPointer[level] += skipStream.readVLong();
}
}
+ readImpacts(level, skipStream);
return delta;
}
+
+ // The default impl skips impacts since they are only useful if we have a SimScorer
+ // to compute the scores that impacts map to.
+ protected void readImpacts(int level, IndexInput skipStream) throws IOException {
+ if (version >= Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
+ // just skip over the serialized impacts; they cannot be used without a SimScorer
+ skipStream.skipBytes(skipStream.readVInt());
+ }
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java
index a4556c6..cc94ed0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SkipWriter.java
@@ -19,9 +19,14 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Set;
+import java.util.SortedSet;
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
/**
* Write skip lists with multiple levels, and support skip within block ints.
@@ -60,6 +65,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
private long curPayPointer;
private int curPosBufferUpto;
private int curPayloadByteUpto;
+ private CompetitiveFreqNormAccumulator[] curCompetitiveFreqNorms;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
@@ -79,6 +85,10 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
}
lastPayloadByteUpto = new int[maxSkipLevels];
}
+ curCompetitiveFreqNorms = new CompetitiveFreqNormAccumulator[maxSkipLevels];
+ for (int i = 0; i < maxSkipLevels; ++i) {
+ curCompetitiveFreqNorms[i] = new CompetitiveFreqNormAccumulator();
+ }
}
public void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
@@ -105,10 +115,15 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
lastPayFP = payOut.getFilePointer();
}
}
+ if (initialized) {
+ for (CompetitiveFreqNormAccumulator acc : curCompetitiveFreqNorms) {
+ acc.clear();
+ }
+ }
initialized = false;
}
- public void initSkip() {
+ private void initSkip() {
if (!initialized) {
super.resetSkip();
Arrays.fill(lastSkipDoc, 0);
@@ -122,6 +137,11 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
Arrays.fill(lastSkipPayPointer, lastPayFP);
}
}
+ // sets of competitive (freq, norm) pairs should be empty at this point
+ assert Arrays.stream(curCompetitiveFreqNorms)
+ .map(CompetitiveFreqNormAccumulator::getCompetitiveFreqNormPairs)
+ .mapToInt(Set::size)
+ .sum() == 0;
initialized = true;
}
}
@@ -129,7 +149,8 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
/**
* Sets the values for the current skip data.
*/
- public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
+ public void bufferSkip(int doc, CompetitiveFreqNormAccumulator competitiveFreqNorms,
+ int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
initSkip();
this.curDoc = doc;
this.curDocPointer = docOut.getFilePointer();
@@ -137,11 +158,15 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
this.curPayPointer = payFP;
this.curPosBufferUpto = posBufferUpto;
this.curPayloadByteUpto = payloadByteUpto;
+ this.curCompetitiveFreqNorms[0].addAll(competitiveFreqNorms);
bufferSkip(numDocs);
}
-
+
+ private final RAMOutputStream freqNormOut = new RAMOutputStream();
+
@Override
protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
+
int delta = curDoc - lastSkipDoc[level];
skipBuffer.writeVInt(delta);
@@ -165,5 +190,35 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
lastSkipPayPointer[level] = curPayPointer;
}
}
+
+ CompetitiveFreqNormAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
+ assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
+ if (level + 1 < numberOfSkipLevels) {
+ curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
+ }
+ writeImpacts(competitiveFreqNorms, freqNormOut);
+ skipBuffer.writeVInt(Math.toIntExact(freqNormOut.getFilePointer()));
+ freqNormOut.writeTo(skipBuffer);
+ freqNormOut.reset();
+ competitiveFreqNorms.clear();
+ }
+
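+ // Serializes the competitive (freq, norm) pairs in increasing freq order;
+ // deltas are encoded relative to the previous pair minus one, which is safe
+ // because both freq and unsigned norm strictly increase across the set.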
+ static void writeImpacts(CompetitiveFreqNormAccumulator acc, IndexOutput out) throws IOException {
+ SortedSet<FreqAndNorm> freqAndNorms = acc.getCompetitiveFreqNormPairs();
+ FreqAndNorm previous = new FreqAndNorm(0, 0);
+ for (FreqAndNorm freqAndNorm : freqAndNorms) {
+ assert freqAndNorm.freq > previous.freq;
+ assert Long.compareUnsigned(freqAndNorm.norm, previous.norm) > 0;
+ int freqDelta = freqAndNorm.freq - previous.freq - 1;
+ long normDelta = freqAndNorm.norm - previous.norm - 1;
+ if (normDelta == 0) {
+ // most of the time the norm only increases by 1, so we can fold everything into the freq vInt
+ out.writeVInt(freqDelta << 1);
+ } else {
+ out.writeVInt((freqDelta << 1) | 1);
+ out.writeZLong(normDelta);
+ }
+ previous = freqAndNorm;
+ }
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
index 24eaf7a..6138896 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
@@ -100,7 +100,13 @@ final class IndexedDISI extends DocIdSetIterator {
private final long cost;
IndexedDISI(IndexInput in, long offset, long length, long cost) throws IOException {
- this.slice = in.slice("docs", offset, length);
+ this(in.slice("docs", offset, length), cost);
+ }
+
+ // This constructor allows passing the slice directly in case it helps reuse,
+ // see e.g. the Lucene70 norms producer's merge instance
+ IndexedDISI(IndexInput slice, long cost) throws IOException {
+ this.slice = slice;
this.cost = cost;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
index 386655e..7bea274 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
@@ -37,6 +38,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
@@ -1158,6 +1160,11 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public int docFreq() throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
index eb7c41a..c7310e8 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
@@ -40,11 +40,14 @@ import org.apache.lucene.util.IOUtils;
/**
* Reader for {@link Lucene70NormsFormat}
*/
-final class Lucene70NormsProducer extends NormsProducer {
+final class Lucene70NormsProducer extends NormsProducer implements Cloneable {
// metadata maps (just file pointers and minimal stuff)
private final Map<Integer,NormsEntry> norms = new HashMap<>();
- private final IndexInput data;
private final int maxDoc;
+ private IndexInput data;
+ private boolean merging;
+ private Map<Integer, IndexInput> disiInputs;
+ private Map<Integer, RandomAccessInput> dataInputs;
Lucene70NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.maxDoc();
@@ -87,6 +90,22 @@ final class Lucene70NormsProducer extends NormsProducer {
}
}
+ @Override
+ public NormsProducer getMergeInstance() throws IOException {
+ Lucene70NormsProducer clone;
+ try {
+ clone = (Lucene70NormsProducer) super.clone();
+ } catch (CloneNotSupportedException e) {
+ // cannot happen
+ throw new RuntimeException(e);
+ }
+ clone.data = data.clone();
+ clone.dataInputs = new HashMap<>();
+ clone.disiInputs = new HashMap<>();
+ clone.merging = true;
+ return clone;
+ }
+
static class NormsEntry {
byte bytesPerNorm;
long docsWithFieldOffset;
@@ -193,6 +212,34 @@ final class Lucene70NormsProducer extends NormsProducer {
}
}
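+ // On a merge instance, slices are cached per field so that reading the
+ // norms of a field for every term reuses a single IndexInput rather than
+ // re-slicing the data file each time.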
+ private RandomAccessInput getDataInput(FieldInfo field, NormsEntry entry) throws IOException {
+ RandomAccessInput slice = null;
+ if (merging) {
+ slice = dataInputs.get(field.number);
+ }
+ if (slice == null) {
+ slice = data.randomAccessSlice(entry.normsOffset, entry.numDocsWithField * (long) entry.bytesPerNorm);
+ if (merging) {
+ dataInputs.put(field.number, slice);
+ }
+ }
+ return slice;
+ }
+
+ private IndexInput getDisiInput(FieldInfo field, NormsEntry entry) throws IOException {
+ IndexInput slice = null;
+ if (merging) {
+ slice = disiInputs.get(field.number);
+ }
+ if (slice == null) {
+ slice = data.slice("docs", entry.docsWithFieldOffset, entry.docsWithFieldLength);
+ if (merging) {
+ disiInputs.put(field.number, slice);
+ }
+ }
+ return slice;
+ }
+
@Override
public NumericDocValues getNorms(FieldInfo field) throws IOException {
final NormsEntry entry = norms.get(field.number);
@@ -209,7 +256,7 @@ final class Lucene70NormsProducer extends NormsProducer {
}
};
}
- final RandomAccessInput slice = data.randomAccessSlice(entry.normsOffset, entry.numDocsWithField * (long) entry.bytesPerNorm);
+ final RandomAccessInput slice = getDataInput(field, entry);
switch (entry.bytesPerNorm) {
case 1:
return new DenseNormsIterator(maxDoc) {
@@ -245,7 +292,8 @@ final class Lucene70NormsProducer extends NormsProducer {
}
} else {
// sparse
- final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
+ final IndexInput disiInput = getDisiInput(field, entry);
+ final IndexedDISI disi = new IndexedDISI(disiInput, entry.numDocsWithField);
if (entry.bytesPerNorm == 0) {
return new SparseNormsIterator(disi) {
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
index 281b08f..36f0358 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
@@ -34,6 +34,7 @@ import java.util.TreeSet;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
@@ -117,7 +118,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(fields);
// Write postings
@@ -137,7 +138,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
- consumer.write(maskedFields);
+ consumer.write(maskedFields, norms);
}
success = true;
} finally {
@@ -148,7 +149,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
}
@Override
- public void merge(MergeState mergeState) throws IOException {
+ public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(new MultiFields(mergeState.fieldsProducers, null));
// Merge postings
@@ -161,7 +162,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
- consumer.merge(pfMergeState.apply(group.fields));
+ consumer.merge(pfMergeState.apply(group.fields), norms);
}
success = true;
} finally {
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index c676568..7dd1aa9 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -48,6 +48,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -1598,8 +1599,109 @@ public final class CheckIndex implements Closeable {
}
}
}
+
+ // Test score blocks
+ // We only score on freq to keep things simple and not pull norms
+ SimScorer scorer = new SimScorer(field) {
+ @Override
+ public float score(float freq, long norm) {
+ return freq;
+ }
+ };
+
+ // First check max scores and block uptos
+ int max = -1;
+ float maxScore = 0;
+ ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+ postings = termsEnum.postings(postings, PostingsEnum.FREQS);
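+ // Walk impacts and postings in lockstep: both must agree on doc IDs and
+ // freqs, and each freq-only score must stay within the advertised max.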
+ for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
+ if (postings.nextDoc() != doc) {
+ throw new RuntimeException("Wrong next doc: " + doc + ", expected " + postings.docID());
+ }
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ if (postings.freq() != impacts.freq()) {
+ throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+ }
+ if (doc > max) {
+ max = impacts.advanceShallow(doc);
+ if (max < doc) {
+ throw new RuntimeException("max block doc id " + max + " must be greater than or equal to the target: " + doc);
+ }
+ maxScore = impacts.getMaxScore(max);
+ }
+ int max2 = impacts.advanceShallow(doc);
+ if (max != max2) {
+ throw new RuntimeException("max is not stable, initially had " + max + " but now " + max2);
+ }
+ float score = scorer.score(impacts.freq(), 1);
+ if (score > maxScore) {
+ throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+ }
+ }
+
+ // Now check advancing
+ impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+ postings = termsEnum.postings(postings, PostingsEnum.FREQS);
+
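+ // Interleave nextDoc and advance pseudo-randomly, deriving both the choice
+ // and the target from the field's hash code so failures are reproducible.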
+ max = -1;
+ while (true) {
+ int doc = impacts.docID();
+ boolean advance;
+ int target;
+ if (((field.hashCode() + doc) & 1) == 1) {
+ advance = false;
+ target = doc + 1;
+ } else {
+ advance = true;
+ int delta = Math.min(1 + ((31 * field.hashCode() + doc) & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
+ target = impacts.docID() + delta;
+ }
+
+ if (target > max && target % 2 == 1) {
+ int delta = Math.min((31 * field.hashCode() + target) & 0x1ff, DocIdSetIterator.NO_MORE_DOCS - target);
+ max = target + delta;
+ int m = impacts.advanceShallow(target);
+ if (m < target) {
+ throw new RuntimeException("Block max doc: " + m + " is less than the target " + target);
+ }
+ maxScore = impacts.getMaxScore(max);
+ }
+
+ if (advance) {
+ doc = impacts.advance(target);
+ } else {
+ doc = impacts.nextDoc();
+ }
+
+ if (postings.advance(target) != doc) {
+ throw new RuntimeException("Impacts do not advance to the same document as postings for target " + target + ", postings: " + postings.docID() + ", impacts: " + doc);
+ }
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ if (postings.freq() != impacts.freq()) {
+ throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+ }
+
+ if (doc >= max) {
+ int delta = Math.min((31 * field.hashCode() + target) & 0x1ff, DocIdSetIterator.NO_MORE_DOCS - doc);
+ max = doc + delta;
+ int m = impacts.advanceShallow(doc);
+ if (m < doc) {
+ throw new RuntimeException("Block max doc: " + m + " is less than the target " + doc);
+ }
+ maxScore = impacts.getMaxScore(max);
+ }
+
+ float score = scorer.score(impacts.freq(), 1);
+ if (score > maxScore) {
+ throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+ }
+ }
}
-
+
if (minTerm != null && status.termCount + status.delTermCount == 0) {
throw new RuntimeException("field=\"" + field + "\": minTerm is non-null yet we saw no terms: " + minTerm);
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
index fd24105..4e05aa6 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
@@ -31,6 +31,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.FieldType;
@@ -126,6 +127,7 @@ final class DefaultIndexingChain extends DocConsumer {
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write norms");
}
+ SegmentReadState readState = new SegmentReadState(state.directory, state.segmentInfo, state.fieldInfos, IOContext.READ, state.segmentSuffix);
t0 = System.nanoTime();
writeDocValues(state, sortMap);
@@ -159,7 +161,16 @@ final class DefaultIndexingChain extends DocConsumer {
}
}
- termsHash.flush(fieldsToFlush, state, sortMap);
+ try (NormsProducer norms = readState.fieldInfos.hasNorms()
+ ? state.segmentInfo.getCodec().normsFormat().normsProducer(readState)
+ : null) {
+ NormsProducer normsMergeInstance = null;
+ if (norms != null) {
+ // Use the merge instance in order to reuse the same IndexInput for all terms
+ normsMergeInstance = norms.getMergeInstance();
+ }
+ termsHash.flush(fieldsToFlush, state, sortMap, normsMergeInstance);
+ }
if (docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", ((System.nanoTime()-t0)/1000000) + " msec to write postings and finish vectors");
}
@@ -693,6 +704,9 @@ final class DefaultIndexingChain extends DocConsumer {
normValue = 0;
} else {
normValue = similarity.computeNorm(invertState);
+ if (normValue == 0) {
+ throw new IllegalStateException("Similarity " + similarity + " returned 0 for a non-empty field");
+ }
}
norms.addValue(docState.docID, normValue);
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
index 0450038..4a9b660 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -214,6 +215,10 @@ public abstract class FilterLeafReader extends LeafReader {
return in.postings(reuse, flags);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return in.impacts(scorer, flags);
+ }
}
/** Base class for filtering {@link PostingsEnum} implementations. */
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
index 6498dc0..411b435 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
/**
@@ -181,7 +182,12 @@ public abstract class FilteredTermsEnum extends TermsEnum {
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return tenum.postings(reuse, flags);
}
-
+
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return tenum.impacts(scorer, flags);
+ }
+
/** This enum does not support seeking!
* @throws UnsupportedOperationException In general, subclasses do not
* support seeking.
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
index fb78a92..c3e7d71 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -273,6 +274,11 @@ class FreqProxFields extends Fields {
return docsEnum;
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
* without re-seeking the term dictionary.
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
index d953f8d..ac70669 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.IOUtils;
@@ -78,8 +79,9 @@ final class FreqProxTermsWriter extends TermsHash {
}
@Override
- public void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
- super.flush(fieldsToFlush, state, sortMap);
+ public void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
+ Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
+ super.flush(fieldsToFlush, state, sortMap, norms);
// Gather all fields that saw any postings:
List<FreqProxTermsWriterPerField> allFields = new ArrayList<>();
@@ -105,7 +107,7 @@ final class FreqProxTermsWriter extends TermsHash {
FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
boolean success = false;
try {
- consumer.write(fields);
+ consumer.write(fields, norms);
success = true;
} finally {
if (success) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java b/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java
new file mode 100644
index 0000000..8deccff
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * Extension of {@link PostingsEnum} which also provides information about the
+ * produced scores.
+ * @lucene.experimental
+ */
+public abstract class ImpactsEnum extends PostingsEnum {
+
+ /** Sole constructor. */
+ protected ImpactsEnum() {}
+
+ /**
+ * Advance to the block of documents that contains {@code target} in order to
+ * get scoring information about this block. This method is implicitly called
+ * by {@link DocIdSetIterator#advance(int)} and
+ * {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
+ * current {@link DocIdSetIterator#docID()}.
+ * It returns a number that is greater than or equal to all doc IDs
+ * contained in the current block, but less than any doc ID of the next block.
+ * {@code target} must be &gt;= {@link #docID()} as well as all targets that
+ * have been passed to {@link #advanceShallow(int)} so far.
+ */
+ public abstract int advanceShallow(int target) throws IOException;
+
+ /**
+ * Return the maximum score of documents between the last {@code target}
+ * that this iterator was {@link #advanceShallow(int) shallow-advanced} to,
+ * inclusive, and {@code upTo}, inclusive.
+ */
+ public abstract float getMaxScore(int upTo) throws IOException;
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
index 7db838b..7de8427 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -367,6 +368,11 @@ public final class MultiTermsEnum extends TermsEnum {
return docsEnum.reset(subDocs, upto);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
final static class TermsEnumWithSlice {
private final ReaderSlice subSlice;
TermsEnum terms;
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
index c67b92d..ad60a94 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -24,6 +24,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.NormsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsWriter;
@@ -109,10 +110,33 @@ final class SegmentMerger {
final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo,
mergeState.mergeFieldInfos, null, context);
+ final SegmentReadState segmentReadState = new SegmentReadState(directory, mergeState.segmentInfo, mergeState.mergeFieldInfos,
+ IOContext.READ, segmentWriteState.segmentSuffix);
+
+ if (mergeState.mergeFieldInfos.hasNorms()) {
+ if (mergeState.infoStream.isEnabled("SM")) {
+ t0 = System.nanoTime();
+ }
+ mergeNorms(segmentWriteState);
+ if (mergeState.infoStream.isEnabled("SM")) {
+ long t1 = System.nanoTime();
+ mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge norms [" + numMerged + " docs]");
+ }
+ }
+
if (mergeState.infoStream.isEnabled("SM")) {
t0 = System.nanoTime();
}
- mergeTerms(segmentWriteState);
+ try (NormsProducer norms = mergeState.mergeFieldInfos.hasNorms()
+ ? codec.normsFormat().normsProducer(segmentReadState)
+ : null) {
+ NormsProducer normsMergeInstance = null;
+ if (norms != null) {
+ // Use the merge instance in order to reuse the same IndexInput for all terms
+ normsMergeInstance = norms.getMergeInstance();
+ }
+ mergeTerms(segmentWriteState, normsMergeInstance);
+ }
if (mergeState.infoStream.isEnabled("SM")) {
long t1 = System.nanoTime();
mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge postings [" + numMerged + " docs]");
@@ -139,17 +163,6 @@ final class SegmentMerger {
long t1 = System.nanoTime();
mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge points [" + numMerged + " docs]");
}
-
- if (mergeState.mergeFieldInfos.hasNorms()) {
- if (mergeState.infoStream.isEnabled("SM")) {
- t0 = System.nanoTime();
- }
- mergeNorms(segmentWriteState);
- if (mergeState.infoStream.isEnabled("SM")) {
- long t1 = System.nanoTime();
- mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge norms [" + numMerged + " docs]");
- }
- }
if (mergeState.mergeFieldInfos.hasVectors()) {
if (mergeState.infoStream.isEnabled("SM")) {
@@ -225,9 +238,9 @@ final class SegmentMerger {
}
}
- private void mergeTerms(SegmentWriteState segmentWriteState) throws IOException {
+ private void mergeTerms(SegmentWriteState segmentWriteState, NormsProducer norms) throws IOException {
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState)) {
- consumer.merge(mergeState);
+ consumer.merge(mergeState, norms);
}
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java
new file mode 100644
index 0000000..9ba27e2
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/SlowImpactsEnum.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * {@link ImpactsEnum} that doesn't index impacts but implements the API in a
+ * legal way. This should typically be used for short postings that do not need
+ * skipping.
+ */
+public final class SlowImpactsEnum extends ImpactsEnum {
+
+ private final PostingsEnum delegate;
+ private final float maxScore;
+
+ /** Wrap the given {@link PostingsEnum}. */
+ public SlowImpactsEnum(PostingsEnum delegate, float maxScore) {
+ this.delegate = delegate;
+ this.maxScore = maxScore;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return delegate.nextDoc();
+ }
+
+ @Override
+ public int docID() {
+ return delegate.docID();
+ }
+
+ @Override
+ public long cost() {
+ return delegate.cost();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return delegate.advance(target);
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return delegate.startOffset();
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ return delegate.nextPosition();
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return delegate.getPayload();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return delegate.freq();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return delegate.endOffset();
+ }
+
+ @Override
+ public int advanceShallow(int target) {
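+ // a single virtual block that spans all remaining doc IDs, bounded by maxScore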
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public float getMaxScore(int maxDoc) {
+ return maxScore;
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
index ccee7a3..70d4387 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -110,6 +111,11 @@ class SortedDocValuesTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
index eba95c9..9099ac8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
@@ -17,6 +17,7 @@
package org.apache.lucene.index;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -110,6 +111,11 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
index dff808e..054ca50 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.search.DocIdSetIterator;
@@ -37,8 +38,8 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer {
}
@Override
- void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
- super.flush(fieldsToFlush, state, sortMap);
+ void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
+ super.flush(fieldsToFlush, state, sortMap, norms);
if (tmpDirectory != null) {
if (sortMap == null) {
// we're lucky the index is already sorted, just rename the temporary file and return
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
index 46dc63c..1ac20dd 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
@@ -53,7 +54,7 @@ class TermVectorsConsumer extends TermsHash {
}
@Override
- void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
+ void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
if (writer != null) {
int numDocs = state.segmentInfo.maxDoc();
assert numDocs > 0;
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
index 4b5755a..7bbb3f7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
@@ -171,6 +172,12 @@ public abstract class TermsEnum implements BytesRefIterator {
public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException;
/**
+ * Return a {@link ImpactsEnum} that computes impacts with {@code scorer}.
+ * @see #postings(PostingsEnum, int)
+ */
+ public abstract ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException;
+
+ /**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
* without re-seeking the term dictionary.
* <p>
@@ -228,7 +235,12 @@ public abstract class TermsEnum implements BytesRefIterator {
public PostingsEnum postings(PostingsEnum reuse, int flags) {
throw new IllegalStateException("this method should never be called");
}
-
+
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new IllegalStateException("this method should never be called");
+ }
+
@Override
public BytesRef next() {
return null;
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHash.java b/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
index bede2f8..f420aca 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsHash.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
@@ -76,13 +77,14 @@ abstract class TermsHash {
bytePool.reset(false, false);
}
- void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
+ void flush(Map<String,TermsHashPerField> fieldsToFlush, final SegmentWriteState state,
+ Sorter.DocMap sortMap, NormsProducer norms) throws IOException {
if (nextTermsHash != null) {
Map<String,TermsHashPerField> nextChildFields = new HashMap<>();
for (final Map.Entry<String,TermsHashPerField> entry : fieldsToFlush.entrySet()) {
nextChildFields.put(entry.getKey(), entry.getValue().nextPerField);
}
- nextTermsHash.flush(nextChildFields, state, sortMap);
+ nextTermsHash.flush(nextChildFields, state, sortMap, norms);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
index 881c5dd..72f9473 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
@@ -17,11 +17,13 @@
package org.apache.lucene.search;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
@@ -273,6 +275,11 @@ public final class FuzzyTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return actualEnum.impacts(scorer, flags);
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
actualEnum.seekExact(term, state);
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java
index 5de8295..f3dc5ea 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java
@@ -40,6 +40,11 @@ public final class LeafSimScorer {
maxScore = needsScores ? scorer.score(maxFreq, 1) : Float.MAX_VALUE;
}
+ /** Return the wrapped {@link SimScorer}. */
+ public SimScorer getSimScorer() {
+ return scorer;
+ }
+
private long getNormValue(int doc) throws IOException {
if (norms != null) {
boolean found = norms.advanceExact(doc);
diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
index d9335cf..1eba910 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
@@ -29,7 +29,6 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
@@ -208,9 +207,8 @@ public final class SynonymQuery extends Query {
termsEnum.seekExact(terms[i].bytes(), state);
long termMaxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq());
totalMaxFreq += termMaxFreq;
- PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), true, termMaxFreq);
- subScorers.add(new TermScorer(this, postings, simScorer));
+ subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer));
}
}
if (subScorers.isEmpty()) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index d629acd..f1f4415 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
@@ -46,21 +45,21 @@ public class TermQuery extends Query {
private final Similarity similarity;
private final Similarity.SimScorer simScorer;
private final TermStates termStates;
- private final boolean needsScores;
+ private final ScoreMode scoreMode;
- public TermWeight(IndexSearcher searcher, boolean needsScores,
+ public TermWeight(IndexSearcher searcher, ScoreMode scoreMode,
float boost, TermStates termStates) throws IOException {
super(TermQuery.this);
- if (needsScores && termStates == null) {
+ if (scoreMode.needsScores() && termStates == null) {
throw new IllegalStateException("termStates are required when scores are needed");
}
- this.needsScores = needsScores;
+ this.scoreMode = scoreMode;
this.termStates = termStates;
this.similarity = searcher.getSimilarity();
final CollectionStatistics collectionStats;
final TermStatistics termStats;
- if (needsScores) {
+ if (scoreMode.needsScores()) {
collectionStats = searcher.collectionStatistics(term.field());
termStats = searcher.termStatistics(term, termStates);
} else {
@@ -97,10 +96,9 @@ public class TermQuery extends Query {
.getFieldInfos()
.fieldInfo(getTerm().field())
.getIndexOptions();
- PostingsEnum docs = termsEnum.postings(null, needsScores ? PostingsEnum.FREQS : PostingsEnum.NONE);
- assert docs != null;
float maxFreq = getMaxFreq(indexOptions, termsEnum.totalTermFreq(), termsEnum.docFreq());
- return new TermScorer(this, docs, new LeafSimScorer(simScorer, context.reader(), needsScores, maxFreq));
+ LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), scoreMode.needsScores(), maxFreq);
+ return new TermScorer(this, termsEnum, scoreMode, scorer);
}
private long getMaxFreq(IndexOptions indexOptions, long ttf, long df) {
@@ -198,7 +196,7 @@ public class TermQuery extends Query {
termState = this.perReaderTermState;
}
- return new TermWeight(searcher, scoreMode.needsScores(), boost, termState);
+ return new TermWeight(searcher, scoreMode, boost, termState);
}
/** Prints a user-readable version of this query. */
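
For context, a hedged usage sketch (not from the patch): the ScoreMode that TermWeight now receives originates from the collector, so top-k searches can opt into impact-based skipping. This assumes TopScoreDocCollector's trackTotalHits flag maps to COMPLETE/TOP_SCORES the way the TestTermScorer changes below exercise it:

    import java.io.IOException;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.TopScoreDocCollector;

    // Top-k search that allows impact-based skipping: trackTotalHits=false
    // lets the collector report ScoreMode.TOP_SCORES to createWeight.
    static TopDocs topScores(IndexSearcher searcher, int k) throws IOException {
      Query query = new TermQuery(new Term("body", "lucene")); // hypothetical field/term
      TopScoreDocCollector collector = TopScoreDocCollector.create(k, null, false);
      searcher.search(query, collector);
      return collector.topDocs();
    }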
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
index 653a60e..fc426da 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@@ -19,29 +19,92 @@ package org.apache.lucene.search;
import java.io.IOException;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.TermsEnum;
/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
*/
final class TermScorer extends Scorer {
private final PostingsEnum postingsEnum;
+ private final DocIdSetIterator iterator;
private final LeafSimScorer docScorer;
+ private float minCompetitiveScore;
/**
* Construct a <code>TermScorer</code>.
*
* @param weight
* The weight of the <code>Term</code> in the query.
- * @param td
- * An iterator over the documents matching the <code>Term</code>.
+ * @param te
+ * A {@link TermsEnum} positioned on the expected term.
* @param docScorer
- * The <code>Similarity.SimScorer</code> implementation
- * to be used for score computations.
+ * A {@link LeafSimScorer} for the appropriate field.
*/
- TermScorer(Weight weight, PostingsEnum td, LeafSimScorer docScorer) {
+ TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
- this.postingsEnum = td;
+ if (scoreMode == ScoreMode.TOP_SCORES) {
+ ImpactsEnum impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS);
+ postingsEnum = impactsEnum;
+ iterator = new DocIdSetIterator() {
+
+ int upTo = -1;
+ float maxScore;
+
+ private int advanceTarget(int target) throws IOException {
+ if (minCompetitiveScore == 0) {
+ // no potential for skipping
+ return target;
+ }
+
+ if (target > upTo) {
+ upTo = impactsEnum.advanceShallow(target);
+ maxScore = impactsEnum.getMaxScore(upTo);
+ }
+
+ while (true) {
+ assert upTo >= target;
+
+ if (maxScore >= minCompetitiveScore) {
+ return target;
+ }
+
+ if (upTo == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+
+ target = upTo + 1;
+
+ upTo = impactsEnum.advanceShallow(target);
+ maxScore = impactsEnum.getMaxScore(upTo);
+ }
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return impactsEnum.advance(advanceTarget(target));
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(impactsEnum.docID() + 1);
+ }
+
+ @Override
+ public int docID() {
+ return impactsEnum.docID();
+ }
+
+ @Override
+ public long cost() {
+ return impactsEnum.cost();
+ }
+ };
+ } else {
+ postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
+ iterator = postingsEnum;
+ }
}
@Override
@@ -55,7 +118,7 @@ final class TermScorer extends Scorer {
@Override
public DocIdSetIterator iterator() {
- return postingsEnum;
+ return iterator;
}
@Override
@@ -69,6 +132,11 @@ final class TermScorer extends Scorer {
return docScorer.maxScore();
}
+ @Override
+ public void setMinCompetitiveScore(float minScore) {
+ this.minCompetitiveScore = minScore;
+ }
+
/** Returns a string representation of this <code>TermScorer</code>. */
@Override
public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; }
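
The new iterator only starts skipping once minCompetitiveScore rises above zero, and that value comes from the collector via setMinCompetitiveScore. A minimal sketch of that side of the contract, assuming a min-heap of the current top-k scores (the `topK` queue and `k` are illustrative, not this patch's code):

    import java.io.IOException;
    import java.util.PriorityQueue;
    import org.apache.lucene.search.Scorer;

    // Once the top-k heap is full, any hit scoring below its smallest entry
    // can never enter the results, so tell the scorer to skip such docs.
    static void collect(Scorer scorer, PriorityQueue<Float> topK, int k) throws IOException {
      float score = scorer.score();
      if (topK.size() < k) {
        topK.add(score);
      } else if (score > topK.peek()) {
        topK.poll();
        topK.add(score);
      }
      if (topK.size() == k) {
        scorer.setMinCompetitiveScore(topK.peek());
      }
    }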
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java
new file mode 100644
index 0000000..5743e64
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCompetitiveFreqNormAccumulator.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestCompetitiveFreqNormAccumulator extends LuceneTestCase {
+
+ public void testBasics() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+ Set<FreqAndNorm> expected = new HashSet<>();
+
+ acc.add(3, 5);
+ expected.add(new FreqAndNorm(3, 5));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(6, 11);
+ expected.add(new FreqAndNorm(6, 11));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(10, 13);
+ expected.add(new FreqAndNorm(10, 13));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(1, 2);
+ expected.add(new FreqAndNorm(1, 2));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(7, 9);
+ expected.remove(new FreqAndNorm(6, 11));
+ expected.add(new FreqAndNorm(7, 9));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(8, 2);
+ expected.clear();
+ expected.add(new FreqAndNorm(10, 13));
+ expected.add(new FreqAndNorm(8, 2));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+ }
+
+ public void testExtremeNorms() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+ Set<FreqAndNorm> expected = new HashSet<>();
+
+ acc.add(3, 5);
+ expected.add(new FreqAndNorm(3, 5));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(10, 10000);
+ expected.add(new FreqAndNorm(10, 10000));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(5, 200);
+ expected.add(new FreqAndNorm(5, 200));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(20, -100);
+ expected.add(new FreqAndNorm(20, -100));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+
+ acc.add(30, -3);
+ expected.add(new FreqAndNorm(30, -3));
+ assertEquals(expected, acc.getCompetitiveFreqNormPairs());
+ }
+
+ public void testOmitFreqs() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+
+ acc.add(1, 5);
+ acc.add(1, 7);
+ acc.add(1, 4);
+
+ assertEquals(Collections.singleton(new FreqAndNorm(1, 4)), acc.getCompetitiveFreqNormPairs());
+ }
+
+ public void testOmitNorms() {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+
+ acc.add(5, 1);
+ acc.add(7, 1);
+ acc.add(4, 1);
+
+ assertEquals(Collections.singleton(new FreqAndNorm(7, 1)), acc.getCompetitiveFreqNormPairs());
+ }
+}
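
These tests exercise a pareto-frontier invariant: a (freq, norm) pair stays competitive unless some other pair has a greater-or-equal freq and a lesser-or-equal norm, with norms compared as unsigned longs (which is why norms like -100 above behave as very large values rather than dominating everything). A minimal sketch of that dominance rule, under the assumption that scores increase with freq and decrease with norm:

    // Pair A makes pair B redundant if A scores at least as high as B for
    // any similarity that increases with freq and decreases with norm.
    static boolean dominates(int freqA, long normA, int freqB, long normB) {
      return freqA >= freqB && Long.compareUnsigned(normA, normB) <= 0;
    }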
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
index f2ed86c..d507b7b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
@@ -17,8 +17,11 @@
package org.apache.lucene.codecs.lucene50;
+import java.io.IOException;
+
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.document.Document;
@@ -27,7 +30,12 @@ import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.TestUtil;
/**
@@ -78,4 +86,56 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
shouldFail(10, -1);
shouldFail(10, 12);
}
+
+ public void testImpactSerialization() throws IOException {
+ // omit norms and omit freqs
+ doTestImpactSerialization(new int[] { 1 }, new long[] { 1L });
+
+ // omit freqs
+ doTestImpactSerialization(new int[] { 1 }, new long[] { 42L });
+ // omit freqs with very large norms
+ doTestImpactSerialization(new int[] { 1 }, new long[] { -100L });
+
+ // omit norms
+ doTestImpactSerialization(new int[] { 30 }, new long[] { 1L });
+ // omit norms with large freq
+ doTestImpactSerialization(new int[] { 500 }, new long[] { 1L });
+
+ // freqs and norms, basic
+ doTestImpactSerialization(
+ new int[] { 1, 3, 7, 15, 20, 28 },
+ new long[] { 7L, 9L, 10L, 11L, 13L, 14L });
+
+ // freqs and norms, high values
+ doTestImpactSerialization(
+ new int[] { 2, 10, 12, 50, 1000, 1005 },
+ new long[] { 2L, 10L, 50L, -100L, -80L, -3L });
+ }
+
+ private void doTestImpactSerialization(int[] freqs, long[] norms) throws IOException {
+ CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+ for (int i = 0; i < freqs.length; ++i) {
+ acc.add(freqs[i], norms[i]);
+ }
+ try(Directory dir = newDirectory()) {
+ try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
+ Lucene50SkipWriter.writeImpacts(acc, out);
+ }
+ try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
+ byte[] b = new byte[Math.toIntExact(in.length())];
+ in.readBytes(b, 0, b.length);
+ Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new SimScorer("") {
+ int i = 0;
+
+ @Override
+ public float score(float freq, long norm) {
+ assert freq == freqs[i];
+ assert norm == norms[i];
+ i++;
+ return 0;
+ }
+ });
+ }
+ }
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
index 804f507..84544bc 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
@@ -407,17 +408,17 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
final FieldsConsumer consumer = delegate.fieldsConsumer(state);
return new FieldsConsumer() {
@Override
- public void write(Fields fields) throws IOException {
- consumer.write(fields);
+ public void write(Fields fields, NormsProducer norms) throws IOException {
+ consumer.write(fields, norms);
}
@Override
- public void merge(MergeState mergeState) throws IOException {
+ public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
nbMergeCalls++;
for (FieldInfo fi : mergeState.mergeFieldInfos) {
fieldNames.add(fi.name);
}
- consumer.merge(mergeState);
+ consumer.merge(mergeState, norms);
}
@Override
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
index 4625f73..efe4587 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
@@ -17,6 +17,7 @@
package org.apache.lucene.index;
+import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -28,10 +29,12 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -676,6 +679,10 @@ public class TestCodecs extends LuceneTestCase {
return new DataPostingsEnum(fieldData.terms[upto]);
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
}
private static class DataPostingsEnum extends PostingsEnum {
@@ -752,9 +759,65 @@ public class TestCodecs extends LuceneTestCase {
Arrays.sort(fields);
FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
+ NormsProducer fakeNorms = new NormsProducer() {
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
+
+ @Override
+ public void close() throws IOException {}
+
+ @Override
+ public NumericDocValues getNorms(FieldInfo field) throws IOException {
+ return new NumericDocValues() {
+
+ int doc = -1;
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public long cost() {
+ return si.maxDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= si.maxDoc()) {
+ return doc = NO_MORE_DOCS;
+ } else {
+ return doc = target;
+ }
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ doc = target;
+ return true;
+ }
+
+ @Override
+ public long longValue() throws IOException {
+ return 1;
+ }
+ };
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {}
+ };
boolean success = false;
try {
- consumer.write(new DataFields(fields));
+ consumer.write(new DataFields(fields), fakeNorms);
success = true;
} finally {
if (success) {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
index 2758c96..73d3e6a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
@@ -24,12 +24,18 @@ import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -202,4 +208,57 @@ public class TestTermScorer extends LuceneTestCase {
// should not fail this time since norms are not necessary
weight2.scorer(forbiddenNorms.getContext()).iterator().nextDoc();
}
+
+ public void testRandomTopDocs() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+ int numDocs = atLeast(128 * 8 * 8 * 3); // make sure some terms have skip data
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ int numValues = random().nextInt(1 << random().nextInt(5));
+ int start = random().nextInt(10);
+ for (int j = 0; j < numValues; ++j) {
+ doc.add(new StringField("foo", Integer.toString(start + j), Store.NO));
+ }
+ w.addDocument(doc);
+ }
+ IndexReader reader = DirectoryReader.open(w);
+ w.close();
+ IndexSearcher searcher = newSearcher(reader);
+
+ for (int iter = 0; iter < 15; ++iter) {
+ Query query = new TermQuery(new Term("foo", Integer.toString(iter)));
+
+ TopScoreDocCollector collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
+ TopScoreDocCollector collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
+
+ searcher.search(query, collector1);
+ searcher.search(query, collector2);
+ assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
+
+ int filterTerm = random().nextInt(15);
+ Query filteredQuery = new BooleanQuery.Builder()
+ .add(query, Occur.MUST)
+ .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
+ .build();
+
+ collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
+ collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
+ searcher.search(filteredQuery, collector1);
+ searcher.search(filteredQuery, collector2);
+ assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
+ }
+ reader.close();
+ dir.close();
+ }
+
+ private static void assertTopDocsEquals(TopDocs td1, TopDocs td2) {
+ assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
+ for (int i = 0; i < td1.scoreDocs.length; ++i) {
+ ScoreDoc sd1 = td1.scoreDocs[i];
+ ScoreDoc sd2 = td2.scoreDocs[i];
+ assertEquals(sd1.doc, sd2.doc);
+ assertEquals(sd1.score, sd2.score, 0f);
+ }
+ }
}
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 0d8d949..4014e8c 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -42,6 +42,7 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@@ -1425,6 +1426,11 @@ public class MemoryIndex {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1L));
+ }
+
+ @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null;
this.seekExact(((OrdTermState)state).ord);
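
SlowImpactsEnum, as used by MemoryIndex above, is the fallback for enums that do not record per-block impacts: it wraps the plain postings and reports a single constant ceiling for the whole list. The bound passed here, scorer.score(Float.MAX_VALUE, 1L), assumes scores grow with freq and shrink with norm, so it can never underestimate any document's score. A sketch of the pattern:

    import org.apache.lucene.index.ImpactsEnum;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.SlowImpactsEnum;
    import org.apache.lucene.search.similarities.Similarity.SimScorer;

    // Conservative fallback: the highest conceivable freq with the smallest
    // norm gives an upper bound on the score of every document in the list.
    static ImpactsEnum slowImpacts(PostingsEnum postings, SimScorer scorer) {
      float maxScore = scorer.score(Float.MAX_VALUE, 1L);
      return new SlowImpactsEnum(postings, maxScore);
    }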
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
index 54f4aa4..4203e07 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java
@@ -23,7 +23,9 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
@@ -88,6 +90,11 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
}
@Override
+ public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException("Should never be called, IDVersionSegmentTermsEnum implements impacts directly");
+ }
+
+ @Override
public long ramBytesUsed() {
return 0;
}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
index fc643d2..30e1980 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java
@@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
@@ -78,7 +79,7 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
}
@Override
- public void startTerm() {
+ public void startTerm(NumericDocValues norms) {
lastDocID = -1;
}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
index 0af64d9..d5f51e0 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnum.java
@@ -20,9 +20,12 @@ import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -1006,6 +1009,13 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ // Only one posting, the slow impl is fine
+ // We could make this throw UOE but then CheckIndex is angry
+ return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+ }
+
+ @Override
public void seekExact(BytesRef target, TermState otherState) {
// if (DEBUG) {
// System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
index e9187af..078ca8b 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
@@ -23,6 +23,7 @@ import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.index.FieldInfo;
@@ -221,7 +222,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
String lastField = null;
for(String field : fields) {
@@ -241,7 +242,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
if (term == null) {
break;
}
- termsWriter.write(term, termsEnum);
+ termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
@@ -729,8 +730,8 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
}
/** Writes one term's worth of postings. */
- public void write(BytesRef text, TermsEnum termsEnum) throws IOException {
- BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
+ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException {
+ BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms);
// TODO: LUCENE-5693: we don't need this check if we fix IW to not send deleted docs to us on flush:
if (state != null && ((IDVersionPostingsWriter) postingsWriter).lastDocID != -1) {
assert state.docFreq != 0;
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java
index 9df9d60..5ab16a9 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
@@ -44,7 +45,7 @@ import static org.apache.lucene.search.suggest.document.CompletionPostingsFormat
/**
* <p>
- * Weighted FSTs for any indexed {@link SuggestField} is built on {@link #write(Fields)}.
+ * Weighted FSTs for any indexed {@link SuggestField} are built on {@link #write(Fields,NormsProducer)}.
* A weighted FST maps the analyzed forms of a field to its
* surface form and document id. FSTs are stored in the CompletionDictionary (.lkp).
* </p>
@@ -80,8 +81,8 @@ final class CompletionFieldsConsumer extends FieldsConsumer {
}
@Override
- public void write(Fields fields) throws IOException {
- delegateFieldsConsumer.write(fields);
+ public void write(Fields fields, NormsProducer norms) throws IOException {
+ delegateFieldsConsumer.write(fields, norms);
for (String field : fields) {
CompletionTermWriter termWriter = new CompletionTermWriter();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
index a89b508..e71903d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
@@ -22,6 +22,7 @@ import java.util.Iterator;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.AssertingLeafReader;
import org.apache.lucene.index.FieldInfo;
@@ -133,8 +134,8 @@ public final class AssertingPostingsFormat extends PostingsFormat {
}
@Override
- public void write(Fields fields) throws IOException {
- in.write(fields);
+ public void write(Fields fields, NormsProducer norms) throws IOException {
+ in.write(fields, norms);
// TODO: more asserts? can we somehow run a
// "limited" CheckIndex here??? Or ... can we improve
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java
index 2ca1bc7..00e168b 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/CrankyPostingsFormat.java
@@ -21,6 +21,7 @@ import java.util.Random;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.SegmentReadState;
@@ -61,11 +62,11 @@ class CrankyPostingsFormat extends PostingsFormat {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
if (random.nextInt(100) == 0) {
throw new IOException("Fake IOException from FieldsConsumer.write()");
}
- delegate.write(fields);
+ delegate.write(fields, norms);
}
@Override
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
index 4b85f13..37c078f 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
@@ -31,17 +31,21 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Accountable;
@@ -227,7 +231,7 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
}
@Override
- public void write(Fields fields) throws IOException {
+ public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
@@ -472,6 +476,10 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
return new RAMDocsEnum(ramField.termToDocs.get(current));
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
+ }
}
private static class RAMDocsEnum extends PostingsEnum {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
index c87697b..dfec1db 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
@@ -23,6 +23,7 @@ import java.util.Objects;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
@@ -209,6 +210,15 @@ public class AssertingLeafReader extends FilterLeafReader {
}
}
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ assertThread("Terms enums", creationThread);
+ assert state == State.POSITIONED: "impacts(...) called on unpositioned TermsEnum";
+ assert (flags & PostingsEnum.FREQS) != 0 : "Freqs should be requested on impacts";
+
+ return new AssertingImpactsEnum(super.impacts(scorer, flags));
+ }
+
// TODO: we should separately track if we are 'at the end' ?
// someone should not call next() after it returns null!!!!
@Override
@@ -439,6 +449,84 @@ public class AssertingLeafReader extends FilterLeafReader {
}
}
+ /** Wraps an {@link ImpactsEnum} with additional checks */
+ public static class AssertingImpactsEnum extends ImpactsEnum {
+
+ private final AssertingPostingsEnum assertingPostings;
+ private final ImpactsEnum in;
+ private int lastShallowTarget;
+
+ AssertingImpactsEnum(ImpactsEnum impacts) {
+ in = impacts;
+ // inherit checks from AssertingPostingsEnum
+ assertingPostings = new AssertingPostingsEnum(impacts);
+ }
+
+ @Override
+ public int advanceShallow(int target) throws IOException {
+ assert target >= lastShallowTarget : "called on decreasing targets: target = " + target + " < last target = " + lastShallowTarget;
+ assert target >= docID() : "target = " + target + " < docID = " + docID();
+ int upTo = in.advanceShallow(target);
+ assert upTo >= target : "upTo = " + upTo + " < target = " + target;
+ lastShallowTarget = target;
+ return upTo;
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ assert upTo >= lastShallowTarget : "upTo = " + upTo + " < last shallow target = " + lastShallowTarget;
+ float maxScore = in.getMaxScore(upTo);
+ return maxScore;
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return assertingPostings.freq();
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ return assertingPostings.nextPosition();
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return assertingPostings.startOffset();
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return assertingPostings.endOffset();
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return assertingPostings.getPayload();
+ }
+
+ @Override
+ public int docID() {
+ return assertingPostings.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ assert docID() + 1 >= lastShallowTarget : "target = " + (docID() + 1) + " < last shallow target = " + lastShallowTarget;
+ return assertingPostings.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ assert target >= lastShallowTarget : "target = " + target + " < last shallow target = " + lastShallowTarget;
+ return assertingPostings.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return assertingPostings.cost();
+ }
+ }
+
/** Wraps a NumericDocValues but with additional asserts */
public static class AssertingNumericDocValues extends NumericDocValues {
private final Thread creationThread = Thread.currentThread();
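
The AssertingImpactsEnum checks above encode the ImpactsEnum contract: advanceShallow targets must not decrease and must be at least the current doc, and getMaxScore(upTo) is only meaningful for an upTo at or beyond the last shallow target. A legal call sequence, sketched under those assumptions (names are illustrative):

    import java.io.IOException;
    import org.apache.lucene.index.ImpactsEnum;
    import org.apache.lucene.search.DocIdSetIterator;

    // Walk an ImpactsEnum while respecting the shallow-advance contract.
    static void walk(ImpactsEnum impacts) throws IOException {
      int doc = impacts.nextDoc();
      while (doc != DocIdSetIterator.NO_MORE_DOCS) {
        int upTo = impacts.advanceShallow(doc); // target >= docID(), non-decreasing
        float max = impacts.getMaxScore(upTo);  // valid: upTo >= last shallow target
        // ... use `max` to decide whether this block is worth scoring ...
        doc = impacts.advance(doc + 1);         // target >= last shallow target holds
      }
    }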
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
index ab92946..f5b5223 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
@@ -334,8 +334,30 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
// PostingsFormat
+ NormsProducer fakeNorms = new NormsProducer() {
+
+ @Override
+ public void close() throws IOException {}
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
+
+ @Override
+ public NumericDocValues getNorms(FieldInfo field) throws IOException {
+ if (field.hasNorms() == false) {
+ return null;
+ }
+ return oneDocReader.getNormValues(field.name);
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {}
+
+ };
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
- consumer.write(MultiFields.getFields(oneDocReader));
+ consumer.write(MultiFields.getFields(oneDocReader), fakeNorms);
IOUtils.close(consumer);
IOUtils.close(consumer);
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java
index 617a721..6b301f9 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java
@@ -450,9 +450,10 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
}
Directory dir = applyCreatedVersionMajor(newDirectory());
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
- IndexWriterConfig conf = newIndexWriterConfig(analyzer);conf.setMergePolicy(NoMergePolicy.INSTANCE);
- conf.setSimilarity(new CannedNormSimilarity(norms));
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ IndexWriterConfig conf = newIndexWriterConfig(analyzer).setMergePolicy(NoMergePolicy.INSTANCE);
+ CannedNormSimilarity sim = new CannedNormSimilarity(norms);
+ conf.setSimilarity(sim);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
@@ -471,7 +472,8 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
} else {
long value = norms[j++];
dvField.setLongValue(value);
- indexedField.setStringValue(Long.toString(value));
+ // only empty fields may have 0 as a norm
+ indexedField.setStringValue(value == 0 ? "" : "a");
writer.addDocument(doc);
}
if (random().nextInt(31) == 0) {
@@ -530,7 +532,13 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
@Override
public long computeNorm(FieldInvertState state) {
- return norms[index++];
+ assert state.length > 0;
+ while (true) {
+ long norm = norms[index++];
+ if (norm != 0) {
+ return norm;
+ }
+ }
}
@Override
@@ -642,7 +650,7 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
}
Directory dir = applyCreatedVersionMajor(newDirectory());
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
IndexWriterConfig conf = newIndexWriterConfig(analyzer);conf.setMergePolicy(NoMergePolicy.INSTANCE);
conf.setSimilarity(new CannedNormSimilarity(norms));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
@@ -663,7 +671,7 @@ public abstract class BaseNormsFormatTestCase extends BaseIndexFileFormatTestCas
} else {
long value = norms[j++];
dvField.setLongValue(value);
- indexedField.setStringValue(Long.toString(value));
+ indexedField.setStringValue(value == 0 ? "" : "a");
writer.addDocument(doc);
}
if (random().nextInt(31) == 0) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
index f69ca55..51b418e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -398,8 +399,8 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
return new FieldsConsumer() {
@Override
- public void write(Fields fields) throws IOException {
- fieldsConsumer.write(fields);
+ public void write(Fields fields, NormsProducer norms) throws IOException {
+ fieldsConsumer.write(fields, norms);
boolean isMerge = state.context.context == IOContext.Context.MERGE;
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
index d5eb105..278f4b2 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
@@ -32,10 +32,14 @@ import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
+import java.util.function.IntToLongFunction;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.NormsProducer;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
@@ -60,6 +64,9 @@ import static org.junit.Assert.assertTrue;
/** Helper class extracted from BasePostingsFormatTestCase to exercise a postings format. */
public class RandomPostingsTester {
+ private static final IntToLongFunction DOC_TO_NORM = doc -> 1 + (doc & 0x0f);
+ private static final long MAX_NORM = 0x10;
+
/** Which features to test. */
public enum Option {
// Sometimes use .advance():
@@ -599,6 +606,11 @@ public class RandomPostingsTester {
}
return getSeedPostings(current.getKey().utf8ToString(), current.getValue().seed, maxAllowed, allowPayloads);
}
+
+ @Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ throw new UnsupportedOperationException();
+ }
}
private static class ThreadState {
@@ -653,10 +665,70 @@ public class RandomPostingsTester {
Fields seedFields = new SeedFields(fields, newFieldInfos, maxAllowed, allowPayloads);
+ NormsProducer fakeNorms = new NormsProducer() {
+
+ @Override
+ public void close() throws IOException {}
+
+ @Override
+ public long ramBytesUsed() {
+ return 0;
+ }
+
+ @Override
+ public NumericDocValues getNorms(FieldInfo field) throws IOException {
+ if (newFieldInfos.fieldInfo(field.number).hasNorms()) {
+ return new NumericDocValues() {
+
+ int doc = -1;
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (++doc == segmentInfo.maxDoc()) {
+ return doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public long cost() {
+ return segmentInfo.maxDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return doc = target >= segmentInfo.maxDoc() ? DocIdSetIterator.NO_MORE_DOCS : target;
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ doc = target;
+ return true;
+ }
+
+ @Override
+ public long longValue() throws IOException {
+ return DOC_TO_NORM.applyAsLong(doc);
+ }
+ };
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {}
+
+ };
FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState);
boolean success = false;
try {
- consumer.write(seedFields);
+ consumer.write(seedFields, fakeNorms);
success = true;
} finally {
if (success) {
@@ -975,6 +1047,136 @@ public class RandomPostingsTester {
}
}
}
+
+ if (options.contains(Option.SKIPPING)) {
+ final IntToLongFunction docToNorm;
+ if (fieldInfo.hasNorms()) {
+ docToNorm = DOC_TO_NORM;
+ } else {
+ docToNorm = doc -> 1L;
+ }
+ for (int s = 0; s < 3; ++s) {
+ final int scoreMode = s;
+ SimScorer scorer = new SimScorer(field) {
+ @Override
+ public float score(float freq, long norm) {
+ switch (scoreMode) {
+ case 0:
+ return freq; // make sure the postings record the best freq
+ case 1:
+ return 1f / norm; // make sure the postings record the best norm
+ default:
+ return freq - norm + MAX_NORM; // now a combination that could make intermediate pairs more competitive
+ }
+ }
+ };
+
+ // First check max scores and block uptos
+ int max = -1;
+ float maxScore = 0;
+ int flags = PostingsEnum.FREQS;
+ if (doCheckPositions) {
+ flags |= PostingsEnum.POSITIONS;
+ if (doCheckOffsets) {
+ flags |= PostingsEnum.OFFSETS;
+ }
+ if (doCheckPayloads) {
+ flags |= PostingsEnum.PAYLOADS;
+ }
+ }
+
+ ImpactsEnum impacts = termsEnum.impacts(scorer, flags);
+ PostingsEnum postings = termsEnum.postings(null, flags);
+ for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
+ assertEquals(postings.nextDoc(), doc);
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ int freq = postings.freq();
+ assertEquals("freq is wrong", freq, impacts.freq());
+ for (int i = 0; i < freq; ++i) {
+ int pos = postings.nextPosition();
+ assertEquals("position is wrong", pos, impacts.nextPosition());
+ if (doCheckOffsets) {
+ assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
+ assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
+ }
+ if (doCheckPayloads) {
+ assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
+ }
+ }
+ if (doc > max) {
+ max = impacts.advanceShallow(doc);
+ assertTrue(max >= doc);
+ maxScore = impacts.getMaxScore(max);
+ }
+ assertEquals(max, impacts.advanceShallow(doc));
+ assertTrue(scorer.score(impacts.freq(), docToNorm.applyAsLong(doc)) <= maxScore);
+ }
+
+ // Now check advancing
+ impacts = termsEnum.impacts(scorer, flags);
+ postings = termsEnum.postings(postings, flags);
+
+ max = -1;
+ while (true) {
+ int doc = impacts.docID();
+ boolean advance;
+ int target;
+ if (random.nextBoolean()) {
+ advance = false;
+ target = doc + 1;
+ } else {
+ advance = true;
+ int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
+ target = impacts.docID() + delta;
+ }
+
+ if (target > max && random.nextBoolean()) {
+ int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target);
+ max = target + delta;
+ int m = impacts.advanceShallow(target);
+ assertTrue(m >= target);
+ maxScore = impacts.getMaxScore(max);
+ }
+
+ if (advance) {
+ doc = impacts.advance(target);
+ } else {
+ doc = impacts.nextDoc();
+ }
+
+ assertEquals(postings.advance(target), doc);
+ if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ int freq = postings.freq();
+ assertEquals("freq is wrong", freq, impacts.freq());
+ for (int i = 0; i < postings.freq(); ++i) {
+ int pos = postings.nextPosition();
+ assertEquals("position is wrong", pos, impacts.nextPosition());
+ if (doCheckOffsets) {
+ assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
+ assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
+ }
+ if (doCheckPayloads) {
+ assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
+ }
+ }
+
+ if (doc > max) {
+ int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
+ max = doc + delta;
+ int m = impacts.advanceShallow(doc);
+ assertTrue(m >= doc);
+ maxScore = impacts.getMaxScore(max);
+ }
+
+ float score = scorer.score(impacts.freq(), docToNorm.applyAsLong(doc));
+ assertTrue(score <= maxScore);
+ }
+ }
+ }
}
private static class TestThread extends Thread {
diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
index 7e54f8d..1b81c7f 100644
--- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
+++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
@@ -42,6 +43,7 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
@@ -239,6 +241,11 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return te.impacts(scorer, flags);
+ }
+
+ @Override
public BytesRef next() throws IOException {
if (positioned) {
positioned = false;
diff --git a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
index b56d462..daaf00d 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/DocTermOrds.java
@@ -26,6 +26,7 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LegacySortedSetDocValues;
import org.apache.lucene.index.LegacySortedSetDocValuesWrapper;
@@ -34,6 +35,7 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -607,6 +609,11 @@ public class DocTermOrds implements Accountable {
}
@Override
+ public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+ return termsEnum.impacts(scorer, flags);
+ }
+
+ @Override
public BytesRef term() {
return term;
}