blob: 9ca14ff50e176e67940cfa582f50109fc1c32ca7 [file] [log] [blame]
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
index fd7cccd..07227d2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@@ -155,7 +155,7 @@ public class ConjunctionDISI extends DocIdSetIterator {
@Override
public long cost() {
- return lead.cost();
+ return lead.cost(); // overestimate
}
/**
@@ -164,16 +164,33 @@ public class ConjunctionDISI extends DocIdSetIterator {
private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
private final TwoPhaseIterator[] twoPhaseIterators;
+ private final float matchCost;
private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseIterator> twoPhaseIterators) {
super(new ConjunctionDISI(iterators));
assert twoPhaseIterators.size() > 0;
+
+ CollectionUtil.timSort(twoPhaseIterators, new Comparator<TwoPhaseIterator>() {
+ @Override
+ public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
+ return Float.compare(o1.matchCost(), o2.matchCost());
+ }
+ });
+
this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);
+
+ // Compute the matchCost as the total matchCost of the sub iterators.
+ // TODO: This could be too high because the matching is done cheapest first: give the lower matchCosts a higher weight.
+ float totalMatchCost = 0;
+ for (TwoPhaseIterator tpi : twoPhaseIterators) {
+ totalMatchCost += tpi.matchCost();
+ }
+ matchCost = totalMatchCost;
}
@Override
public boolean matches() throws IOException {
- for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) {
+ for (TwoPhaseIterator twoPhaseIterator : twoPhaseIterators) { // match cheapest first
if (twoPhaseIterator.matches() == false) {
return false;
}
@@ -181,6 +198,11 @@ public class ConjunctionDISI extends DocIdSetIterator {
return true;
}
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
+
}
/**
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
index c32a520..e02efba 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
@@ -52,19 +52,25 @@ abstract class DisjunctionScorer extends Scorer {
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
- boolean hasApproximation = false;
+ float sumMatchCost = 0;
+ long sumApproxCost = 0;
+
+ // Compute matchCost as the average over the matchCost of the subScorers.
+ // This is weighted by the cost, which is an expected number of matching documents.
for (DisiWrapper<Scorer> w : subScorers) {
if (w.twoPhaseView != null) {
- hasApproximation = true;
- break;
+ long costWeight = (w.cost <= 1) ? 1 : w.cost;
+ sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
+ sumApproxCost += costWeight;
}
}
- if (! hasApproximation) {
- // none of the sub scorers supports approximations
+ if (sumApproxCost == 0) { // no sub scorer supports approximations
return null;
}
+ final float matchCost = sumMatchCost / sumApproxCost;
+
// note it is important to share the same pq as this scorer so that
// rebalancing the pq through the approximation will also rebalance
// the pq in this scorer.
@@ -105,6 +111,11 @@ abstract class DisjunctionScorer extends Scorer {
DisjunctionScorer.this.topScorers = topScorers;
return true;
}
+
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
};
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
index 48060ef..248a948 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -44,9 +44,11 @@ final class ExactPhraseScorer extends Scorer {
private final Similarity.SimScorer docScorer;
private final boolean needsScores;
+ private float matchCost;
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- Similarity.SimScorer docScorer, boolean needsScores) throws IOException {
+ Similarity.SimScorer docScorer, boolean needsScores,
+ float matchCost) throws IOException {
super(weight);
this.docScorer = docScorer;
this.needsScores = needsScores;
@@ -59,6 +61,7 @@ final class ExactPhraseScorer extends Scorer {
}
conjunction = ConjunctionDISI.intersect(iterators);
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
+ this.matchCost = matchCost;
}
@Override
@@ -68,6 +71,11 @@ final class ExactPhraseScorer extends Scorer {
public boolean matches() throws IOException {
return phraseFreq() > 0;
}
+
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
};
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index f29d86a..58620fa 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -189,6 +189,7 @@ public class MultiPhraseQuery extends Query {
// Reuse single TermsEnum below:
final TermsEnum termsEnum = fieldTerms.iterator();
+ float totalMatchCost = 0;
for (int pos=0; pos<postingsFreqs.length; pos++) {
Term[] terms = termArrays.get(pos);
@@ -199,6 +200,7 @@ public class MultiPhraseQuery extends Query {
if (termState != null) {
termsEnum.seekExact(term.bytes(), termState);
postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS));
+ totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
}
}
@@ -222,9 +224,13 @@ public class MultiPhraseQuery extends Query {
}
if (slop == 0) {
- return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
+ return new ExactPhraseScorer(this, postingsFreqs,
+ similarity.simScorer(stats, context),
+ needsScores, totalMatchCost);
} else {
- return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
+ return new SloppyPhraseScorer(this, postingsFreqs, slop,
+ similarity.simScorer(stats, context),
+ needsScores, totalMatchCost);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index fd3cddf..049cfbe 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -405,6 +405,7 @@ public class PhraseQuery extends Query {
// Reuse single TermsEnum below:
final TermsEnum te = fieldTerms.iterator();
+ float totalMatchCost = 0;
for (int i = 0; i < terms.length; i++) {
final Term t = terms[i];
@@ -416,6 +417,7 @@ public class PhraseQuery extends Query {
te.seekExact(t.bytes(), state);
PostingsEnum postingsEnum = te.postings(null, PostingsEnum.POSITIONS);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
+ totalMatchCost += termPositionsCost(te);
}
// sort by increasing docFreq order
@@ -424,9 +426,13 @@ public class PhraseQuery extends Query {
}
if (slop == 0) { // optimize exact case
- return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context), needsScores);
+ return new ExactPhraseScorer(this, postingsFreqs,
+ similarity.simScorer(stats, context),
+ needsScores, totalMatchCost);
} else {
- return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context), needsScores);
+ return new SloppyPhraseScorer(this, postingsFreqs, slop,
+ similarity.simScorer(stats, context),
+ needsScores, totalMatchCost);
}
}
@@ -456,6 +462,42 @@ public class PhraseQuery extends Query {
}
}
+ /** A guess of
+ * the average number of simple operations for the initial seek and buffer refill
+ * per document for the positions of a term.
+ * See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
+ * <p>
+ * Aside: Instead of being constant this could depend among others on
+ * {@link Lucene50PostingsFormat#BLOCK_SIZE},
+ * {@link TermsEnum#docFreq()},
+ * {@link TermsEnum#totalTermFreq()},
+ * {@link DocIdSetIterator#cost()} (expected number of matching docs),
+ * {@link LeafReader#maxDoc()} (total number of docs in the segment),
+ * and the seek time and block size of the device storing the index.
+ */
+ private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
+
+ /** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
+ * when no seek or buffer refill is done.
+ */
+ private static final int TERM_OPS_PER_POS = 7;
+
+ /** Returns an expected cost in simple operations
+ * of processing the occurrences of a term
+ * in a document that contains the term.
+ * This is for use by {@link TwoPhaseIterator#matchCost} implementations.
+ * <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
+ * @param termsEnum The term is the term at which this TermsEnum is positioned.
+ */
+ static float termPositionsCost(TermsEnum termsEnum) throws IOException {
+ int docFreq = termsEnum.docFreq();
+ assert docFreq > 0;
+ long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
+ float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
+ return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
+ }
+
+
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new PhraseWeight(searcher, needsScores);
diff --git a/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java b/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java
index 5e920cb..2d25e29 100644
--- a/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/RandomAccessWeight.java
@@ -62,6 +62,11 @@ public abstract class RandomAccessWeight extends ConstantScoreWeight {
return matchingDocs.get(doc);
}
+
+ @Override
+ public float matchCost() {
+ return 10; // TODO: use some cost of matchingDocs
+ }
};
return new ConstantScoreScorer(this, score(), twoPhase);
diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
index 125d887..d401cde 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
@@ -149,6 +149,10 @@ class ReqExclScorer extends Scorer {
return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
}
+ @Override
+ public float matchCost() {
+ return reqTwoPhaseIterator.matchCost(); // TODO: also use cost of exclApproximation.advance()
+ }
};
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
index 4ee2bf6..5a5cae6 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -52,9 +52,11 @@ final class SloppyPhraseScorer extends Scorer {
private int numMatches;
final boolean needsScores;
+ private final float matchCost;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- int slop, Similarity.SimScorer docScorer, boolean needsScores) {
+ int slop, Similarity.SimScorer docScorer, boolean needsScores,
+ float matchCost) {
super(weight);
this.docScorer = docScorer;
this.needsScores = needsScores;
@@ -68,6 +70,7 @@ final class SloppyPhraseScorer extends Scorer {
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
}
conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
+ this.matchCost = matchCost;
}
/**
@@ -596,6 +599,16 @@ final class SloppyPhraseScorer extends Scorer {
sloppyFreq = phraseFreq(); // check for phrase
return sloppyFreq != 0F;
}
+
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
+
+ @Override
+ public String toString() {
+ return "SloppyPhraseScorer@asTwoPhaseIterator(" + SloppyPhraseScorer.this + ")";
+ }
};
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
index 3d774c5..ff22e5d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
+import org.apache.lucene.index.TermsEnum;
/**
* Returned by {@link Scorer#asTwoPhaseIterator()}
* to expose an approximation of a {@link DocIdSetIterator}.
@@ -84,15 +85,23 @@ public abstract class TwoPhaseIterator {
return approximation;
}
- /** Return whether the current doc ID that the iterator is on matches. This
+ /** Return whether the current doc ID that {@link #approximation()} is on matches. This
* method should only be called when the iterator is positioned -- ie. not
* when {@link DocIdSetIterator#docID()} is {@code -1} or
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
public abstract boolean matches() throws IOException;
+ /** An estimate of the expected cost to determine that a single document {@link #matches()}.
+ * This can be called before iterating the documents of {@link #approximation()}.
+ * Returns an expected cost in number of simple operations like addition, multiplication,
+ * comparing two numbers and indexing an array.
+ * The returned value must be positive.
+ */
+ public abstract float matchCost();
+
/**
* Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
- * when available * otherwise returns null.
+ * when available, otherwise returns null.
*/
public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
return (iter instanceof Scorer)
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java
index fcc2484..533714d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/ConjunctionSpans.java
@@ -88,14 +88,34 @@ abstract class ConjunctionSpans extends Spans {
*/
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
- TwoPhaseIterator res = new TwoPhaseIterator(conjunction) {
+ float totalMatchCost = 0;
+ // Compute the matchCost as the total matchCost/positionsCost of the sub spans.
+ for (Spans spans : subSpans) {
+ TwoPhaseIterator tpi = spans.asTwoPhaseIterator();
+ if (tpi != null) {
+ totalMatchCost += tpi.matchCost();
+ } else {
+ totalMatchCost += spans.positionsCost();
+ }
+ }
+ final float matchCost = totalMatchCost;
+ return new TwoPhaseIterator(conjunction) {
@Override
public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches();
}
+
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
};
- return res;
+ }
+
+ @Override
+ public float positionsCost() {
+ throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null here.
}
public Spans[] getSubSpans() {
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
index e4ec1b5..1db08aa 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
@@ -142,6 +142,16 @@ public abstract class FilterSpans extends Spans {
public boolean matches() throws IOException {
return inner.matches() && twoPhaseCurrentDocMatches();
}
+
+ @Override
+ public float matchCost() {
+ return inner.matchCost(); // underestimate
+ }
+
+ @Override
+ public String toString() {
+ return "FilterSpans@asTwoPhaseIterator(inner=" + inner + ", in=" + in + ")";
+ }
};
} else {
// wrapped instance has no approximation, but
@@ -151,10 +161,25 @@ public abstract class FilterSpans extends Spans {
public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches();
}
+
+ @Override
+ public float matchCost() {
+ return in.positionsCost(); // overestimate
+ }
+
+ @Override
+ public String toString() {
+ return "FilterSpans@asTwoPhaseIterator(in=" + in + ")";
+ }
};
}
}
+ @Override
+ public float positionsCost() {
+ throw new UnsupportedOperationException(); // asTwoPhaseIterator never returns null
+ }
+
/**
* Returns true if the current document matches.
* <p>
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
index bd40add..cf92e6f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
@@ -134,6 +134,11 @@ public class NearSpansUnordered extends ConjunctionSpans {
}
@Override
+ public float positionsCost() {
+ return in.positionsCost();
+ }
+
+ @Override
public int docID() {
return in.docID();
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java
index a409477..6274466 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/ScoringWrapperSpans.java
@@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.TwoPhaseIterator;
/**
* A Spans that wraps another Spans with a different SimScorer
@@ -82,4 +83,14 @@ public class ScoringWrapperSpans extends Spans {
public long cost() {
return in.cost();
}
+
+ @Override
+ public TwoPhaseIterator asTwoPhaseIterator() {
+ return in.asTwoPhaseIterator();
+ }
+
+ @Override
+ public float positionsCost() {
+ return in.positionsCost();
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
index 3fd1703..33c7d92 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
@@ -384,6 +384,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
public long cost() {
return 0;
}
+
+ @Override
+ public float positionsCost() {
+ throw new UnsupportedOperationException();
+ }
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
index 9c39f41..6fadd60 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
@@ -210,26 +210,58 @@ public final class SpanOrQuery extends SpanQuery {
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
- boolean hasApproximation = false;
+ float sumMatchCost = 0; // See also DisjunctionScorer.asTwoPhaseIterator()
+ long sumApproxCost = 0;
+
for (DisiWrapper<Spans> w : byDocQueue) {
if (w.twoPhaseView != null) {
- hasApproximation = true;
- break;
+ long costWeight = (w.cost <= 1) ? 1 : w.cost;
+ sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
+ sumApproxCost += costWeight;
}
}
- if (!hasApproximation) { // none of the sub spans supports approximations
+ if (sumApproxCost == 0) { // none of the sub spans support approximations
+ computePositionsCost();
return null;
}
+ final float matchCost = sumMatchCost / sumApproxCost;
+
return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
@Override
public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches();
}
+
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
};
}
+ float positionsCost = -1;
+
+ void computePositionsCost() {
+ float sumPositionsCost = 0;
+ long sumCost = 0;
+ for (DisiWrapper<Spans> w : byDocQueue) {
+ long costWeight = (w.cost <= 1) ? 1 : w.cost;
+ sumPositionsCost += w.iterator.positionsCost() * costWeight;
+ sumCost += costWeight;
+ }
+ positionsCost = sumPositionsCost / sumCost;
+ }
+
+ @Override
+ public float positionsCost() {
+ // This may be called when asTwoPhaseIterator returned null,
+ // which happens when none of the sub spans supports approximations.
+ assert positionsCost > 0;
+ return positionsCost;
+ }
+
int lastDocTwoPhaseMatched = -1;
boolean twoPhaseCurrentDocMatches() throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
index be75575..4799295 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
@@ -33,6 +33,7 @@ import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TwoPhaseIterator;
/** Matches spans containing a term.
* This should not be used for terms that are indexed at position Integer.MAX_VALUE.
@@ -117,10 +118,40 @@ public class SpanTermQuery extends SpanQuery {
termsEnum.seekExact(term.bytes(), state);
final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
- return new TermSpans(this, getSimScorer(context), postings, term);
+ float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
+ return new TermSpans(this, getSimScorer(context), postings, term, positionsCost);
}
}
+ /** A guess of
+ * the relative cost of dealing with the term positions
+ * when using a SpanNearQuery instead of a PhraseQuery.
+ */
+ private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f;
+
+ private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
+
+ private static final int TERM_OPS_PER_POS = 7;
+
+ /** Returns an expected cost in simple operations
+ * of processing the occurrences of a term
+ * in a document that contains the term.
+ * <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
+ * @param termsEnum The term is the term at which this TermsEnum is positioned.
+ * <p>
+ * This is a copy of {@code org.apache.lucene.search.PhraseQuery#termPositionsCost(TermsEnum)}.
+ * <br>
+ * TODO: keep only a single copy of this method and the constants used in it
+ * when SpanTermQuery moves to the o.a.l.search package.
+ */
+ static float termPositionsCost(TermsEnum termsEnum) throws IOException {
+ int docFreq = termsEnum.docFreq();
+ assert docFreq > 0;
+ long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
+ float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq);
+ return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
+ }
+
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
index fff328a..3f7ff4f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
@@ -86,6 +86,16 @@ public abstract class Spans extends Scorer {
*/
public abstract void collect(SpanCollector collector) throws IOException;
+ /**
+ * Return an estimation of the cost of using the positions of
+ * this {@link Spans} for any single document, but only after
+ * {@link #asTwoPhaseIterator} returned {@code null}.
+ * Otherwise this method should not be called.
+ * The returned value is independent of the current document.
+ *
+ * @lucene.experimental
+ */
+ public abstract float positionsCost();
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
index 802b761..68f3cd4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
@@ -37,13 +37,17 @@ public class TermSpans extends Spans {
protected int count;
protected int position;
protected boolean readPayload;
+ private final float positionsCost;
- public TermSpans(SpanWeight weight, Similarity.SimScorer scorer, PostingsEnum postings, Term term) {
+ public TermSpans(SpanWeight weight, Similarity.SimScorer scorer,
+ PostingsEnum postings, Term term, float positionsCost) {
super(weight, scorer);
this.postings = Objects.requireNonNull(postings);
this.term = Objects.requireNonNull(term);
this.doc = -1;
this.position = -1;
+ assert positionsCost > 0; // otherwise the TermSpans should not be created.
+ this.positionsCost = positionsCost;
}
@Override
@@ -119,6 +123,11 @@ public class TermSpans extends Spans {
}
@Override
+ public float positionsCost() {
+ return positionsCost;
+ }
+
+ @Override
public String toString() {
return "spans(" + term.toString() + ")@" +
(doc == -1 ? "START" : (doc == NO_MORE_DOCS) ? "ENDDOC"
@@ -128,5 +137,4 @@ public class TermSpans extends Spans {
public PostingsEnum getPostings() {
return postings;
}
-
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
index f62b19d..c907e6e 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
@@ -37,6 +37,11 @@ public class TestConjunctionDISI extends LuceneTestCase {
public boolean matches() throws IOException {
return confirmed.get(iterator.docID());
}
+
+ @Override
+ public float matchCost() {
+ return 5; // #operations in FixedBitSet#get()
+ }
};
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
index ed91bc6..f5680e9 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
@@ -82,6 +82,11 @@ final class JustCompileSearchSpans {
public long cost() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
+
+ @Override
+ public float positionsCost() {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
}
static final class JustCompileSpanQuery extends SpanQuery {
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java
index a819f9b..ee51e2f 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java
@@ -176,6 +176,11 @@ public final class DoubleRange extends Range {
public boolean matches() throws IOException {
return range.accept(values.doubleVal(approximation.docID()));
}
+
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of range.accept()
+ }
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java
index 66f6e2e..254bc8a 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java
@@ -168,6 +168,11 @@ public final class LongRange extends Range {
public boolean matches() throws IOException {
return range.accept(values.longVal(approximation.docID()));
}
+
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of range.accept()
+ }
};
return new ConstantScoreScorer(this, score(), twoPhase);
}
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
index 366932b..e0c7880 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java
@@ -184,6 +184,11 @@ final class GlobalOrdinalsQuery extends Query {
}
return false;
}
+
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
+ }
};
}
}
@@ -225,6 +230,11 @@ final class GlobalOrdinalsQuery extends Query {
}
return false;
}
+
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
+ }
};
}
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
index 385b302..c7763b7 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java
@@ -211,6 +211,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
return false;
}
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of values.getOrd() and collector.score()
+ }
};
}
}
@@ -253,6 +257,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
}
return false;
}
+
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of values.getOrd() and collector.score()
+ }
};
}
}
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
index c8e946e..a071a95 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
@@ -53,6 +53,11 @@ public abstract class ValueSourceScorer extends Scorer {
public boolean matches() throws IOException {
return ValueSourceScorer.this.matches(docID());
}
+
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of ValueSourceScorer.this.matches()
+ }
};
this.disi = TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator);
}
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java
index c805581..9602bd6 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java
@@ -274,6 +274,11 @@ public class PayloadScoreQuery extends SpanQuery {
public long cost() {
return in.cost();
}
+
+ @Override
+ public float positionsCost() {
+ return in.positionsCost();
+ }
}
}
diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java
index d49fb41..a7ccfb5 100644
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java
@@ -108,6 +108,11 @@ public class CompositeVerifyQuery extends Query {
public boolean matches() throws IOException {
return predFuncValues.boolVal(indexQueryScorer.docID());
}
+
+ @Override
+ public float matchCost() {
+ return 100; // TODO: use cost of predFuncValues.boolVal()
+ }
};
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java
index 798550f..7810c21 100644
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java
@@ -130,6 +130,11 @@ public class IntersectsRPTVerifyQuery extends Query {
return predFuncValues.boolVal(doc);
}
+
+ @Override
+ public float matchCost() { // fixed placeholder; the TODO names the calls a real estimate should cover
+ return 100; // TODO: use cost of exactIterator.advance() and predFuncValues.boolVal()
+ }
};
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java
index 2bc61ef..78f6f6c 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java
@@ -195,6 +195,19 @@ public class AssertingScorer extends Scorer {
}
return matches;
}
+
+ @Override
+ public float matchCost() { // forwards in.matchCost() after sanity-checking the value
+ float matchCost = in.matchCost();
+ assert ! Float.isNaN(matchCost); // checked separately so a NaN failure is distinguishable from a negative one
+ assert matchCost >= 0; // zero is accepted; only negative values fail
+ return matchCost;
+ }
+
+ @Override
+ public String toString() { // fixed label wrapped around the string form of `in`
+ return "AssertingScorer@asTwoPhaseIterator(" + in + ")";
+ }
};
}
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java
index 88cfd77..53a3610 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/RandomApproximationQuery.java
@@ -172,10 +172,12 @@ public class RandomApproximationQuery extends Query {
private final DocIdSetIterator disi;
private int lastDoc = -1;
+ private final float randomMatchCost;
RandomTwoPhaseView(Random random, DocIdSetIterator disi) {
super(new RandomApproximation(random, disi));
this.disi = disi;
+ this.randomMatchCost = random.nextFloat() * 200; // in [0, 200): nextFloat() is in [0, 1); drawn once, then fixed
}
@Override
@@ -190,6 +192,11 @@ public class RandomApproximationQuery extends Query {
return approximation.docID() == disi.docID();
}
+ @Override
+ public float matchCost() { // uses the approximation's own two-phase cost when one is returned, else the random value
+ TwoPhaseIterator tpi = TwoPhaseIterator.asTwoPhaseIterator(approximation); // may be null; handled on the next line
+ return (tpi == null) ? randomMatchCost : tpi.matchCost(); // TODO: is randomMatchCost ok?
+ }
}
private static class RandomApproximation extends DocIdSetIterator {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java
index 89a4ed2..18053a9 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java
@@ -191,6 +191,14 @@ class AssertingSpans extends Spans {
}
@Override
+ public float positionsCost() { // forwards in.positionsCost() after validating it
+ float cost = in.positionsCost();
+ assert ! Float.isNaN(cost) : "positionsCost() should not be NaN";
+ assert cost > 0 : "positionsCost() must be positive"; // strict: a cost of exactly zero fails too
+ return cost;
+ }
+
+ @Override
protected float scoreCurrentDoc() throws IOException {
assert in.docScorer != null : in.getClass() + " has no docScorer!";
return in.scoreCurrentDoc();
@@ -229,6 +237,18 @@ class AssertingSpans extends Spans {
}
return v;
}
+
+ @Override
+ public float matchCost() { // forwards in.matchCost() after validating it
+ float cost = in.matchCost();
+ if (Float.isNaN(cost)) { // explicit throw rather than `assert`, so the check runs even without -ea
+ throw new AssertionError("matchCost()=" + cost + " should not be NaN on doc ID " + approximation.docID());
+ }
+ if (cost < 0) { // zero is accepted; only negative values are rejected
+ throw new AssertionError("matchCost()=" + cost + " should be non negative on doc ID " + approximation.docID());
+ }
+ return cost;
+ }
}
class AssertingDISI extends DocIdSetIterator {
diff --git a/solr/core/src/java/org/apache/solr/search/Filter.java b/solr/core/src/java/org/apache/solr/search/Filter.java
index 6f968a8..98c5d2d 100644
--- a/solr/core/src/java/org/apache/solr/search/Filter.java
+++ b/solr/core/src/java/org/apache/solr/search/Filter.java
@@ -129,6 +129,11 @@ public abstract class Filter extends Query {
public boolean matches() throws IOException {
return bits.get(approximation.docID());
}
+
+ @Override
+ public float matchCost() { // estimated cost of one matches() call; a fixed placeholder for now
+ return 10; // TODO use cost of bits.get()
+ }
};
return new ConstantScoreScorer(this, 0f, twoPhase);
}