blob: c896ad14fd71a43592cdb02932c102d3abd2b545 [file] [log] [blame]
Index: lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java (revision 1719161)
+++ lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java (working copy)
@@ -27,6 +27,7 @@
public final DocIdSetIterator iterator;
public final Scorer scorer;
public final long cost;
+ public final float matchCost; // the match cost for two-phase iterators, 0 otherwise
public int doc; // the current doc, used for comparison
public DisiWrapper next; // reference to a next element, see #topList
@@ -52,8 +53,10 @@
if (twoPhaseView != null) {
approximation = twoPhaseView.approximation();
+ matchCost = twoPhaseView.matchCost();
} else {
approximation = iterator;
+ matchCost = 0f;
}
}
@@ -67,8 +70,10 @@
if (twoPhaseView != null) {
approximation = twoPhaseView.approximation();
+ matchCost = twoPhaseView.matchCost();
} else {
approximation = iterator;
+ matchCost = 0f;
}
this.lastApproxNonMatchDoc = -2;
this.lastApproxMatchDoc = -2;
Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (revision 1719161)
+++ lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (working copy)
@@ -22,6 +22,8 @@
import java.util.Collection;
import java.util.List;
+import org.apache.lucene.util.PriorityQueue;
+
/**
* Base class for Scorers that score disjunctions.
*/
@@ -28,11 +30,10 @@
abstract class DisjunctionScorer extends Scorer {
private final boolean needsScores;
- final DisiPriorityQueue subScorers;
- private final long cost;
- /** Linked list of scorers which are on the current doc */
- private DisiWrapper topScorers;
+ private final DisiPriorityQueue subScorers;
+ private final DisjunctionDISIApproximation approximation;
+ private final TwoPhase twoPhase;
protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
super(weight);
@@ -40,125 +41,125 @@
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
this.subScorers = new DisiPriorityQueue(subScorers.size());
- long cost = 0;
for (Scorer scorer : subScorers) {
final DisiWrapper w = new DisiWrapper(scorer);
- cost += w.cost;
this.subScorers.add(w);
}
- this.cost = cost;
this.needsScores = needsScores;
- }
+ this.approximation = new DisjunctionDISIApproximation(this.subScorers);
- @Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
-
- @Override
- public int docID() {
- return subScorers.top().doc;
- }
-
- @Override
- public final int nextDoc() throws IOException {
- topScorers = null;
- DisiWrapper top = subScorers.top();
- final int doc = top.doc;
- do {
- top.doc = top.iterator.nextDoc();
- top = subScorers.updateTop();
- } while (top.doc == doc);
-
- return top.doc;
- }
-
- @Override
- public final int advance(int target) throws IOException {
- topScorers = null;
- DisiWrapper top = subScorers.top();
- do {
- top.doc = top.iterator.advance(target);
- top = subScorers.updateTop();
- } while (top.doc < target);
-
- return top.doc;
- }
-
- @Override
- public final long cost() {
- return cost;
- }
-
- };
- }
-
- @Override
- public TwoPhaseIterator twoPhaseIterator() {
+ boolean hasApproximation = false;
float sumMatchCost = 0;
long sumApproxCost = 0;
-
- // Compute matchCost as the avarage over the matchCost of the subScorers.
+ // Compute matchCost as the average over the matchCost of the subScorers.
// This is weighted by the cost, which is an expected number of matching documents.
- for (DisiWrapper w : subScorers) {
+ for (DisiWrapper w : this.subScorers) {
+ long costWeight = (w.cost <= 1) ? 1 : w.cost;
+ sumApproxCost += costWeight;
if (w.twoPhaseView != null) {
- long costWeight = (w.cost <= 1) ? 1 : w.cost;
- sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
- sumApproxCost += costWeight;
+ hasApproximation = true;
+ sumMatchCost += w.matchCost * costWeight;
}
}
- if (sumApproxCost == 0) { // no sub scorer supports approximations
- return null;
+ if (hasApproximation == false) { // no sub scorer supports approximations
+ twoPhase = null;
+ } else {
+ final float matchCost = sumMatchCost / sumApproxCost;
+ twoPhase = new TwoPhase(approximation, matchCost);
}
+ }
- final float matchCost = sumMatchCost / sumApproxCost;
+ @Override
+ public DocIdSetIterator iterator() {
+ if (twoPhase != null) {
+ return TwoPhaseIterator.asDocIdSetIterator(twoPhase);
+ } else {
+ return approximation;
+ }
+ }
- // note it is important to share the same pq as this scorer so that
- // rebalancing the pq through the approximation will also rebalance
- // the pq in this scorer.
- return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
+ @Override
+ public TwoPhaseIterator twoPhaseIterator() {
+ return twoPhase;
+ }
- @Override
- public boolean matches() throws IOException {
- DisiWrapper topScorers = subScorers.topList();
- // remove the head of the list as long as it does not match
- while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches()) {
- topScorers = topScorers.next;
- if (topScorers == null) {
- return false;
- }
+ private class TwoPhase extends TwoPhaseIterator {
+
+ private final float matchCost;
+ // list of verified matches on the current doc
+ DisiWrapper verifiedMatches;
+ // priority queue of approximations on the current doc that have not been verified yet
+ final PriorityQueue<DisiWrapper> unverifiedMatches;
+
+ private TwoPhase(DocIdSetIterator approximation, float matchCost) {
+ super(approximation);
+ this.matchCost = matchCost;
+ unverifiedMatches = new PriorityQueue<DisiWrapper>(DisjunctionScorer.this.subScorers.size()) {
+ @Override
+ protected boolean lessThan(DisiWrapper a, DisiWrapper b) {
+ return a.matchCost < b.matchCost;
}
- // now we know we have at least one match since the first element of 'matchList' matches
- if (needsScores) {
- // if scores or freqs are needed, we also need to remove scorers
- // from the top list that do not actually match
- DisiWrapper previous = topScorers;
- for (DisiWrapper w = topScorers.next; w != null; w = w.next) {
- if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) {
- // w does not match, remove it
- previous.next = w.next;
- } else {
- previous = w;
- }
+ };
+ }
+
+ DisiWrapper getSubMatches() throws IOException {
+ // iteration order does not matter
+ for (DisiWrapper w : unverifiedMatches) {
+ if (w.twoPhaseView.matches()) {
+ w.next = verifiedMatches;
+ verifiedMatches = w;
+ }
+ }
+ unverifiedMatches.clear();
+ return verifiedMatches;
+ }
+
+ @Override
+ public boolean matches() throws IOException {
+ verifiedMatches = null;
+ unverifiedMatches.clear();
+
+ for (DisiWrapper w = subScorers.topList(); w != null; ) {
+ DisiWrapper next = w.next;
+
+ if (w.twoPhaseView == null) {
+ // implicitly verified, move it to verifiedMatches
+ w.next = verifiedMatches;
+ verifiedMatches = w;
+
+ if (needsScores == false) {
+ // we can stop here
+ return true;
}
} else {
- // since we don't need scores, let's pretend we have a single match
- topScorers.next = null;
+ unverifiedMatches.add(w);
}
-
- // We need to explicitely set the list of top scorers to avoid the
- // laziness of DisjunctionScorer.score() that would take all scorers
- // positioned on the same doc as the top of the pq, including
- // non-matching scorers
- DisjunctionScorer.this.topScorers = topScorers;
+ w = next;
+ }
+
+ if (verifiedMatches != null) {
return true;
}
-
- @Override
- public float matchCost() {
- return matchCost;
+
+ // verify subs that have an two-phase iterator
+ // least-costly ones first
+ while (unverifiedMatches.size() > 0) {
+ DisiWrapper w = unverifiedMatches.pop();
+ if (w.twoPhaseView.matches()) {
+ w.next = null;
+ verifiedMatches = w;
+ return true;
+ }
}
- };
+
+ return false;
+ }
+
+ @Override
+ public float matchCost() {
+ return matchCost;
+ }
}
@Override
@@ -166,13 +167,19 @@
return subScorers.top().doc;
}
+ DisiWrapper getSubMatches() throws IOException {
+ if (twoPhase == null) {
+ return subScorers.topList();
+ } else {
+ return twoPhase.getSubMatches();
+ }
+ }
+
@Override
public final int freq() throws IOException {
- if (topScorers == null) {
- topScorers = subScorers.topList();
- }
+ DisiWrapper subMatches = getSubMatches();
int freq = 1;
- for (DisiWrapper w = topScorers.next; w != null; w = w.next) {
+ for (DisiWrapper w = subMatches.next; w != null; w = w.next) {
freq += 1;
}
return freq;
@@ -180,10 +187,7 @@
@Override
public final float score() throws IOException {
- if (topScorers == null) {
- topScorers = subScorers.topList();
- }
- return score(topScorers);
+ return score(getSubMatches());
}
/** Compute the score for the given linked list of scorers. */
Index: lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java (revision 1719161)
+++ lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java (working copy)
@@ -167,7 +167,7 @@
final float freq;
if (scorer instanceof SynonymScorer) {
SynonymScorer synScorer = (SynonymScorer) scorer;
- freq = synScorer.tf(synScorer.subScorers.topList());
+ freq = synScorer.tf(synScorer.getSubMatches());
} else {
assert scorer instanceof TermScorer;
freq = scorer.freq();
Index: lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java (revision 1719161)
+++ lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java (working copy)
@@ -1,5 +1,8 @@
package org.apache.lucene.util;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -27,10 +30,12 @@
* <code>maxSize+1</code> if instantiated via the
* {@link #PriorityQueue(int,boolean)} constructor with <code>prepopulate</code>
* set to <code>true</code>.
- *
+ *
+ * <b>NOTE</b>: Iteration order is not specified.
+ *
* @lucene.internal
*/
-public abstract class PriorityQueue<T> {
+public abstract class PriorityQueue<T> implements Iterable<T> {
private int size = 0;
private final int maxSize;
private final T[] heap;
@@ -58,7 +63,7 @@
@SuppressWarnings("unchecked") final T[] h = (T[]) new Object[heapSize];
this.heap = h;
this.maxSize = maxSize;
-
+
if (prepopulate) {
// If sentinel objects are supported, populate the queue with them
T sentinel = getSentinelObject();
@@ -80,22 +85,22 @@
/**
* This method can be overridden by extending classes to return a sentinel
- * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)}
+ * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)}
* constructor to fill the queue, so that the code which uses that queue can always
* assume it's full and only change the top without attempting to insert any new
* object.<br>
- *
+ *
* Those sentinel values should always compare worse than any non-sentinel
* value (i.e., {@link #lessThan} should always favor the
* non-sentinel values).<br>
- *
+ *
* By default, this method returns null, which means the queue will not be
* filled with sentinel values. Otherwise, the value returned will be used to
* pre-populate the queue. Adds sentinel values to the queue.<br>
- *
+ *
* If this method is extended to return a non-null value, then the following
* usage pattern is recommended:
- *
+ *
* <pre class="prettyprint">
* // extends getSentinelObject() to return a non-null value.
* PriorityQueue&lt;MyObject&gt; pq = new MyQueue&lt;MyObject&gt;(numHits);
@@ -102,19 +107,19 @@
* // save the 'top' element, which is guaranteed to not be null.
* MyObject pqTop = pq.top();
* &lt;...&gt;
- * // now in order to add a new element, which is 'better' than top (after
+ * // now in order to add a new element, which is 'better' than top (after
* // you've verified it is better), it is as simple as:
* pqTop.change().
* pqTop = pq.updateTop();
* </pre>
- *
+ *
* <b>NOTE:</b> if this method returns a non-null value, it will be called by
- * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor
+ * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor
* {@link #size()} times, relying on a new object to be returned and will not
* check if it's null again. Therefore you should ensure any call to this
* method creates a new instance and behaves consistently, e.g., it cannot
* return null if it previously returned non-null.
- *
+ *
* @return the sentinel object to use to pre-populate the queue, or null if
* sentinel objects are not supported.
*/
@@ -126,7 +131,7 @@
* Adds an Object to a PriorityQueue in log(size) time. If one tries to add
* more objects than maxSize from initialize an
* {@link ArrayIndexOutOfBoundsException} is thrown.
- *
+ *
* @return the new 'top' element in the queue.
*/
public final T add(T element) {
@@ -182,24 +187,24 @@
return null;
}
}
-
+
/**
* Should be called when the Object at top changes values. Still log(n) worst
* case, but it's at least twice as fast to
- *
+ *
* <pre class="prettyprint">
* pq.top().change();
* pq.updateTop();
* </pre>
- *
+ *
* instead of
- *
+ *
* <pre class="prettyprint">
* o = pq.pop();
* o.change();
* pq.push(o);
* </pre>
- *
+ *
* @return the new 'top' element.
*/
public final T updateTop() {
@@ -263,7 +268,7 @@
heap[i] = node; // install saved node
return i != origPos;
}
-
+
private final void downHeap(int i) {
T node = heap[i]; // save top node
int j = i << 1; // find smaller child
@@ -289,4 +294,26 @@
protected final Object[] getHeapArray() {
return (Object[]) heap;
}
+
+ @Override
+ public Iterator<T> iterator() {
+ return new Iterator<T>() {
+
+ int i = 1;
+
+ @Override
+ public boolean hasNext() {
+ return i <= size;
+ }
+
+ @Override
+ public T next() {
+ if (hasNext() == false) {
+ throw new NoSuchElementException();
+ }
+ return heap[i++];
+ }
+
+ };
+ }
}
Index: lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java (revision 1719161)
+++ lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java (working copy)
@@ -18,6 +18,9 @@
*/
import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
import java.util.Random;
public class TestPriorityQueue extends LuceneTestCase {
@@ -188,4 +191,65 @@
}
}
+ public void testIterator() {
+ IntegerQueue queue = new IntegerQueue(3);
+
+ Iterator<Integer> it = queue.iterator();
+ assertFalse(it.hasNext());
+ try {
+ it.next();
+ fail();
+ } catch (NoSuchElementException e) {
+ // ok
+ }
+
+ queue.add(1);
+ it = queue.iterator();
+ assertTrue(it.hasNext());
+ assertEquals(Integer.valueOf(1), it.next());
+ assertFalse(it.hasNext());
+ try {
+ it.next();
+ fail();
+ } catch (NoSuchElementException e) {
+ // ok
+ }
+
+ queue.add(2);
+ it = queue.iterator();
+ assertTrue(it.hasNext());
+ assertEquals(Integer.valueOf(1), it.next());
+ assertTrue(it.hasNext());
+ assertEquals(Integer.valueOf(2), it.next());
+ assertFalse(it.hasNext());
+ try {
+ it.next();
+ fail();
+ } catch (NoSuchElementException e) {
+ // ok
+ }
+ }
+
+ public void testIteratorRandom() {
+ final int maxSize = TestUtil.nextInt(random(), 1, 20);
+ IntegerQueue queue = new IntegerQueue(maxSize);
+ final int iters = atLeast(100);
+ final List<Integer> expected = new ArrayList<>();
+ for (int iter = 0; iter < iters; ++iter) {
+ if (queue.size() == 0 || (queue.size() < maxSize && random().nextBoolean())) {
+ final Integer value = new Integer(random().nextInt(10));
+ queue.add(value);
+ expected.add(value);
+ } else {
+ expected.remove(queue.pop());
+ }
+ List<Integer> actual = new ArrayList<>();
+ for (Integer value : queue) {
+ actual.add(value);
+ }
+ CollectionUtil.introSort(expected);
+ CollectionUtil.introSort(actual);
+ assertEquals(expected, actual);
+ }
+ }
}