| Index: lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java (revision 1719161) |
| +++ lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java (working copy) |
| @@ -27,6 +27,7 @@ |
| public final DocIdSetIterator iterator; |
| public final Scorer scorer; |
| public final long cost; |
| + public final float matchCost; // the match cost for two-phase iterators, 0 otherwise |
| public int doc; // the current doc, used for comparison |
| public DisiWrapper next; // reference to a next element, see #topList |
| |
| @@ -52,8 +53,10 @@ |
| |
| if (twoPhaseView != null) { |
| approximation = twoPhaseView.approximation(); |
| + matchCost = twoPhaseView.matchCost(); |
| } else { |
| approximation = iterator; |
| + matchCost = 0f; |
| } |
| } |
| |
| @@ -67,8 +70,10 @@ |
| |
| if (twoPhaseView != null) { |
| approximation = twoPhaseView.approximation(); |
| + matchCost = twoPhaseView.matchCost(); |
| } else { |
| approximation = iterator; |
| + matchCost = 0f; |
| } |
| this.lastApproxNonMatchDoc = -2; |
| this.lastApproxMatchDoc = -2; |
| Index: lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (revision 1719161) |
| +++ lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (working copy) |
| @@ -22,6 +22,8 @@ |
| import java.util.Collection; |
| import java.util.List; |
| |
| +import org.apache.lucene.util.PriorityQueue; |
| + |
| /** |
| * Base class for Scorers that score disjunctions. |
| */ |
| @@ -28,11 +30,10 @@ |
| abstract class DisjunctionScorer extends Scorer { |
| |
| private final boolean needsScores; |
| - final DisiPriorityQueue subScorers; |
| - private final long cost; |
| |
| - /** Linked list of scorers which are on the current doc */ |
| - private DisiWrapper topScorers; |
| + private final DisiPriorityQueue subScorers; |
| + private final DisjunctionDISIApproximation approximation; |
| + private final TwoPhase twoPhase; |
| |
| protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) { |
| super(weight); |
| @@ -40,125 +41,125 @@ |
| throw new IllegalArgumentException("There must be at least 2 subScorers"); |
| } |
| this.subScorers = new DisiPriorityQueue(subScorers.size()); |
| - long cost = 0; |
| for (Scorer scorer : subScorers) { |
| final DisiWrapper w = new DisiWrapper(scorer); |
| - cost += w.cost; |
| this.subScorers.add(w); |
| } |
| - this.cost = cost; |
| this.needsScores = needsScores; |
| - } |
| + this.approximation = new DisjunctionDISIApproximation(this.subScorers); |
| |
| - @Override |
| - public DocIdSetIterator iterator() { |
| - return new DocIdSetIterator() { |
| - |
| - @Override |
| - public int docID() { |
| - return subScorers.top().doc; |
| - } |
| - |
| - @Override |
| - public final int nextDoc() throws IOException { |
| - topScorers = null; |
| - DisiWrapper top = subScorers.top(); |
| - final int doc = top.doc; |
| - do { |
| - top.doc = top.iterator.nextDoc(); |
| - top = subScorers.updateTop(); |
| - } while (top.doc == doc); |
| - |
| - return top.doc; |
| - } |
| - |
| - @Override |
| - public final int advance(int target) throws IOException { |
| - topScorers = null; |
| - DisiWrapper top = subScorers.top(); |
| - do { |
| - top.doc = top.iterator.advance(target); |
| - top = subScorers.updateTop(); |
| - } while (top.doc < target); |
| - |
| - return top.doc; |
| - } |
| - |
| - @Override |
| - public final long cost() { |
| - return cost; |
| - } |
| - |
| - }; |
| - } |
| - |
| - @Override |
| - public TwoPhaseIterator twoPhaseIterator() { |
| + boolean hasApproximation = false; |
| float sumMatchCost = 0; |
| long sumApproxCost = 0; |
| - |
| - // Compute matchCost as the avarage over the matchCost of the subScorers. |
| + // Compute matchCost as the average over the matchCost of the subScorers. |
| // This is weighted by the cost, which is an expected number of matching documents. |
| - for (DisiWrapper w : subScorers) { |
| + for (DisiWrapper w : this.subScorers) { |
| + long costWeight = (w.cost <= 1) ? 1 : w.cost; |
| + sumApproxCost += costWeight; |
| if (w.twoPhaseView != null) { |
| - long costWeight = (w.cost <= 1) ? 1 : w.cost; |
| - sumMatchCost += w.twoPhaseView.matchCost() * costWeight; |
| - sumApproxCost += costWeight; |
| + hasApproximation = true; |
| + sumMatchCost += w.matchCost * costWeight; |
| } |
| } |
| |
| - if (sumApproxCost == 0) { // no sub scorer supports approximations |
| - return null; |
| + if (hasApproximation == false) { // no sub scorer supports approximations |
| + twoPhase = null; |
| + } else { |
| + final float matchCost = sumMatchCost / sumApproxCost; |
| + twoPhase = new TwoPhase(approximation, matchCost); |
| } |
| + } |
| |
| - final float matchCost = sumMatchCost / sumApproxCost; |
| + @Override |
| + public DocIdSetIterator iterator() { |
| + if (twoPhase != null) { |
| + return TwoPhaseIterator.asDocIdSetIterator(twoPhase); |
| + } else { |
| + return approximation; |
| + } |
| + } |
| |
| - // note it is important to share the same pq as this scorer so that |
| - // rebalancing the pq through the approximation will also rebalance |
| - // the pq in this scorer. |
| - return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) { |
| + @Override |
| + public TwoPhaseIterator twoPhaseIterator() { |
| + return twoPhase; |
| + } |
| |
| - @Override |
| - public boolean matches() throws IOException { |
| - DisiWrapper topScorers = subScorers.topList(); |
| - // remove the head of the list as long as it does not match |
| - while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches()) { |
| - topScorers = topScorers.next; |
| - if (topScorers == null) { |
| - return false; |
| - } |
| + private class TwoPhase extends TwoPhaseIterator { |
| + |
| + private final float matchCost; |
| + // list of verified matches on the current doc |
| + DisiWrapper verifiedMatches; |
| + // priority queue of approximations on the current doc that have not been verified yet |
| + final PriorityQueue<DisiWrapper> unverifiedMatches; |
| + |
| + private TwoPhase(DocIdSetIterator approximation, float matchCost) { |
| + super(approximation); |
| + this.matchCost = matchCost; |
| + unverifiedMatches = new PriorityQueue<DisiWrapper>(DisjunctionScorer.this.subScorers.size()) { |
| + @Override |
| + protected boolean lessThan(DisiWrapper a, DisiWrapper b) { |
| + return a.matchCost < b.matchCost; |
| } |
| - // now we know we have at least one match since the first element of 'matchList' matches |
| - if (needsScores) { |
| - // if scores or freqs are needed, we also need to remove scorers |
| - // from the top list that do not actually match |
| - DisiWrapper previous = topScorers; |
| - for (DisiWrapper w = topScorers.next; w != null; w = w.next) { |
| - if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) { |
| - // w does not match, remove it |
| - previous.next = w.next; |
| - } else { |
| - previous = w; |
| - } |
| + }; |
| + } |
| + |
| + DisiWrapper getSubMatches() throws IOException { /* returns the head of the verified-match list after checking every still-unverified candidate */ |
| + // iteration order does not matter: every remaining candidate is checked, none is skipped |
| + for (DisiWrapper w : unverifiedMatches) { |
| + if (w.twoPhaseView.matches()) { |
| + w.next = verifiedMatches; /* prepend w to the verified list */ |
| + verifiedMatches = w; |
| + } |
| + } |
| + unverifiedMatches.clear(); /* every candidate has now been checked, so none stays pending */ |
| + return verifiedMatches; |
| + } |
| + |
| + @Override |
| + public boolean matches() throws IOException { |
| + verifiedMatches = null; |
| + unverifiedMatches.clear(); |
| + |
| + for (DisiWrapper w = subScorers.topList(); w != null; ) { |
| + DisiWrapper next = w.next; |
| + |
| + if (w.twoPhaseView == null) { |
| + // implicitly verified, move it to verifiedMatches |
| + w.next = verifiedMatches; |
| + verifiedMatches = w; |
| + |
| + if (needsScores == false) { |
| + // we can stop here |
| + return true; |
| } |
| } else { |
| - // since we don't need scores, let's pretend we have a single match |
| - topScorers.next = null; |
| + unverifiedMatches.add(w); |
| } |
| - |
| - // We need to explicitely set the list of top scorers to avoid the |
| - // laziness of DisjunctionScorer.score() that would take all scorers |
| - // positioned on the same doc as the top of the pq, including |
| - // non-matching scorers |
| - DisjunctionScorer.this.topScorers = topScorers; |
| + w = next; |
| + } |
| + |
| + if (verifiedMatches != null) { |
| return true; |
| } |
| - |
| - @Override |
| - public float matchCost() { |
| - return matchCost; |
| + |
| + // verify subs that have a two-phase iterator |
| + // least-costly ones first |
| + while (unverifiedMatches.size() > 0) { |
| + DisiWrapper w = unverifiedMatches.pop(); |
| + if (w.twoPhaseView.matches()) { |
| + w.next = null; |
| + verifiedMatches = w; |
| + return true; |
| + } |
| } |
| - }; |
| + |
| + return false; |
| + } |
| + |
| + @Override |
| + public float matchCost() { |
| + return matchCost; |
| + } |
| } |
| |
| @Override |
| @@ -166,13 +167,19 @@ |
| return subScorers.top().doc; |
| } |
| |
| + DisiWrapper getSubMatches() throws IOException { /* head of the linked list of sub scorers used by freq() and score() */ |
| + if (twoPhase == null) { /* twoPhase is null when no sub scorer supports approximations */ |
| + return subScorers.topList(); |
| + } else { |
| + return twoPhase.getSubMatches(); /* only the sub scorers recorded as verified matches */ |
| + } |
| + } |
| + |
| @Override |
| public final int freq() throws IOException { |
| - if (topScorers == null) { |
| - topScorers = subScorers.topList(); |
| - } |
| + DisiWrapper subMatches = getSubMatches(); |
| int freq = 1; |
| - for (DisiWrapper w = topScorers.next; w != null; w = w.next) { |
| + for (DisiWrapper w = subMatches.next; w != null; w = w.next) { |
| freq += 1; |
| } |
| return freq; |
| @@ -180,10 +187,7 @@ |
| |
| @Override |
| public final float score() throws IOException { |
| - if (topScorers == null) { |
| - topScorers = subScorers.topList(); |
| - } |
| - return score(topScorers); |
| + return score(getSubMatches()); |
| } |
| |
| /** Compute the score for the given linked list of scorers. */ |
| Index: lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java (revision 1719161) |
| +++ lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java (working copy) |
| @@ -167,7 +167,7 @@ |
| final float freq; |
| if (scorer instanceof SynonymScorer) { |
| SynonymScorer synScorer = (SynonymScorer) scorer; |
| - freq = synScorer.tf(synScorer.subScorers.topList()); |
| + freq = synScorer.tf(synScorer.getSubMatches()); |
| } else { |
| assert scorer instanceof TermScorer; |
| freq = scorer.freq(); |
| Index: lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java (revision 1719161) |
| +++ lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java (working copy) |
| @@ -1,5 +1,8 @@ |
| package org.apache.lucene.util; |
| |
| +import java.util.Iterator; |
| +import java.util.NoSuchElementException; |
| + |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -27,10 +30,12 @@ |
| * <code>maxSize+1</code> if instantiated via the |
| * {@link #PriorityQueue(int,boolean)} constructor with <code>prepopulate</code> |
| * set to <code>true</code>. |
| - * |
| + * |
| + * <b>NOTE</b>: Iteration order is not specified. |
| + * |
| * @lucene.internal |
| */ |
| -public abstract class PriorityQueue<T> { |
| +public abstract class PriorityQueue<T> implements Iterable<T> { |
| private int size = 0; |
| private final int maxSize; |
| private final T[] heap; |
| @@ -58,7 +63,7 @@ |
| @SuppressWarnings("unchecked") final T[] h = (T[]) new Object[heapSize]; |
| this.heap = h; |
| this.maxSize = maxSize; |
| - |
| + |
| if (prepopulate) { |
| // If sentinel objects are supported, populate the queue with them |
| T sentinel = getSentinelObject(); |
| @@ -80,22 +85,22 @@ |
| |
| /** |
| * This method can be overridden by extending classes to return a sentinel |
| - * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)} |
| + * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)} |
| * constructor to fill the queue, so that the code which uses that queue can always |
| * assume it's full and only change the top without attempting to insert any new |
| * object.<br> |
| - * |
| + * |
| * Those sentinel values should always compare worse than any non-sentinel |
| * value (i.e., {@link #lessThan} should always favor the |
| * non-sentinel values).<br> |
| - * |
| + * |
| * By default, this method returns null, which means the queue will not be |
| * filled with sentinel values. Otherwise, the value returned will be used to |
| * pre-populate the queue. Adds sentinel values to the queue.<br> |
| - * |
| + * |
| * If this method is extended to return a non-null value, then the following |
| * usage pattern is recommended: |
| - * |
| + * |
| * <pre class="prettyprint"> |
| * // extends getSentinelObject() to return a non-null value. |
| * PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits); |
| @@ -102,19 +107,19 @@ |
| * // save the 'top' element, which is guaranteed to not be null. |
| * MyObject pqTop = pq.top(); |
| * <...> |
| - * // now in order to add a new element, which is 'better' than top (after |
| + * // now in order to add a new element, which is 'better' than top (after |
| * // you've verified it is better), it is as simple as: |
| * pqTop.change(). |
| * pqTop = pq.updateTop(); |
| * </pre> |
| - * |
| + * |
| * <b>NOTE:</b> if this method returns a non-null value, it will be called by |
| - * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor |
| + * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor |
| * {@link #size()} times, relying on a new object to be returned and will not |
| * check if it's null again. Therefore you should ensure any call to this |
| * method creates a new instance and behaves consistently, e.g., it cannot |
| * return null if it previously returned non-null. |
| - * |
| + * |
| * @return the sentinel object to use to pre-populate the queue, or null if |
| * sentinel objects are not supported. |
| */ |
| @@ -126,7 +131,7 @@ |
| * Adds an Object to a PriorityQueue in log(size) time. If one tries to add |
| * more objects than maxSize from initialize an |
| * {@link ArrayIndexOutOfBoundsException} is thrown. |
| - * |
| + * |
| * @return the new 'top' element in the queue. |
| */ |
| public final T add(T element) { |
| @@ -182,24 +187,24 @@ |
| return null; |
| } |
| } |
| - |
| + |
| /** |
| * Should be called when the Object at top changes values. Still log(n) worst |
| * case, but it's at least twice as fast to |
| - * |
| + * |
| * <pre class="prettyprint"> |
| * pq.top().change(); |
| * pq.updateTop(); |
| * </pre> |
| - * |
| + * |
| * instead of |
| - * |
| + * |
| * <pre class="prettyprint"> |
| * o = pq.pop(); |
| * o.change(); |
| * pq.push(o); |
| * </pre> |
| - * |
| + * |
| * @return the new 'top' element. |
| */ |
| public final T updateTop() { |
| @@ -263,7 +268,7 @@ |
| heap[i] = node; // install saved node |
| return i != origPos; |
| } |
| - |
| + |
| private final void downHeap(int i) { |
| T node = heap[i]; // save top node |
| int j = i << 1; // find smaller child |
| @@ -289,4 +294,26 @@ |
| protected final Object[] getHeapArray() { |
| return (Object[]) heap; |
| } |
| + |
| + @Override |
| + public Iterator<T> iterator() { /* walks the backing heap array slot by slot; the resulting order is unspecified */ |
| + return new Iterator<T>() { |
| + |
| + int i = 1; /* index of the next heap slot to return; iteration starts at slot 1 */ |
| + |
| + @Override |
| + public boolean hasNext() { |
| + return i <= size; /* reads the queue's current size on every call, so slots 1..size are visited */ |
| + } |
| + |
| + @Override |
| + public T next() { |
| + if (hasNext() == false) { |
| + throw new NoSuchElementException(); |
| + } |
| + return heap[i++]; /* return the current slot, then advance */ |
| + } |
| + |
| + }; |
| + } |
| } |
| Index: lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java (revision 1719161) |
| +++ lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java (working copy) |
| @@ -18,6 +18,9 @@ |
| */ |
| |
| import java.util.ArrayList; |
| +import java.util.Iterator; |
| +import java.util.List; |
| +import java.util.NoSuchElementException; |
| import java.util.Random; |
| |
| public class TestPriorityQueue extends LuceneTestCase { |
| @@ -188,4 +191,65 @@ |
| } |
| } |
| |
| + public void testIterator() { /* checks hasNext()/next() on a queue holding zero, one, then two elements */ |
| + IntegerQueue queue = new IntegerQueue(3); |
| + |
| + Iterator<Integer> it = queue.iterator(); /* empty queue: nothing to iterate */ |
| + assertFalse(it.hasNext()); |
| + try { |
| + it.next(); |
| + fail(); |
| + } catch (NoSuchElementException e) { |
| + // expected: next() on an exhausted iterator must throw |
| + } |
| + |
| + queue.add(1); |
| + it = queue.iterator(); /* a fresh iterator is taken after each add */ |
| + assertTrue(it.hasNext()); |
| + assertEquals(Integer.valueOf(1), it.next()); |
| + assertFalse(it.hasNext()); |
| + try { |
| + it.next(); |
| + fail(); |
| + } catch (NoSuchElementException e) { |
| + // expected: next() on an exhausted iterator must throw |
| + } |
| + |
| + queue.add(2); |
| + it = queue.iterator(); |
| + assertTrue(it.hasNext()); |
| + assertEquals(Integer.valueOf(1), it.next()); /* NOTE(review): pins the order 1, 2 although iteration order is documented as unspecified */ |
| + assertTrue(it.hasNext()); |
| + assertEquals(Integer.valueOf(2), it.next()); |
| + assertFalse(it.hasNext()); |
| + try { |
| + it.next(); |
| + fail(); |
| + } catch (NoSuchElementException e) { |
| + // expected: next() on an exhausted iterator must throw |
| + } |
| + } |
| + |
| + public void testIteratorRandom() { /* random adds and pops; after every step the iterated elements must equal the tracked contents */ |
| + final int maxSize = TestUtil.nextInt(random(), 1, 20); |
| + IntegerQueue queue = new IntegerQueue(maxSize); |
| + final int iters = atLeast(100); |
| + final List<Integer> expected = new ArrayList<>(); /* mirror of what the queue should currently hold */ |
| + for (int iter = 0; iter < iters; ++iter) { |
| + if (queue.size() == 0 || (queue.size() < maxSize && random().nextBoolean())) { /* always add when empty, always pop when full, otherwise choose at random */ |
| + final Integer value = new Integer(random().nextInt(10)); |
| + queue.add(value); |
| + expected.add(value); |
| + } else { |
| + expected.remove(queue.pop()); /* the argument is an Integer, so this resolves to List.remove(Object), not remove(int) */ |
| + } |
| + List<Integer> actual = new ArrayList<>(); |
| + for (Integer value : queue) { |
| + actual.add(value); |
| + } |
| + CollectionUtil.introSort(expected); /* both lists are sorted before comparing, so the check does not depend on iteration order */ |
| + CollectionUtil.introSort(actual); |
| + assertEquals(expected, actual); |
| + } |
| + } |
| } |