| Index: src/java/org/apache/lucene/search/DisjunctionSumScorer.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/DisjunctionSumScorer.java (revision 465071) |
| +++ src/java/org/apache/lucene/search/DisjunctionSumScorer.java (working copy) |
| @@ -20,10 +20,11 @@ |
| import java.util.Iterator; |
| import java.io.IOException; |
| |
| -import org.apache.lucene.util.PriorityQueue; |
| +import org.apache.lucene.util.ScorerDocQueue; |
| |
| -/** A Scorer for OR like queries, counterpart of Lucene's <code>ConjunctionScorer</code>. |
| +/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>. |
| * This Scorer implements {@link Scorer#skipTo(int)} and uses skipTo() on the given Scorers. |
| + * @todo Implement score(HitCollector, int). |
| */ |
| class DisjunctionSumScorer extends Scorer { |
| /** The number of subscorers. */ |
| @@ -35,19 +36,20 @@ |
| /** The minimum number of scorers that should match. */ |
| private final int minimumNrMatchers; |
| |
| - /** The scorerQueue contains all subscorers ordered by their current doc(), |
| + /** The scorerDocQueue contains all subscorers ordered by their current doc(), |
| * with the minimum at the top. |
| - * <br>The scorerQueue is initialized the first time next() or skipTo() is called. |
| - * <br>An exhausted scorer is immediately removed from the scorerQueue. |
| + * <br>The scorerDocQueue is initialized the first time next() or skipTo() is called. |
| + * <br>An exhausted scorer is immediately removed from the scorerDocQueue. |
| * <br>If less than the minimumNrMatchers scorers |
| - * remain in the scorerQueue next() and skipTo() return false. |
| + * remain in the scorerDocQueue next() and skipTo() return false. |
| * <p> |
| * After each to call to next() or skipTo() |
| * <code>currentSumScore</code> is the total score of the current matching doc, |
| * <code>nrMatchers</code> is the number of matching scorers, |
| * and all scorers are after the matching doc, or are exhausted. |
| */ |
| - private ScorerQueue scorerQueue = null; |
| + private ScorerDocQueue scorerDocQueue = null; |
| + private int queueSize = -1; // used to avoid size() method calls on scorerDocQueue |
| |
| /** The document number of the current match. */ |
| private int currentDoc = -1; |
| @@ -91,47 +93,65 @@ |
| } |
| |
| /** Called the first time next() or skipTo() is called to |
| - * initialize <code>scorerQueue</code>. |
| + * initialize <code>scorerDocQueue</code>. |
| */ |
| - private void initScorerQueue() throws IOException { |
| + private void initScorerDocQueue() throws IOException { |
| Iterator si = subScorers.iterator(); |
| - scorerQueue = new ScorerQueue(nrScorers); |
| + scorerDocQueue = new ScorerDocQueue(nrScorers); |
| + queueSize = 0; |
| while (si.hasNext()) { |
| Scorer se = (Scorer) si.next(); |
| - if (se.next()) { // doc() method will be used in scorerQueue. |
| - scorerQueue.insert(se); |
| + if (se.next()) { // doc() method will be used in scorerDocQueue. |
| + if (scorerDocQueue.insert(se)) { |
| + queueSize++; |
| + } |
| } |
| } |
| } |
| |
| - /** A <code>PriorityQueue</code> that orders by {@link Scorer#doc()}. */ |
| - private class ScorerQueue extends PriorityQueue { |
| - ScorerQueue(int size) { |
| - initialize(size); |
| + /** Scores and collects all matching documents. |
| + * @param hc The collector to which all matching documents are passed through |
| + * {@link HitCollector#collect(int, float)}. |
| + * <br>When this method is used the {@link #explain(int)} method should not be used. |
| + */ |
| + public void score(HitCollector hc) throws IOException { |
| + while (next()) { |
| + hc.collect(currentDoc, currentScore); |
| } |
| + } |
| |
| - protected boolean lessThan(Object o1, Object o2) { |
| - return ((Scorer)o1).doc() < ((Scorer)o2).doc(); |
| + /** Expert: Collects matching documents in a range. Hook for optimization. |
| + * Note that {@link #next()} must be called once before this method is called |
| + * for the first time. |
| + * @param hc The collector to which all matching documents are passed through |
| + * {@link HitCollector#collect(int, float)}. |
| + * @param max Do not score documents past this. |
| + * @return true if more matching documents may remain. |
| + */ |
| + protected boolean score(HitCollector hc, int max) throws IOException { |
| + while (currentDoc < max) { |
| + hc.collect(currentDoc, currentScore); |
| + if (!next()) { |
| + return false; |
| + } |
| } |
| + return true; |
| } |
| - |
| + |
| public boolean next() throws IOException { |
| - if (scorerQueue == null) { |
| - initScorerQueue(); |
| + if (scorerDocQueue == null) { |
| + initScorerDocQueue(); |
| } |
| - if (scorerQueue.size() < minimumNrMatchers) { |
| - return false; |
| - } else { |
| - return advanceAfterCurrent(); |
| - } |
| + return (scorerDocQueue.size() >= minimumNrMatchers) |
| + && advanceAfterCurrent(); |
| } |
| |
| |
| /** Advance all subscorers after the current document determined by the |
| - * top of the <code>scorerQueue</code>. |
| + * top of the <code>scorerDocQueue</code>. |
| * Repeat until at least the minimum number of subscorers match on the same |
| * document and all subscorers are after that document or are exhausted. |
| - * <br>On entry the <code>scorerQueue</code> has at least <code>minimumNrMatchers</code> |
| + * <br>On entry the <code>scorerDocQueue</code> has at least <code>minimumNrMatchers</code> |
| * available. At least the scorer with the minimum document number will be advanced. |
| * @return true iff there is a match. |
| * <br>In case there is a match, </code>currentDoc</code>, </code>currentSumScore</code>, |
| @@ -140,39 +160,32 @@ |
| * @todo Investigate whether it is possible to use skipTo() when |
| * the minimum number of matchers is bigger than one, ie. try and use the |
| * character of ConjunctionScorer for the minimum number of matchers. |
| + * Also delay calling score() on the sub scorers until the minimum number of |
| + * matchers is reached. |
| + * <br>For this, a Scorer array with minimumNrMatchers elements might |
| + * hold Scorers at currentDoc that are temporarily popped from scorerQueue. |
| */ |
| protected boolean advanceAfterCurrent() throws IOException { |
| do { // repeat until minimum nr of matchers |
| - Scorer top = (Scorer) scorerQueue.top(); |
| - currentDoc = top.doc(); |
| - currentScore = top.score(); |
| + currentDoc = scorerDocQueue.topDoc(); |
| + currentScore = scorerDocQueue.topScore(); |
| nrMatchers = 1; |
| do { // Until all subscorers are after currentDoc |
| - if (top.next()) { |
| - scorerQueue.adjustTop(); |
| - } else { |
| - scorerQueue.pop(); |
| - if (scorerQueue.size() < (minimumNrMatchers - nrMatchers)) { |
| - // Not enough subscorers left for a match on this document, |
| - // and also no more chance of any further match. |
| - return false; |
| - } |
| - if (scorerQueue.size() == 0) { |
| + if (! scorerDocQueue.topNextAndAdjustElsePop()) { |
| + if (--queueSize == 0) { |
| break; // nothing more to advance, check for last match. |
| } |
| } |
| - top = (Scorer) scorerQueue.top(); |
| - if (top.doc() != currentDoc) { |
| + if (scorerDocQueue.topDoc() != currentDoc) { |
| break; // All remaining subscorers are after currentDoc. |
| - } else { |
| - currentScore += top.score(); |
| - nrMatchers++; |
| } |
| + currentScore += scorerDocQueue.topScore(); |
| + nrMatchers++; |
| } while (true); |
| |
| if (nrMatchers >= minimumNrMatchers) { |
| return true; |
| - } else if (scorerQueue.size() < minimumNrMatchers) { |
| + } else if (queueSize < minimumNrMatchers) { |
| return false; |
| } |
| } while (true); |
| @@ -200,40 +213,50 @@ |
| * @return true iff there is such a match. |
| */ |
| public boolean skipTo(int target) throws IOException { |
| - if (scorerQueue == null) { |
| - initScorerQueue(); |
| + if (scorerDocQueue == null) { |
| + initScorerDocQueue(); |
| } |
| - if (scorerQueue.size() < minimumNrMatchers) { |
| + if (queueSize < minimumNrMatchers) { |
| return false; |
| } |
| if (target <= currentDoc) { |
| - return true; |
| + target = currentDoc + 1; |
| } |
| do { |
| - Scorer top = (Scorer) scorerQueue.top(); |
| - if (top.doc() >= target) { |
| + if (scorerDocQueue.topDoc() >= target) { |
| return advanceAfterCurrent(); |
| - } else if (top.skipTo(target)) { |
| - scorerQueue.adjustTop(); |
| - } else { |
| - scorerQueue.pop(); |
| - if (scorerQueue.size() < minimumNrMatchers) { |
| + } else if (! scorerDocQueue.topSkipToAndAdjustElsePop(target)) { |
| + if (--queueSize < minimumNrMatchers) { |
| return false; |
| } |
| } |
| } while (true); |
| } |
| |
| - /** Gives and explanation for the score of a given document. |
| - * @todo Show the resulting score. See BooleanScorer.explain() on how to do this. |
| - */ |
| + /** @return An explanation for the score of a given document. */ |
| public Explanation explain(int doc) throws IOException { |
| Explanation res = new Explanation(); |
| - res.setDescription("At least " + minimumNrMatchers + " of"); |
| Iterator ssi = subScorers.iterator(); |
| + float sumScore = 0.0f; |
| + int nrMatches = 0; |
| while (ssi.hasNext()) { |
| - res.addDetail( ((Scorer) ssi.next()).explain(doc)); |
| + Explanation es = ((Scorer) ssi.next()).explain(doc); |
| + if (es.getValue() > 0.0f) { // indicates match |
| + sumScore += es.getValue(); |
| + nrMatches++; |
| + } |
| + res.addDetail(es); |
| } |
| + if (nrMatchers >= minimumNrMatchers) { |
| + res.setValue(sumScore); |
| + res.setDescription("sum over at least " + minimumNrMatchers |
| + + " of " + subScorers.size() + ":"); |
| + } else { |
| + res.setValue(0.0f); |
| + res.setDescription(nrMatches + " match(es) but at least " |
| + + minimumNrMatchers + " of " |
| + + subScorers.size() + " needed"); |
| + } |
| return res; |
| } |
| } |
| Index: src/java/org/apache/lucene/util/ScorerDocQueue.java |
| =================================================================== |
| --- src/java/org/apache/lucene/util/ScorerDocQueue.java (revision 0) |
| +++ src/java/org/apache/lucene/util/ScorerDocQueue.java (revision 0) |
| @@ -0,0 +1,214 @@ |
| +package org.apache.lucene.util; |
| + |
| +/** |
| + * Copyright 2006 The Apache Software Foundation |
| + * |
| + * Licensed under the Apache License, Version 2.0 (the "License"); |
| + * you may not use this file except in compliance with the License. |
| + * You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +/* Derived from org.apache.lucene.util.PriorityQueue of March 2005 */ |
| + |
| +import java.io.IOException; |
| +import org.apache.lucene.search.Scorer; |
| + |
| +/** A ScorerDocQueue maintains a partial ordering of its Scorers such that the |
| + least Scorer can always be found in constant time. Put()'s and pop()'s |
| + require log(size) time. The ordering is by Scorer.doc(). |
| + */ |
| +public class ScorerDocQueue { // later: SpansQueue for spans with doc and term positions |
| + private final HeapedScorerDoc[] heap; |
| + private final int maxSize; |
| + private int size; |
| + |
| + private class HeapedScorerDoc { |
| + Scorer scorer; |
| + int doc; |
| + |
| + HeapedScorerDoc(Scorer s) { this(s, s.doc()); } |
| + |
| + HeapedScorerDoc(Scorer scorer, int doc) { |
| + this.scorer = scorer; |
| + this.doc = doc; |
| + } |
| + |
| + void adjust() { doc = scorer.doc(); } |
| + } |
| + |
| + private HeapedScorerDoc topHSD; // same as heap[1], only for speed |
| + |
| + /** Create a ScorerDocQueue with a maximum size. */ |
| + public ScorerDocQueue(int maxSize) { |
| + // assert maxSize >= 0; |
| + size = 0; |
| + int heapSize = maxSize + 1; |
| + heap = new HeapedScorerDoc[heapSize]; |
| + this.maxSize = maxSize; |
| + topHSD = heap[1]; // initially null |
| + } |
| + |
| + /** |
| + * Adds a Scorer to a ScorerDocQueue in log(size) time. |
| + * If one tries to add more Scorers than maxSize |
| + * a RuntimeException (ArrayIndexOutOfBound) is thrown. |
| + */ |
| + public final void put(Scorer scorer) { |
| + size++; |
| + heap[size] = new HeapedScorerDoc(scorer); |
| + upHeap(); |
| + } |
| + |
| + /** |
| + * Adds a Scorer to the ScorerDocQueue in log(size) time if either |
| + * the ScorerDocQueue is not full, or not lessThan(scorer, top()). |
| + * @param scorer |
| + * @return true if scorer is added, false otherwise. |
| + */ |
| + public boolean insert(Scorer scorer){ |
| + if (size < maxSize) { |
| + put(scorer); |
| + return true; |
| + } else { |
| + int docNr = scorer.doc(); |
| + if ((size > 0) && (! (docNr < topHSD.doc))) { // heap[1] is top() |
| + heap[1] = new HeapedScorerDoc(scorer, docNr); |
| + downHeap(); |
| + return true; |
| + } else { |
| + return false; |
| + } |
| + } |
| + } |
| + |
| + /** Returns the least Scorer of the ScorerDocQueue in constant time. |
| + * Should not be used when the queue is empty. |
| + */ |
| + public final Scorer top() { |
| + // assert size > 0; |
| + return topHSD.scorer; |
| + } |
| + |
| + /** Returns document number of the least Scorer of the ScorerDocQueue |
| + * in constant time. |
| + * Should not be used when the queue is empty. |
| + */ |
| + public final int topDoc() { |
| + // assert size > 0; |
| + return topHSD.doc; |
| + } |
| + |
| + public final float topScore() throws IOException { |
| + // assert size > 0; |
| + return topHSD.scorer.score(); |
| + } |
| + |
| + public final boolean topNextAndAdjustElsePop() throws IOException { |
| + return checkAdjustElsePop( topHSD.scorer.next()); |
| + } |
| + |
| + public final boolean topSkipToAndAdjustElsePop(int target) throws IOException { |
| + return checkAdjustElsePop( topHSD.scorer.skipTo(target)); |
| + } |
| + |
| + private boolean checkAdjustElsePop(boolean cond) { |
| + if (cond) { // see also adjustTop |
| + topHSD.doc = topHSD.scorer.doc(); |
| + } else { // see also popNoResult |
| + heap[1] = heap[size]; // move last to first |
| + heap[size] = null; |
| + size--; |
| + } |
| + downHeap(); |
| + return cond; |
| + } |
| + |
| + /** Removes and returns the least scorer of the ScorerDocQueue in log(size) |
| + * time. |
| + * Should not be used when the queue is empty. |
| + */ |
| + public final Scorer pop() { |
| + // assert size > 0; |
| + Scorer result = topHSD.scorer; |
| + popNoResult(); |
| + return result; |
| + } |
| + |
| + /** Removes the least scorer of the ScorerDocQueue in log(size) time. |
| + * Should not be used when the queue is empty. |
| + */ |
| + private final void popNoResult() { |
| + heap[1] = heap[size]; // move last to first |
| + heap[size] = null; |
| + size--; |
| + downHeap(); // adjust heap |
| + } |
| + |
| + /** Should be called when the scorer at top changes doc() value. |
| + * Still log(n) worst case, but it's at least twice as fast to <pre> |
| + * { pq.top().change(); pq.adjustTop(); } |
| + * </pre> instead of <pre> |
| + * { o = pq.pop(); o.change(); pq.push(o); } |
| + * </pre> |
| + */ |
| + public final void adjustTop() { |
| + // assert size > 0; |
| + topHSD.adjust(); |
| + downHeap(); |
| + } |
| + |
| + /** Returns the number of scorers currently stored in the ScorerDocQueue. */ |
| + public final int size() { |
| + return size; |
| + } |
| + |
| + /** Removes all entries from the ScorerDocQueue. */ |
| + public final void clear() { |
| + for (int i = 0; i <= size; i++) { |
| + heap[i] = null; |
| + } |
| + size = 0; |
| + } |
| + |
| + private final void upHeap() { |
| + int i = size; |
| + HeapedScorerDoc node = heap[i]; // save bottom node |
| + int j = i >>> 1; |
| + while ((j > 0) && (node.doc < heap[j].doc)) { |
| + heap[i] = heap[j]; // shift parents down |
| + i = j; |
| + j = j >>> 1; |
| + } |
| + heap[i] = node; // install saved node |
| + topHSD = heap[1]; |
| + } |
| + |
| + private final void downHeap() { |
| + int i = 1; |
| + HeapedScorerDoc node = heap[i]; // save top node |
| + int j = i << 1; // find smaller child |
| + int k = j + 1; |
| + if ((k <= size) && (heap[k].doc < heap[j].doc)) { |
| + j = k; |
| + } |
| + while ((j <= size) && (heap[j].doc < node.doc)) { |
| + heap[i] = heap[j]; // shift up child |
| + i = j; |
| + j = i << 1; |
| + k = j + 1; |
| + if (k <= size && (heap[k].doc < heap[j].doc)) { |
| + j = k; |
| + } |
| + } |
| + heap[i] = node; // install saved node |
| + topHSD = heap[1]; |
| + } |
| +} |
| |
| Property changes on: src/java/org/apache/lucene/util/ScorerDocQueue.java |
| ___________________________________________________________________ |
| Name: svn:eol-style |
| + native |
| |