blob: 822297425b0da14be8648162627a27e7b8c2dcb4 [file] [log] [blame]
Index: src/java/org/apache/lucene/search/DisjunctionSumScorer.java
===================================================================
--- src/java/org/apache/lucene/search/DisjunctionSumScorer.java (revision 465071)
+++ src/java/org/apache/lucene/search/DisjunctionSumScorer.java (working copy)
@@ -20,10 +20,11 @@
import java.util.Iterator;
import java.io.IOException;
-import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.ScorerDocQueue;
-/** A Scorer for OR like queries, counterpart of Lucene's <code>ConjunctionScorer</code>.
+/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
* This Scorer implements {@link Scorer#skipTo(int)} and uses skipTo() on the given Scorers.
+ * @todo Implement score(HitCollector, int).
*/
class DisjunctionSumScorer extends Scorer {
/** The number of subscorers. */
@@ -35,19 +36,20 @@
/** The minimum number of scorers that should match. */
private final int minimumNrMatchers;
- /** The scorerQueue contains all subscorers ordered by their current doc(),
+ /** The scorerDocQueue contains all subscorers ordered by their current doc(),
* with the minimum at the top.
- * <br>The scorerQueue is initialized the first time next() or skipTo() is called.
- * <br>An exhausted scorer is immediately removed from the scorerQueue.
+ * <br>The scorerDocQueue is initialized the first time next() or skipTo() is called.
+ * <br>An exhausted scorer is immediately removed from the scorerDocQueue.
* <br>If less than the minimumNrMatchers scorers
- * remain in the scorerQueue next() and skipTo() return false.
+ * remain in the scorerDocQueue next() and skipTo() return false.
* <p>
* After each to call to next() or skipTo()
* <code>currentSumScore</code> is the total score of the current matching doc,
* <code>nrMatchers</code> is the number of matching scorers,
* and all scorers are after the matching doc, or are exhausted.
*/
- private ScorerQueue scorerQueue = null;
+ private ScorerDocQueue scorerDocQueue = null;
+ private int queueSize = -1; // used to avoid size() method calls on scorerDocQueue
/** The document number of the current match. */
private int currentDoc = -1;
@@ -91,47 +93,65 @@
}
/** Called the first time next() or skipTo() is called to
- * initialize <code>scorerQueue</code>.
+ * initialize <code>scorerDocQueue</code>.
*/
- private void initScorerQueue() throws IOException {
+ private void initScorerDocQueue() throws IOException {
Iterator si = subScorers.iterator();
- scorerQueue = new ScorerQueue(nrScorers);
+ scorerDocQueue = new ScorerDocQueue(nrScorers);
+ queueSize = 0;
while (si.hasNext()) {
Scorer se = (Scorer) si.next();
- if (se.next()) { // doc() method will be used in scorerQueue.
- scorerQueue.insert(se);
+ if (se.next()) { // doc() method will be used in scorerDocQueue.
+ if (scorerDocQueue.insert(se)) {
+ queueSize++;
+ }
}
}
}
- /** A <code>PriorityQueue</code> that orders by {@link Scorer#doc()}. */
- private class ScorerQueue extends PriorityQueue {
- ScorerQueue(int size) {
- initialize(size);
+ /** Scores and collects all matching documents.
+ * @param hc The collector to which all matching documents are passed through
+ * {@link HitCollector#collect(int, float)}.
+ * <br>When this method is used the {@link #explain(int)} method should not be used.
+ */
+ public void score(HitCollector hc) throws IOException {
+ while (next()) {
+ hc.collect(currentDoc, currentScore);
}
+ }
- protected boolean lessThan(Object o1, Object o2) {
- return ((Scorer)o1).doc() < ((Scorer)o2).doc();
+ /** Expert: Collects matching documents in a range. Hook for optimization.
+ * Note that {@link #next()} must be called once before this method is called
+ * for the first time.
+ * @param hc The collector to which all matching documents are passed through
+ * {@link HitCollector#collect(int, float)}.
+ * @param max Do not score documents past this.
+ * @return true if more matching documents may remain.
+ */
+ protected boolean score(HitCollector hc, int max) throws IOException {
+ while (currentDoc < max) {
+ hc.collect(currentDoc, currentScore);
+ if (!next()) {
+ return false;
+ }
}
+ return true;
}
-
+
public boolean next() throws IOException {
- if (scorerQueue == null) {
- initScorerQueue();
+ if (scorerDocQueue == null) {
+ initScorerDocQueue(); // first call: builds the queue and sets queueSize
}
- if (scorerQueue.size() < minimumNrMatchers) {
- return false;
- } else {
- return advanceAfterCurrent();
- }
+ return (queueSize >= minimumNrMatchers) // cached size, as used by skipTo()
+ && advanceAfterCurrent();
}
/** Advance all subscorers after the current document determined by the
- * top of the <code>scorerQueue</code>.
+ * top of the <code>scorerDocQueue</code>.
* Repeat until at least the minimum number of subscorers match on the same
* document and all subscorers are after that document or are exhausted.
- * <br>On entry the <code>scorerQueue</code> has at least <code>minimumNrMatchers</code>
+ * <br>On entry the <code>scorerDocQueue</code> has at least <code>minimumNrMatchers</code>
* available. At least the scorer with the minimum document number will be advanced.
* @return true iff there is a match.
* <br>In case there is a match, </code>currentDoc</code>, </code>currentSumScore</code>,
@@ -140,39 +160,32 @@
* @todo Investigate whether it is possible to use skipTo() when
* the minimum number of matchers is bigger than one, ie. try and use the
* character of ConjunctionScorer for the minimum number of matchers.
+ * Also delay calling score() on the sub scorers until the minimum number of
+ * matchers is reached.
+ * <br>For this, a Scorer array with minimumNrMatchers elements might
+ * hold Scorers at currentDoc that are temporarily popped from scorerDocQueue.
*/
protected boolean advanceAfterCurrent() throws IOException {
do { // repeat until minimum nr of matchers
- Scorer top = (Scorer) scorerQueue.top();
- currentDoc = top.doc();
- currentScore = top.score();
+ currentDoc = scorerDocQueue.topDoc();
+ currentScore = scorerDocQueue.topScore();
nrMatchers = 1;
do { // Until all subscorers are after currentDoc
- if (top.next()) {
- scorerQueue.adjustTop();
- } else {
- scorerQueue.pop();
- if (scorerQueue.size() < (minimumNrMatchers - nrMatchers)) {
- // Not enough subscorers left for a match on this document,
- // and also no more chance of any further match.
- return false;
- }
- if (scorerQueue.size() == 0) {
+ if (! scorerDocQueue.topNextAndAdjustElsePop()) {
+ if (--queueSize == 0) {
break; // nothing more to advance, check for last match.
}
}
- top = (Scorer) scorerQueue.top();
- if (top.doc() != currentDoc) {
+ if (scorerDocQueue.topDoc() != currentDoc) {
break; // All remaining subscorers are after currentDoc.
- } else {
- currentScore += top.score();
- nrMatchers++;
}
+ currentScore += scorerDocQueue.topScore();
+ nrMatchers++;
} while (true);
if (nrMatchers >= minimumNrMatchers) {
return true;
- } else if (scorerQueue.size() < minimumNrMatchers) {
+ } else if (queueSize < minimumNrMatchers) {
return false;
}
} while (true);
@@ -200,40 +213,50 @@
* @return true iff there is such a match.
*/
public boolean skipTo(int target) throws IOException {
- if (scorerQueue == null) {
- initScorerQueue();
+ if (scorerDocQueue == null) {
+ initScorerDocQueue();
}
- if (scorerQueue.size() < minimumNrMatchers) {
+ if (queueSize < minimumNrMatchers) {
return false;
}
if (target <= currentDoc) {
- return true;
+ target = currentDoc + 1;
}
do {
- Scorer top = (Scorer) scorerQueue.top();
- if (top.doc() >= target) {
+ if (scorerDocQueue.topDoc() >= target) {
return advanceAfterCurrent();
- } else if (top.skipTo(target)) {
- scorerQueue.adjustTop();
- } else {
- scorerQueue.pop();
- if (scorerQueue.size() < minimumNrMatchers) {
+ } else if (! scorerDocQueue.topSkipToAndAdjustElsePop(target)) {
+ if (--queueSize < minimumNrMatchers) {
return false;
}
}
} while (true);
}
- /** Gives and explanation for the score of a given document.
- * @todo Show the resulting score. See BooleanScorer.explain() on how to do this.
- */
+ /** @return An explanation for the score of a given document. */
public Explanation explain(int doc) throws IOException {
Explanation res = new Explanation();
- res.setDescription("At least " + minimumNrMatchers + " of");
Iterator ssi = subScorers.iterator();
+ float sumScore = 0.0f; // sum of the values of the matching sub explanations
+ int nrMatches = 0; // number of sub scorers matching doc
while (ssi.hasNext()) {
- res.addDetail( ((Scorer) ssi.next()).explain(doc));
+ Explanation es = ((Scorer) ssi.next()).explain(doc);
+ if (es.getValue() > 0.0f) { // indicates match
+ sumScore += es.getValue();
+ nrMatches++;
+ }
+ res.addDetail(es);
}
+ if (nrMatches >= minimumNrMatchers) { // local count for doc, not the nrMatchers field
+ res.setValue(sumScore);
+ res.setDescription("sum over at least " + minimumNrMatchers
+ + " of " + subScorers.size() + ":");
+ } else {
+ res.setValue(0.0f);
+ res.setDescription(nrMatches + " match(es) but at least "
+ + minimumNrMatchers + " of "
+ + subScorers.size() + " needed");
+ }
return res;
}
}
Index: src/java/org/apache/lucene/util/ScorerDocQueue.java
===================================================================
--- src/java/org/apache/lucene/util/ScorerDocQueue.java (revision 0)
+++ src/java/org/apache/lucene/util/ScorerDocQueue.java (revision 0)
@@ -0,0 +1,214 @@
+package org.apache.lucene.util;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Derived from org.apache.lucene.util.PriorityQueue of March 2005 */
+
+import java.io.IOException;
+import org.apache.lucene.search.Scorer;
+
+/** A ScorerDocQueue maintains a partial ordering of its Scorers such that the
+ least Scorer can always be found in constant time. Put()'s and pop()'s
+ require log(size) time. The ordering is by Scorer.doc().
+ */
+public class ScorerDocQueue { // later: SpansQueue for spans with doc and term positions
+ private final HeapedScorerDoc[] heap;
+ private final int maxSize;
+ private int size;
+
+ private static class HeapedScorerDoc { // heap entry; static: uses no outer state
+ Scorer scorer;
+ int doc; // copy of scorer.doc(), the key compared by upHeap()/downHeap()
+
+ HeapedScorerDoc(Scorer s) { this(s, s.doc()); }
+
+ HeapedScorerDoc(Scorer scorer, int doc) {
+ this.scorer = scorer;
+ this.doc = doc;
+ }
+
+ void adjust() { doc = scorer.doc(); } // re-read the key from the scorer
+ }
+
+ private HeapedScorerDoc topHSD; // same as heap[1], only for speed
+
+ /** Create a ScorerDocQueue with a maximum size, which may be zero. */
+ public ScorerDocQueue(int maxSize) {
+ // assert maxSize >= 0;
+ size = 0;
+ int heapSize = maxSize + 1; // entries are stored from index 1 on
+ heap = new HeapedScorerDoc[heapSize];
+ this.maxSize = maxSize;
+ topHSD = null; // empty queue; reading heap[1] would fail when maxSize is 0
+ }
+
+ /**
+ * Adds a Scorer to the ScorerDocQueue in log(size) time.
+ * If one tries to add more Scorers than maxSize
+ * an ArrayIndexOutOfBoundsException is thrown.
+ */
+ public final void put(Scorer scorer) {
+ size++;
+ heap[size] = new HeapedScorerDoc(scorer); // new entry starts in the last slot
+ upHeap();
+ }
+
+ /**
+ * Adds a Scorer to the ScorerDocQueue in log(size) time if either the queue is
+ * not full, or scorer.doc() is not less than topDoc(); it then replaces the top.
+ * @param scorer the Scorer to add; it is ordered by the value of its doc().
+ * @return true if scorer is added, false otherwise.
+ */
+ public boolean insert(Scorer scorer){
+ if (size < maxSize) {
+ put(scorer);
+ return true;
+ } else {
+ int docNr = scorer.doc();
+ if ((size > 0) && (! (docNr < topHSD.doc))) { // heap[1] is top()
+ heap[1] = new HeapedScorerDoc(scorer, docNr);
+ downHeap();
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /** Returns the least Scorer of the ScorerDocQueue in constant time.
+ * Should not be used when the queue is empty.
+ */
+ public final Scorer top() {
+ // assert size > 0;
+ return topHSD.scorer;
+ }
+
+ /** Returns document number of the least Scorer of the ScorerDocQueue
+ * in constant time.
+ * Should not be used when the queue is empty.
+ */
+ public final int topDoc() {
+ // assert size > 0;
+ return topHSD.doc;
+ }
+
+ public final float topScore() throws IOException {
+ // assert size > 0;
+ return topHSD.scorer.score();
+ }
+
+ public final boolean topNextAndAdjustElsePop() throws IOException {
+ return checkAdjustElsePop( topHSD.scorer.next());
+ }
+
+ public final boolean topSkipToAndAdjustElsePop(int target) throws IOException {
+ return checkAdjustElsePop( topHSD.scorer.skipTo(target));
+ }
+
+ /** Restores heap order after the top Scorer was advanced.
+ * @return cond; when it is false the top Scorer is removed from the queue.
+ */
+ private boolean checkAdjustElsePop(boolean cond) {
+ if (cond) {
+ adjustTop(); // re-read the top's doc() and sift it down
+ } else {
+ popNoResult(); // move the last entry to the top and sift it down
+ }
+ return cond;
+ }
+
+ /** Removes and returns the least scorer of the ScorerDocQueue in log(size)
+ * time.
+ * Should not be used when the queue is empty.
+ */
+ public final Scorer pop() {
+ // assert size > 0;
+ Scorer result = topHSD.scorer;
+ popNoResult();
+ return result;
+ }
+
+ /** Removes the least scorer of the ScorerDocQueue in log(size) time.
+ * Should not be used when the queue is empty.
+ */
+ private final void popNoResult() {
+ heap[1] = heap[size]; // move last to first
+ heap[size] = null;
+ size--;
+ downHeap(); // adjust heap
+ }
+
+ /** Should be called when the scorer at top changes doc() value.
+ * Still log(n) worst case, but it's at least twice as fast to <pre>
+ * { sdq.top().next(); sdq.adjustTop(); }
+ * </pre> instead of <pre>
+ * { s = sdq.pop(); s.next(); sdq.put(s); }
+ * </pre>
+ */
+ public final void adjustTop() {
+ // assert size > 0;
+ topHSD.adjust();
+ downHeap();
+ }
+
+ /** Returns the number of scorers currently stored in the ScorerDocQueue. */
+ public final int size() {
+ return size;
+ }
+
+ /** Removes all entries from the ScorerDocQueue,
+ * including the cached reference to the top entry. */
+ public final void clear() {
+ java.util.Arrays.fill(heap, 0, size + 1, null); // slots 0..size
+ size = 0;
+ topHSD = null; // otherwise top() would still return the removed Scorer
+ }
+
+ private final void upHeap() { // sifts the entry in the last slot up to its place
+ int i = size;
+ HeapedScorerDoc node = heap[i]; // save bottom node
+ int j = i >>> 1; // parent of i
+ while ((j > 0) && (node.doc < heap[j].doc)) {
+ heap[i] = heap[j]; // shift parents down
+ i = j;
+ j = j >>> 1;
+ }
+ heap[i] = node; // install saved node
+ topHSD = heap[1]; // refresh the cached top
+ }
+
+ private final void downHeap() { // sifts the entry at heap[1] down to its place
+ int i = 1;
+ HeapedScorerDoc node = heap[i]; // save top node (null after the last pop)
+ int j = i << 1; // find smaller child
+ int k = j + 1; // right sibling of j
+ if ((k <= size) && (heap[k].doc < heap[j].doc)) {
+ j = k;
+ }
+ while ((j <= size) && (heap[j].doc < node.doc)) { // skipped when size is 0
+ heap[i] = heap[j]; // shift up child
+ i = j;
+ j = i << 1;
+ k = j + 1;
+ if (k <= size && (heap[k].doc < heap[j].doc)) {
+ j = k;
+ }
+ }
+ heap[i] = node; // install saved node
+ topHSD = heap[1]; // refresh the cached top
+ }
+}
Property changes on: src/java/org/apache/lucene/util/ScorerDocQueue.java
___________________________________________________________________
Name: svn:eol-style
+ native